[tip:x86/timers] x86/hpet: Rename variables to prepare for switching to channels

2019-06-27 Thread tip-bot for Ingo Molnar
Commit-ID:  d415c7543140f77fe1d2d9d3942cbf51a9737993
Gitweb: https://git.kernel.org/tip/d415c7543140f77fe1d2d9d3942cbf51a9737993
Author: Ingo Molnar 
AuthorDate: Sun, 23 Jun 2019 15:24:02 +0200
Committer:  Thomas Gleixner 
CommitDate: Fri, 28 Jun 2019 00:57:24 +0200

x86/hpet: Rename variables to prepare for switching to channels

struct hpet_dev is gone with the next change as the clockevent storage
moves into struct hpet_channel. So the variable name hdev will not make
sense anymore. Ditto for timer vs. channel and similar details.

Doing the rename in the change makes the patch harder to review. Doing it
afterward is problematic vs. tracking down issues.  Doing it upfront is the
easiest solution as it does not change functionality.
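
For context, the per-timer bookkeeping being renamed looks roughly like this
(a sketch of the struct as of this series; field details may differ slightly):

    struct hpet_dev {
            struct clock_event_device       evt;
            unsigned int                    num;
            int                             cpu;
            unsigned int                    irq;
            unsigned int                    flags;
            char                            name[10];
    };

The follow-up change turns this into struct hpet_channel, which is why
'channel' and 'hc' become the natural identifiers prepared here.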

Signed-off-by: Ingo Molnar 
Signed-off-by: Thomas Gleixner 
Reviewed-by: Ingo Molnar 
Cc: Peter Zijlstra 
Cc: Ricardo Neri 
Cc: Ashok Raj 
Cc: Andi Kleen 
Cc: Suravee Suthikulpanit 
Cc: Stephane Eranian 
Cc: Ravi Shankar 
Link: https://lkml.kernel.org/r/20190623132436.093113...@linutronix.de

---
 arch/x86/kernel/hpet.c | 124 -
 1 file changed, 62 insertions(+), 62 deletions(-)

diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c
index 640ff75cc523..32f21b429881 100644
--- a/arch/x86/kernel/hpet.c
+++ b/arch/x86/kernel/hpet.c
@@ -315,7 +315,7 @@ static void hpet_legacy_clockevent_register(void)
pr_debug("Clockevent registered\n");
 }
 
-static int hpet_set_periodic(struct clock_event_device *evt, int timer)
+static int hpet_set_periodic(struct clock_event_device *evt, int channel)
 {
unsigned int cfg, cmp, now;
uint64_t delta;
@@ -325,11 +325,11 @@ static int hpet_set_periodic(struct clock_event_device *evt, int channel)
delta >>= evt->shift;
now = hpet_readl(HPET_COUNTER);
cmp = now + (unsigned int)delta;
-   cfg = hpet_readl(HPET_Tn_CFG(timer));
+   cfg = hpet_readl(HPET_Tn_CFG(channel));
cfg |= HPET_TN_ENABLE | HPET_TN_PERIODIC | HPET_TN_SETVAL |
   HPET_TN_32BIT;
-   hpet_writel(cfg, HPET_Tn_CFG(timer));
-   hpet_writel(cmp, HPET_Tn_CMP(timer));
+   hpet_writel(cfg, HPET_Tn_CFG(channel));
+   hpet_writel(cmp, HPET_Tn_CMP(channel));
udelay(1);
/*
 * HPET on AMD 81xx needs a second write (with HPET_TN_SETVAL
@@ -338,32 +338,32 @@ static int hpet_set_periodic(struct clock_event_device *evt, int channel)
 * (See AMD-8111 HyperTransport I/O Hub Data Sheet,
 * Publication # 24674)
 */
-   hpet_writel((unsigned int)delta, HPET_Tn_CMP(timer));
+   hpet_writel((unsigned int)delta, HPET_Tn_CMP(channel));
hpet_start_counter();
hpet_print_config();
 
return 0;
 }
 
-static int hpet_set_oneshot(struct clock_event_device *evt, int timer)
+static int hpet_set_oneshot(struct clock_event_device *evt, int channel)
 {
unsigned int cfg;
 
-   cfg = hpet_readl(HPET_Tn_CFG(timer));
+   cfg = hpet_readl(HPET_Tn_CFG(channel));
cfg &= ~HPET_TN_PERIODIC;
cfg |= HPET_TN_ENABLE | HPET_TN_32BIT;
-   hpet_writel(cfg, HPET_Tn_CFG(timer));
+   hpet_writel(cfg, HPET_Tn_CFG(channel));
 
return 0;
 }
 
-static int hpet_shutdown(struct clock_event_device *evt, int timer)
+static int hpet_shutdown(struct clock_event_device *evt, int channel)
 {
unsigned int cfg;
 
-   cfg = hpet_readl(HPET_Tn_CFG(timer));
+   cfg = hpet_readl(HPET_Tn_CFG(channel));
cfg &= ~HPET_TN_ENABLE;
-   hpet_writel(cfg, HPET_Tn_CFG(timer));
+   hpet_writel(cfg, HPET_Tn_CFG(channel));
 
return 0;
 }
@@ -460,30 +460,30 @@ static struct clock_event_device hpet_clockevent = {
 
 void hpet_msi_unmask(struct irq_data *data)
 {
-   struct hpet_dev *hdev = irq_data_get_irq_handler_data(data);
+   struct hpet_dev *hc = irq_data_get_irq_handler_data(data);
unsigned int cfg;
 
/* unmask it */
-   cfg = hpet_readl(HPET_Tn_CFG(hdev->num));
+   cfg = hpet_readl(HPET_Tn_CFG(hc->num));
cfg |= HPET_TN_ENABLE | HPET_TN_FSB;
-   hpet_writel(cfg, HPET_Tn_CFG(hdev->num));
+   hpet_writel(cfg, HPET_Tn_CFG(hc->num));
 }
 
 void hpet_msi_mask(struct irq_data *data)
 {
-   struct hpet_dev *hdev = irq_data_get_irq_handler_data(data);
+   struct hpet_dev *hc = irq_data_get_irq_handler_data(data);
unsigned int cfg;
 
/* mask it */
-   cfg = hpet_readl(HPET_Tn_CFG(hdev->num));
+   cfg = hpet_readl(HPET_Tn_CFG(hc->num));
cfg &= ~(HPET_TN_ENABLE | HPET_TN_FSB);
-   hpet_writel(cfg, HPET_Tn_CFG(hdev->num));
+   hpet_writel(cfg, HPET_Tn_CFG(hc->num));
 }
 
-void hpet_msi_write(struct hpet_dev *hdev, struct msi_msg *msg)
+void hpet_msi_write(struct hpet_dev *hc, struct msi_msg *msg)
 {
-   hpet_writel(msg->data, HPET_Tn_ROUTE(hdev->num));
-   hpet_writel(msg->address_lo, HPET_Tn_ROUTE(hdev->num) + 4);
+   hpet_writel(msg->data, HPET_Tn_ROUTE(hc->num));
+   hpet_writel(msg->address_lo, HPET_Tn_ROUTE(hc->num) + 4);
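
The periodic delta programmed by hpet_set_periodic() above comes from the
standard clockevents mult/shift conversion. A minimal standalone sketch of
that arithmetic (the mult value below is an illustrative approximation for a
14.318 MHz HPET with shift = 32, not taken from the patch):

    #include <stdint.h>
    #include <stdio.h>

    /* ns -> clock cycles, as the clockevents core computes it */
    static uint64_t ns_to_cycles(uint64_t ns, uint32_t mult, uint32_t shift)
    {
            return (ns * mult) >> shift;
    }

    int main(void)
    {
            uint64_t tick_ns = 1000000000ULL / 250;  /* HZ == 250 */

            /* mult ~= freq * 2^shift / NSEC_PER_SEC for freq = 14318180 Hz */
            printf("period: %llu cycles\n",
                   (unsigned long long)ns_to_cycles(tick_ns, 61496110U, 32));
            return 0;
    }

For a 4 ms tick this prints roughly 57272 cycles, i.e. 14.31818 MHz * 4 ms.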

[tip:x86/timers] x86/hpet: Make naming consistent

2019-06-27 Thread tip-bot for Ingo Molnar
Commit-ID:  3fe50c34dc1fa8ae2c24ec202b9decbbef72921d
Gitweb: https://git.kernel.org/tip/3fe50c34dc1fa8ae2c24ec202b9decbbef72921d
Author: Ingo Molnar 
AuthorDate: Sun, 23 Jun 2019 15:23:55 +0200
Committer:  Thomas Gleixner 
CommitDate: Fri, 28 Jun 2019 00:57:20 +0200

x86/hpet: Make naming consistent

Use 'evt' for clockevents pointers and capitalize HPET in comments.

Signed-off-by: Ingo Molnar 
Signed-off-by: Thomas Gleixner 
Reviewed-by: Ingo Molnar 
Cc: Peter Zijlstra 
Cc: Ricardo Neri 
Cc: Ashok Raj 
Cc: Andi Kleen 
Cc: Suravee Suthikulpanit 
Cc: Stephane Eranian 
Cc: Ravi Shankar 
Link: https://lkml.kernel.org/r/20190623132435.454138...@linutronix.de

---
 arch/x86/kernel/hpet.c | 20 ++--
 1 file changed, 10 insertions(+), 10 deletions(-)

diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c
index 96daae404b29..823e8d32182a 100644
--- a/arch/x86/kernel/hpet.c
+++ b/arch/x86/kernel/hpet.c
@@ -121,7 +121,7 @@ static inline int is_hpet_capable(void)
 }
 
 /**
- * is_hpet_enabled - check whether the hpet timer interrupt is enabled
+ * is_hpet_enabled - Check whether the legacy HPET timer interrupt is enabled
  */
 int is_hpet_enabled(void)
 {
@@ -164,7 +164,7 @@ do { \
 } while (0)
 
 /*
- * When the hpet driver (/dev/hpet) is enabled, we need to reserve
+ * When the HPET driver (/dev/hpet) is enabled, we need to reserve
  * timer 0 and timer 1 in case of RTC emulation.
  */
 #ifdef CONFIG_HPET
@@ -212,7 +212,7 @@ static void __init hpet_reserve_platform_timers(unsigned int id)
 static void hpet_reserve_platform_timers(unsigned int id) { }
 #endif
 
-/* Common hpet functions */
+/* Common HPET functions */
 static void hpet_stop_counter(void)
 {
u32 cfg = hpet_readl(HPET_CFG);
@@ -266,7 +266,7 @@ static void hpet_legacy_clockevent_register(void)
hpet_enable_legacy_int();
 
/*
-* Start hpet with the boot cpu mask and make it
+* Start HPET with the boot cpu mask and make it
 * global after the IO_APIC has been initialized.
 */
hpet_clockevent.cpumask = cpumask_of(boot_cpu_data.cpu_index);
@@ -399,7 +399,7 @@ static int hpet_legacy_next_event(unsigned long delta,
 }
 
 /*
- * The hpet clock event device
+ * The HPET clock event device
  */
 static struct clock_event_device hpet_clockevent = {
.name   = "hpet",
@@ -484,14 +484,14 @@ static int hpet_msi_next_event(unsigned long delta,
 static irqreturn_t hpet_interrupt_handler(int irq, void *data)
 {
struct hpet_dev *dev = data;
-   struct clock_event_device *hevt = &dev->evt;
+   struct clock_event_device *evt = &dev->evt;
 
-   if (!hevt->event_handler) {
+   if (!evt->event_handler) {
pr_info("Spurious interrupt HPET timer %d\n", dev->num);
return IRQ_HANDLED;
}
 
-   hevt->event_handler(hevt);
+   evt->event_handler(evt);
return IRQ_HANDLED;
 }
 
@@ -703,7 +703,7 @@ static inline void hpet_reserve_msi_timers(struct hpet_data *hd) { }
  * with its associated locking overhead. And we also need 64-bit atomic
  * read.
  *
- * The lock and the hpet value are stored together and can be read in a
+ * The lock and the HPET value are stored together and can be read in a
  * single atomic 64-bit read. It is explicitly assumed that arch_spinlock_t
  * is 32 bits in size.
  */
@@ -1053,7 +1053,7 @@ static unsigned long hpet_pie_limit;
 static rtc_irq_handler irq_handler;
 
 /*
- * Check that the hpet counter c1 is ahead of the c2
+ * Check that the HPET counter c1 is ahead of the c2
  */
 static inline int hpet_cnt_ahead(u32 c1, u32 c2)
 {


[tip:x86/timers] x86/hpet: Coding style cleanup

2019-06-27 Thread tip-bot for Ingo Molnar
Commit-ID:  0b5c597de6aa3480d6add2f37ef7de3f9312
Gitweb: https://git.kernel.org/tip/0b5c597de6aa3480d6add2f37ef7de3f9312
Author: Ingo Molnar 
AuthorDate: Sun, 23 Jun 2019 15:23:57 +0200
Committer:  Thomas Gleixner 
CommitDate: Fri, 28 Jun 2019 00:57:21 +0200

x86/hpet: Coding style cleanup

Signed-off-by: Ingo Molnar 
Signed-off-by: Thomas Gleixner 
Reviewed-by: Ingo Molnar 
Cc: Peter Zijlstra 
Cc: Ricardo Neri 
Cc: Ashok Raj 
Cc: Andi Kleen 
Cc: Suravee Suthikulpanit 
Cc: Stephane Eranian 
Cc: Ravi Shankar 
Link: https://lkml.kernel.org/r/20190623132435.637420...@linutronix.de

---
 arch/x86/kernel/hpet.c | 43 ++-
 1 file changed, 26 insertions(+), 17 deletions(-)

diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c
index 1a389a2ff42a..ed2d556f2c96 100644
--- a/arch/x86/kernel/hpet.c
+++ b/arch/x86/kernel/hpet.c
@@ -131,26 +131,33 @@ EXPORT_SYMBOL_GPL(is_hpet_enabled);
 
 static void _hpet_print_config(const char *function, int line)
 {
-   u32 i, timers, l, h;
+   u32 i, id, period, cfg, status, channels, l, h;
+
pr_info("%s(%d):\n", function, line);
-   l = hpet_readl(HPET_ID);
-   h = hpet_readl(HPET_PERIOD);
-   timers = ((l & HPET_ID_NUMBER) >> HPET_ID_NUMBER_SHIFT) + 1;
-   pr_info("ID: 0x%x, PERIOD: 0x%x\n", l, h);
-   l = hpet_readl(HPET_CFG);
-   h = hpet_readl(HPET_STATUS);
-   pr_info("CFG: 0x%x, STATUS: 0x%x\n", l, h);
+
+   id = hpet_readl(HPET_ID);
+   period = hpet_readl(HPET_PERIOD);
+   pr_info("ID: 0x%x, PERIOD: 0x%x\n", id, period);
+
+   cfg = hpet_readl(HPET_CFG);
+   status = hpet_readl(HPET_STATUS);
+   pr_info("CFG: 0x%x, STATUS: 0x%x\n", cfg, status);
+
l = hpet_readl(HPET_COUNTER);
h = hpet_readl(HPET_COUNTER+4);
pr_info("COUNTER_l: 0x%x, COUNTER_h: 0x%x\n", l, h);
 
-   for (i = 0; i < timers; i++) {
+   channels = ((id & HPET_ID_NUMBER) >> HPET_ID_NUMBER_SHIFT) + 1;
+
+   for (i = 0; i < channels; i++) {
l = hpet_readl(HPET_Tn_CFG(i));
h = hpet_readl(HPET_Tn_CFG(i)+4);
pr_info("T%d: CFG_l: 0x%x, CFG_h: 0x%x\n", i, l, h);
+
l = hpet_readl(HPET_Tn_CMP(i));
h = hpet_readl(HPET_Tn_CMP(i)+4);
pr_info("T%d: CMP_l: 0x%x, CMP_h: 0x%x\n", i, l, h);
+
l = hpet_readl(HPET_Tn_ROUTE(i));
h = hpet_readl(HPET_Tn_ROUTE(i)+4);
pr_info("T%d ROUTE_l: 0x%x, ROUTE_h: 0x%x\n", i, l, h);
@@ -216,6 +223,7 @@ static void hpet_reserve_platform_timers(unsigned int id) { }
 static void hpet_stop_counter(void)
 {
u32 cfg = hpet_readl(HPET_CFG);
+
cfg &= ~HPET_CFG_ENABLE;
hpet_writel(cfg, HPET_CFG);
 }
@@ -229,6 +237,7 @@ static void hpet_reset_counter(void)
 static void hpet_start_counter(void)
 {
unsigned int cfg = hpet_readl(HPET_CFG);
+
cfg |= HPET_CFG_ENABLE;
hpet_writel(cfg, HPET_CFG);
 }
@@ -393,7 +402,7 @@ static int hpet_legacy_resume(struct clock_event_device *evt)
 }
 
 static int hpet_legacy_next_event(unsigned long delta,
-   struct clock_event_device *evt)
+ struct clock_event_device *evt)
 {
return hpet_next_event(delta, 0);
 }
@@ -1142,6 +1151,7 @@ EXPORT_SYMBOL_GPL(hpet_rtc_timer_init);
 static void hpet_disable_rtc_channel(void)
 {
u32 cfg = hpet_readl(HPET_T1_CFG);
+
cfg &= ~HPET_TN_ENABLE;
hpet_writel(cfg, HPET_T1_CFG);
 }
@@ -1183,8 +1193,7 @@ int hpet_set_rtc_irq_bit(unsigned long bit_mask)
 }
 EXPORT_SYMBOL_GPL(hpet_set_rtc_irq_bit);
 
-int hpet_set_alarm_time(unsigned char hrs, unsigned char min,
-   unsigned char sec)
+int hpet_set_alarm_time(unsigned char hrs, unsigned char min, unsigned char sec)
 {
if (!is_hpet_enabled())
return 0;
@@ -1204,15 +1213,16 @@ int hpet_set_periodic_freq(unsigned long freq)
if (!is_hpet_enabled())
return 0;
 
-   if (freq <= DEFAULT_RTC_INT_FREQ)
+   if (freq <= DEFAULT_RTC_INT_FREQ) {
hpet_pie_limit = DEFAULT_RTC_INT_FREQ / freq;
-   else {
+   } else {
clc = (uint64_t) hpet_clockevent.mult * NSEC_PER_SEC;
do_div(clc, freq);
clc >>= hpet_clockevent.shift;
hpet_pie_delta = clc;
hpet_pie_limit = 0;
}
+
return 1;
 }
 EXPORT_SYMBOL_GPL(hpet_set_periodic_freq);
@@ -1272,8 +1282,7 @@ irqreturn_t hpet_rtc_interrupt(int irq, void *dev_id)
hpet_prev_update_sec = curr_time.tm_sec;
}
 
-   if (hpet_rtc_flags & RTC_PIE &&
-   ++hpet_pie_count >= hpet_pie_limit) {
+   if (hpet_rtc_flags & RTC_PIE && ++hpet_pie_count >= hpet_pie_limit) {
rtc_int_flag |= RTC_PF;
hpet_pie_count = 0;
}
@@ -1282,7 +1291,7 @@ irqreturn_t hpet_rtc_interrupt(int irq, void *dev_id)

[tip:x86/timers] x86/hpet: Clean up comments

2019-06-27 Thread tip-bot for Ingo Molnar
Commit-ID:  dfe36b573ed320ce311b2cb9251d2543be9e52ac
Gitweb: https://git.kernel.org/tip/dfe36b573ed320ce311b2cb9251d2543be9e52ac
Author: Ingo Molnar 
AuthorDate: Sun, 23 Jun 2019 15:23:56 +0200
Committer:  Thomas Gleixner 
CommitDate: Fri, 28 Jun 2019 00:57:20 +0200

x86/hpet: Clean up comments

Signed-off-by: Ingo Molnar 
Signed-off-by: Thomas Gleixner 
Reviewed-by: Ingo Molnar 
Cc: Peter Zijlstra 
Cc: Ricardo Neri 
Cc: Ashok Raj 
Cc: Andi Kleen 
Cc: Suravee Suthikulpanit 
Cc: Stephane Eranian 
Cc: Ravi Shankar 
Link: https://lkml.kernel.org/r/20190623132435.545653...@linutronix.de

---
 arch/x86/kernel/hpet.c | 41 +++--
 1 file changed, 23 insertions(+), 18 deletions(-)

diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c
index 823e8d32182a..1a389a2ff42a 100644
--- a/arch/x86/kernel/hpet.c
+++ b/arch/x86/kernel/hpet.c
@@ -266,8 +266,8 @@ static void hpet_legacy_clockevent_register(void)
hpet_enable_legacy_int();
 
/*
-* Start HPET with the boot cpu mask and make it
-* global after the IO_APIC has been initialized.
+* Start HPET with the boot CPU's cpumask and make it global after
+* the IO_APIC has been initialized.
 */
hpet_clockevent.cpumask = cpumask_of(boot_cpu_data.cpu_index);
clockevents_config_and_register(_clockevent, hpet_freq,
@@ -688,10 +688,10 @@ static inline void hpet_reserve_msi_timers(struct hpet_data *hd) { }
 /*
  * Reading the HPET counter is a very slow operation. If a large number of
  * CPUs are trying to access the HPET counter simultaneously, it can cause
- * massive delay and slow down system performance dramatically. This may
+ * massive delays and slow down system performance dramatically. This may
  * happen when HPET is the default clock source instead of TSC. For a
  * really large system with hundreds of CPUs, the slowdown may be so
- * severe that it may actually crash the system because of a NMI watchdog
+ * severe, that it can actually crash the system because of a NMI watchdog
  * soft lockup, for example.
  *
  * If multiple CPUs are trying to access the HPET counter at the same time,
@@ -700,8 +700,7 @@ static inline void hpet_reserve_msi_timers(struct hpet_data *hd) { }
  *
  * This special feature is only enabled on x86-64 systems. It is unlikely
  * that 32-bit x86 systems will have enough CPUs to require this feature
- * with its associated locking overhead. And we also need 64-bit atomic
- * read.
+ * with its associated locking overhead. We also need 64-bit atomic read.
  *
  * The lock and the HPET value are stored together and can be read in a
  * single atomic 64-bit read. It is explicitly assumed that arch_spinlock_t
@@ -1020,19 +1019,25 @@ void hpet_disable(void)
 
 #ifdef CONFIG_HPET_EMULATE_RTC
 
-/* HPET in LegacyReplacement Mode eats up RTC interrupt line. When, HPET
+/*
+ * HPET in LegacyReplacement mode eats up the RTC interrupt line. When HPET
  * is enabled, we support RTC interrupt functionality in software.
+ *
  * RTC has 3 kinds of interrupts:
- * 1) Update Interrupt - generate an interrupt, every sec, when RTC clock
- *is updated
- * 2) Alarm Interrupt - generate an interrupt at a specific time of day
- * 3) Periodic Interrupt - generate periodic interrupt, with frequencies
- *2Hz-8192Hz (2Hz-64Hz for non-root user) (all freqs in powers of 2)
- * (1) and (2) above are implemented using polling at a frequency of
- * 64 Hz. The exact frequency is a tradeoff between accuracy and interrupt
- * overhead. (DEFAULT_RTC_INT_FREQ)
- * For (3), we use interrupts at 64Hz or user specified periodic
- * frequency, whichever is higher.
+ *
+ *  1) Update Interrupt - generate an interrupt, every second, when the
+ * RTC clock is updated
+ *  2) Alarm Interrupt - generate an interrupt at a specific time of day
+ *  3) Periodic Interrupt - generate periodic interrupt, with frequencies
+ * 2Hz-8192Hz (2Hz-64Hz for non-root user) (all frequencies in powers of 2)
+ *
+ * (1) and (2) above are implemented using polling at a frequency of 64 Hz:
+ * DEFAULT_RTC_INT_FREQ.
+ *
+ * The exact frequency is a tradeoff between accuracy and interrupt overhead.
+ *
+ * For (3), we use interrupts at 64 Hz, or the user specified periodic frequency,
+ * if it's higher.
  */
 #include 
 #include 
@@ -1053,7 +1058,7 @@ static unsigned long hpet_pie_limit;
 static rtc_irq_handler irq_handler;
 
 /*
- * Check that the HPET counter c1 is ahead of the c2
+ * Check that the HPET counter c1 is ahead of c2
  */
 static inline int hpet_cnt_ahead(u32 c1, u32 c2)
 {
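
The hpet_cnt_ahead() helper whose comment is touched above relies on the
classic wraparound-safe counter comparison. A self-contained sketch of the
idiom (the in-kernel function body may differ):

    #include <stdint.h>
    #include <stdio.h>

    /*
     * "c1 is ahead of c2" in modular 32-bit arithmetic: the signed
     * difference handles counter wraparound correctly as long as the
     * two values are less than 2^31 apart.
     */
    static int cnt_ahead(uint32_t c1, uint32_t c2)
    {
            return (int32_t)(c2 - c1) < 0;
    }

    int main(void)
    {
            printf("%d\n", cnt_ahead(10, 5));           /* 1: 10 is ahead of 5 */
            printf("%d\n", cnt_ahead(5, 0xfffffff0u));  /* 1: 5 is ahead, past the wrap */
            return 0;
    }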


[tip:x86/timers] x86/hpet: Remove not required includes

2019-06-27 Thread tip-bot for Ingo Molnar
Commit-ID:  9bc9e1d4c139497553599a73839ea846dce63f72
Gitweb: https://git.kernel.org/tip/9bc9e1d4c139497553599a73839ea846dce63f72
Author: Ingo Molnar 
AuthorDate: Sun, 23 Jun 2019 15:23:54 +0200
Committer:  Thomas Gleixner 
CommitDate: Fri, 28 Jun 2019 00:57:20 +0200

x86/hpet: Remove not required includes

Signed-off-by: Ingo Molnar 
Signed-off-by: Thomas Gleixner 
Reviewed-by: Ingo Molnar 
Cc: Peter Zijlstra 
Cc: Ricardo Neri 
Cc: Ashok Raj 
Cc: Andi Kleen 
Cc: Suravee Suthikulpanit 
Cc: Stephane Eranian 
Cc: Ravi Shankar 
Link: https://lkml.kernel.org/r/20190623132435.348089...@linutronix.de

---
 arch/x86/kernel/hpet.c | 12 +---
 1 file changed, 1 insertion(+), 11 deletions(-)

diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c
index 4cf93294bacc..96daae404b29 100644
--- a/arch/x86/kernel/hpet.c
+++ b/arch/x86/kernel/hpet.c
@@ -1,22 +1,12 @@
 // SPDX-License-Identifier: GPL-2.0-only
-#include 
 #include 
 #include 
-#include 
 #include 
 #include 
-#include 
-#include 
-#include 
 #include 
-#include 
 #include 
-#include 
-#include 
+#include 
 
-#include 
-#include 
-#include 
 #include 
 #include 
 


[tip:x86/paravirt] x86/paravirt: Match paravirt patchlet field definition ordering to initialization ordering

2019-05-24 Thread tip-bot for Ingo Molnar
Commit-ID:  fc93dfd9345bb8b29a62b21cb0447dd1a3815f91
Gitweb: https://git.kernel.org/tip/fc93dfd9345bb8b29a62b21cb0447dd1a3815f91
Author: Ingo Molnar 
AuthorDate: Thu, 25 Apr 2019 10:10:12 +0200
Committer:  Ingo Molnar 
CommitDate: Thu, 25 Apr 2019 12:00:44 +0200

x86/paravirt: Match paravirt patchlet field definition ordering to initialization ordering

Here's the objdump -D output of the PATCH_XXL data table:

0010 <patch_data_xxl>:
  10:   fa  cli
  11:   fb  sti
  12:   57  push   %rdi
  13:   9d  popfq
  14:   9c  pushfq
  15:   58  pop%rax
  16:   0f 20 d0mov%cr2,%rax
  19:   0f 20 d8mov%cr3,%rax
  1c:   0f 22 dfmov%rdi,%cr3
  1f:   0f 09   wbinvd
  21:   0f 01 f8swapgs
  24:   48 0f 07sysretq
  27:   0f 01 f8swapgs
  2a:   48 89 f8mov%rdi,%rax

Note how this doesn't match up to the source code:

static const struct patch_xxl patch_data_xxl = {
.irq_irq_disable= { 0xfa }, // cli
.irq_irq_enable = { 0xfb }, // sti
.irq_save_fl= { 0x9c, 0x58 },   // pushf; pop %[re]ax
.mmu_read_cr2   = { 0x0f, 0x20, 0xd0 }, // mov %cr2, %[re]ax
.mmu_read_cr3   = { 0x0f, 0x20, 0xd8 }, // mov %cr3, %[re]ax
.irq_restore_fl = { 0x57, 0x9d },   // push %rdi; popfq
.mmu_write_cr3  = { 0x0f, 0x22, 0xdf }, // mov %rdi, %cr3
.cpu_wbinvd = { 0x0f, 0x09 },   // wbinvd
.cpu_usergs_sysret64= { 0x0f, 0x01, 0xf8,
0x48, 0x0f, 0x07 }, // swapgs; sysretq
.cpu_swapgs = { 0x0f, 0x01, 0xf8 }, // swapgs
.mov64  = { 0x48, 0x89, 0xf8 }, // mov %rdi, %rax
.irq_restore_fl = { 0x50, 0x9d },   // push %eax; popf
.mmu_write_cr3  = { 0x0f, 0x22, 0xd8 }, // mov %eax, %cr3
.cpu_iret   = { 0xcf }, // iret
};

Note how they are reordered: in the generated code .irq_restore_fl comes
before .irq_save_fl, etc. This is because the field ordering in struct
patch_xxl does not match the initialization ordering of patch_data_xxl.

Match up the initialization order with the definition order - this makes
the disassembly easily reviewable:

0010 <patch_data_xxl>:
  10:   fa  cli
  11:   fb  sti
  12:   9c  pushfq
  13:   58  pop%rax
  14:   0f 20 d0mov%cr2,%rax
  17:   0f 20 d8mov%cr3,%rax
  1a:   0f 22 dfmov%rdi,%cr3
  1d:   57  push   %rdi
  1e:   9d  popfq
  1f:   0f 09   wbinvd
  21:   0f 01 f8swapgs
  24:   48 0f 07sysretq
  27:   0f 01 f8swapgs
  2a:   48 89 f8mov%rdi,%rax
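
The underlying C rule is that a struct's memory layout always follows the
field declaration order, no matter in which order designated initializers
are written. A minimal demo of that (hypothetical field subset, same opcode
bytes as above):

    #include <stdio.h>

    struct patch_demo {
            const unsigned char irq_disable[1];     /* declared first ... */
            const unsigned char save_fl[2];
            const unsigned char restore_fl[2];      /* ... declared last */
    };

    /* Initializer order deliberately does NOT match declaration order: */
    static const struct patch_demo demo = {
            .restore_fl  = { 0x57, 0x9d },          /* push %rdi; popfq */
            .irq_disable = { 0xfa },                /* cli */
            .save_fl     = { 0x9c, 0x58 },          /* pushfq; pop %rax */
    };

    int main(void)
    {
            const unsigned char *p = (const unsigned char *)&demo;

            /* Prints the bytes in declaration order: fa 9c 58 57 9d */
            for (unsigned int i = 0; i < sizeof(demo); i++)
                    printf("%02x ", p[i]);
            printf("\n");
            return 0;
    }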

Cc: Andy Lutomirski 
Cc: Borislav Petkov 
Cc: Brian Gerst 
Cc: Dave Hansen 
Cc: Denys Vlasenko 
Cc: H. Peter Anvin 
Cc: Juergen Gross 
Cc: Linus Torvalds 
Cc: Peter Zijlstra 
Cc: Rik van Riel 
Cc: Thomas Gleixner 
Link: http://lkml.kernel.org/r/20190425081012.ga115...@gmail.com
Signed-off-by: Ingo Molnar 
---
 arch/x86/kernel/paravirt_patch.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/arch/x86/kernel/paravirt_patch.c b/arch/x86/kernel/paravirt_patch.c
index 60e7a5e236c0..37b1d43d1e17 100644
--- a/arch/x86/kernel/paravirt_patch.c
+++ b/arch/x86/kernel/paravirt_patch.c
@@ -21,11 +21,11 @@
 struct patch_xxl {
const unsigned char irq_irq_disable[1];
const unsigned char irq_irq_enable[1];
-   const unsigned char irq_restore_fl[2];
const unsigned char irq_save_fl[2];
const unsigned char mmu_read_cr2[3];
const unsigned char mmu_read_cr3[3];
const unsigned char mmu_write_cr3[3];
+   const unsigned char irq_restore_fl[2];
 # ifdef CONFIG_X86_64
const unsigned char cpu_wbinvd[2];
const unsigned char cpu_usergs_sysret64[6];
@@ -43,16 +43,16 @@ static const struct patch_xxl patch_data_xxl = {
.mmu_read_cr2   = { 0x0f, 0x20, 0xd0 }, // mov %cr2, %[re]ax
.mmu_read_cr3   = { 0x0f, 0x20, 0xd8 }, // mov %cr3, %[re]ax
 # ifdef CONFIG_X86_64
-   .irq_restore_fl = { 0x57, 0x9d },   // push %rdi; popfq
.mmu_write_cr3  = { 0x0f, 0x22, 0xdf }, // mov %rdi, %cr3
+   .irq_restore_fl = { 0x57, 0x9d },   // push %rdi; popfq
.cpu_wbinvd = { 0x0f, 0x09 },   // wbinvd
.cpu_usergs_sysret64= { 0x0f, 0x01, 0xf8,
0x48, 0x0f, 0x07 }, // swapgs; sysretq
 .cpu_swapgs = { 0x0f, 0x01, 0xf8 }, // swapgs

[tip:x86/paravirt] x86/paravirt: Detect over-sized patching bugs in paravirt_patch_insns()

2019-05-24 Thread tip-bot for Ingo Molnar
Commit-ID:  2777cae2b19d4a08ad233b3504c19c6f7a6a2ef3
Gitweb: https://git.kernel.org/tip/2777cae2b19d4a08ad233b3504c19c6f7a6a2ef3
Author: Ingo Molnar 
AuthorDate: Thu, 25 Apr 2019 11:17:17 +0200
Committer:  Ingo Molnar 
CommitDate: Thu, 25 Apr 2019 12:00:31 +0200

x86/paravirt: Detect over-sized patching bugs in paravirt_patch_insns()

So paravirt_patch_insns() contains this gem of logic:

unsigned paravirt_patch_insns(void *insnbuf, unsigned len,
  const char *start, const char *end)
{
unsigned insn_len = end - start;

if (insn_len > len || start == NULL)
insn_len = len;
else
memcpy(insnbuf, start, insn_len);

return insn_len;
}

Note how the new instruction's length is checked against 'len' (the size of
the original instruction), and on overflow the new instruction is silently
discarded with no warning printed whatsoever.

This crashes the kernel in funny ways if the patching template is buggy,
and usually in much later places.

Instead do a direct BUG_ON(), there's no way to continue successfully at
that point.

I've tested this patch: with a vanilla kernel the check never triggers, and
if I intentionally increase the size of one of the patch templates to a
too-high value, the assert triggers:

[0.164385] kernel BUG at arch/x86/kernel/paravirt.c:167!

Without this patch a broken kernel randomly crashes in later places,
after the silent patching failure.
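
To see why the silent truncation was dangerous, consider a patch site that
is smaller than the replacement template. A toy sketch (illustrative buffers
only, not kernel code):

    #include <stdio.h>
    #include <string.h>

    int main(void)
    {
            /* 3 bytes of room, followed by an unrelated 0xc3 "instruction" */
            unsigned char site[6] = { 0x90, 0x90, 0x90, 0xc3, 0x00, 0x00 };
            const unsigned char repl[5] = { 0x0f, 0x01, 0xf8, 0x48, 0x0f };
            unsigned int len = 3;
            unsigned int insn_len = sizeof(repl);

            if (insn_len > len) {
                    /*
                     * The old code silently returned here; continuing with
                     * an unchecked memcpy() would clobber the 0xc3 that
                     * follows the patch site.
                     */
                    fprintf(stderr, "template too large: %u > %u\n",
                            insn_len, len);
                    return 1;
            }
            memcpy(site, repl, insn_len);
            return 0;
    }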

Cc: Andy Lutomirski 
Cc: Borislav Petkov 
Cc: Brian Gerst 
Cc: Dave Hansen 
Cc: Denys Vlasenko 
Cc: H. Peter Anvin 
Cc: Juergen Gross 
Cc: Linus Torvalds 
Cc: Peter Zijlstra 
Cc: Peter Zijlstra 
Cc: Rik van Riel 
Cc: Thomas Gleixner 
Link: http://lkml.kernel.org/r/20190425091717.ga72...@gmail.com
Signed-off-by: Ingo Molnar 
---
 arch/x86/kernel/paravirt.c | 8 
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c
index c0e0101133f3..7f9121f2fdac 100644
--- a/arch/x86/kernel/paravirt.c
+++ b/arch/x86/kernel/paravirt.c
@@ -163,10 +163,10 @@ unsigned paravirt_patch_insns(void *insnbuf, unsigned len,
 {
unsigned insn_len = end - start;
 
-   if (insn_len > len || start == NULL)
-   insn_len = len;
-   else
-   memcpy(insnbuf, start, insn_len);
+   /* Alternative instruction is too large for the patch site and we cannot continue: */
+   BUG_ON(insn_len > len || start == NULL);
+
+   memcpy(insnbuf, start, insn_len);
 
return insn_len;
 }


[tip:x86/paravirt] x86/paravirt: Detect over-sized patching bugs in paravirt_patch_call()

2019-05-24 Thread tip-bot for Ingo Molnar
Commit-ID:  11e86dc7f2746210f9c7dc10deaa7658f8dc8350
Gitweb: https://git.kernel.org/tip/11e86dc7f2746210f9c7dc10deaa7658f8dc8350
Author: Ingo Molnar 
AuthorDate: Thu, 25 Apr 2019 11:50:39 +0200
Committer:  Ingo Molnar 
CommitDate: Thu, 25 Apr 2019 12:00:44 +0200

x86/paravirt: Detect over-sized patching bugs in paravirt_patch_call()

paravirt_patch_call() currently handles patching failures inconsistently:
we generate a warning in the retpoline case, but don't in other cases where
we might end up with a non-working kernel as well.

So just convert it all to a BUG_ON(): these patching calls are *not* supposed
to fail, and if they do we want to know immediately.

This also makes the kernel smaller and removes an ugly #ifdef.

I tried it with a richly paravirt-enabled kernel and no patching bugs
were detected.
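
For reference, the 5-byte sequence being emitted is E8 followed by a 32-bit
displacement relative to the end of the instruction, which is what the packed
struct branch in the diff below encodes. A standalone sketch of that
computation (hypothetical addresses):

    #include <stdint.h>
    #include <stdio.h>

    struct branch {
            unsigned char opcode;
            uint32_t delta;
    } __attribute__((packed));          /* 5 bytes, as BUILD_BUG_ON checks */

    int main(void)
    {
            const int call_len = 5;
            unsigned long addr = 0x1000;    /* patch-site address */
            unsigned long target = 0x2000;  /* call target */
            struct branch b;

            b.opcode = 0xe8;                /* CALL rel32 */
            b.delta = (uint32_t)(target - (addr + call_len));

            printf("sizeof = %zu, delta = 0x%x\n", sizeof(b), b.delta);
            return 0;
    }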

Cc: Andy Lutomirski 
Cc: Borislav Petkov 
Cc: Brian Gerst 
Cc: Denys Vlasenko 
Cc: H. Peter Anvin 
Cc: Juergen Gross 
Cc: Linus Torvalds 
Cc: Peter Zijlstra 
Cc: Thomas Gleixner 
Link: http://lkml.kernel.org/r/20190425095039.gc115...@gmail.com
Signed-off-by: Ingo Molnar 
---
 arch/x86/kernel/paravirt.c | 16 
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c
index 7f9121f2fdac..544d386ded45 100644
--- a/arch/x86/kernel/paravirt.c
+++ b/arch/x86/kernel/paravirt.c
@@ -73,21 +73,21 @@ struct branch {
 static unsigned paravirt_patch_call(void *insnbuf, const void *target,
unsigned long addr, unsigned len)
 {
+   const int call_len = 5;
struct branch *b = insnbuf;
-   unsigned long delta = (unsigned long)target - (addr+5);
+   unsigned long delta = (unsigned long)target - (addr+call_len);
 
-   if (len < 5) {
-#ifdef CONFIG_RETPOLINE
-   WARN_ONCE(1, "Failing to patch indirect CALL in %ps\n", (void *)addr);
-#endif
-   return len; /* call too long for patch site */
+   if (len < call_len) {
+   pr_warn("paravirt: Failed to patch indirect CALL at %ps\n", (void *)addr);
+   /* Kernel might not be viable if patching fails, bail out: */
+   BUG_ON(1);
}
 
b->opcode = 0xe8; /* call */
b->delta = delta;
-   BUILD_BUG_ON(sizeof(*b) != 5);
+   BUILD_BUG_ON(sizeof(*b) != call_len);
 
-   return 5;
+   return call_len;
 }
 
 #ifdef CONFIG_PARAVIRT_XXL


[tip:efi/core] efi/fdt: Apply more cleanups

2019-02-04 Thread tip-bot for Ingo Molnar
Commit-ID:  ac9aff8ef99095b9d46d53d0a779f2bda24ba181
Gitweb: https://git.kernel.org/tip/ac9aff8ef99095b9d46d53d0a779f2bda24ba181
Author: Ingo Molnar 
AuthorDate: Sat, 2 Feb 2019 10:41:14 +0100
Committer:  Ingo Molnar 
CommitDate: Mon, 4 Feb 2019 08:26:48 +0100

efi/fdt: Apply more cleanups

Apply a number of cleanups:

 - Introduce fdt_setprop_*var() helper macros to simplify and shorten repetitive
   sequences - this also makes it less likely that the wrong variable size is
   passed in. This change makes a lot of the property-setting calls single-line
   and easier to read. (A usage sketch follows below.)

 - Harmonize comment style: capitalization, punctuation, whitespaces, etc.

 - Fix some whitespace noise in the libstub Makefile which I happened to notice.

 - Use the standard tabular initialization style:

-   map.map =   &runtime_map;
-   map.map_size =  &map_size;
-   map.desc_size = &desc_size;
-   map.desc_ver =  &desc_ver;
-   map.key_ptr =   &mmap_key;
-   map.buff_size = &buff_size;

+   map.map         = &runtime_map;
+   map.map_size    = &map_size;
+   map.desc_size   = &desc_size;
+   map.desc_ver    = &desc_ver;
+   map.key_ptr     = &mmap_key;
+   map.buff_size   = &buff_size;

 - Use tabular structure definition for better readability.

 - Make all pr*() lines single-line, even if they marginally exceed 80 cols -
   this makes them visually less intrusive.

 - Unbreak line breaks into single lines when the length exceeds 80 cols only
   marginally, for better readability.

 - Move assignment closer to the actual usage site.

 - Plus some other smaller cleanups, spelling fixes, etc.

No change in functionality intended.
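
A hedged usage sketch of the new helper macros (the node, property name and
variable below are illustrative, not taken from this diff):

    u64 fdt_val64 = 0;
    int status;

    /* Before: the buffer size had to be spelled out at every call site: */
    status = fdt_setprop(fdt, node, "linux,uefi-mmap-start",
                         &fdt_val64, sizeof(fdt_val64));

    /* After: fdt_setprop_var() derives the size from the variable itself: */
    status = fdt_setprop_var(fdt, node, "linux,uefi-mmap-start", fdt_val64);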

[ ardb: move changes to upstream libfdt into local header. ]

Signed-off-by: Ingo Molnar 
Signed-off-by: Ard Biesheuvel 
Cc: AKASHI Takahiro 
Cc: Alexander Graf 
Cc: Bjorn Andersson 
Cc: Borislav Petkov 
Cc: Heinrich Schuchardt 
Cc: Jeffrey Hugo 
Cc: Lee Jones 
Cc: Leif Lindholm 
Cc: Linus Torvalds 
Cc: Matt Fleming 
Cc: Peter Jones 
Cc: Peter Zijlstra 
Cc: Sai Praneeth Prakhya 
Cc: Thomas Gleixner 
Cc: linux-...@vger.kernel.org
Link: http://lkml.kernel.org/r/20190202094119.13230-6-ard.biesheu...@linaro.org
Signed-off-by: Ingo Molnar 
---
 drivers/firmware/efi/libstub/Makefile  |   4 +-
 drivers/firmware/efi/libstub/efistub.h |  11 
 drivers/firmware/efi/libstub/fdt.c | 107 -
 3 files changed, 64 insertions(+), 58 deletions(-)

diff --git a/drivers/firmware/efi/libstub/Makefile b/drivers/firmware/efi/libstub/Makefile
index d9845099635e..b0103e16fc1b 100644
--- a/drivers/firmware/efi/libstub/Makefile
+++ b/drivers/firmware/efi/libstub/Makefile
@@ -52,7 +52,7 @@ lib-$(CONFIG_EFI_ARMSTUB) += arm-stub.o fdt.o string.o random.o \
 
 lib-$(CONFIG_ARM)  += arm32-stub.o
 lib-$(CONFIG_ARM64)+= arm64-stub.o
-CFLAGS_arm64-stub.o:= -DTEXT_OFFSET=$(TEXT_OFFSET)
+CFLAGS_arm64-stub.o:= -DTEXT_OFFSET=$(TEXT_OFFSET)
 
 #
 # arm64 puts the stub in the kernel proper, which will unnecessarily retain all
@@ -89,7 +89,7 @@ quiet_cmd_stubcopy = STUBCPY $@
   cmd_stubcopy = if $(STRIP) --strip-debug $(STUBCOPY_RM-y) -o $@ $<; \
 then if $(OBJDUMP) -r $@ | grep $(STUBCOPY_RELOC-y); \
 then (echo >&2 "$@: absolute symbol references not allowed in the EFI stub"; \
-  rm -f $@; /bin/false); \
+  rm -f $@; /bin/false); \
 else $(OBJCOPY) $(STUBCOPY_FLAGS-y) $< $@; fi\
 else /bin/false; fi
 
diff --git a/drivers/firmware/efi/libstub/efistub.h b/drivers/firmware/efi/libstub/efistub.h
index 32799cf039ef..cefcf6ba3150 100644
--- a/drivers/firmware/efi/libstub/efistub.h
+++ b/drivers/firmware/efi/libstub/efistub.h
@@ -64,4 +64,15 @@ efi_status_t check_platform_features(efi_system_table_t *sys_table_arg);
 
 efi_status_t efi_random_get_seed(efi_system_table_t *sys_table_arg);
 
+/* Helper macros for the usual case of using simple C variables: */
+#ifndef fdt_setprop_inplace_var
+#define fdt_setprop_inplace_var(fdt, node_offset, name, var) \
+   fdt_setprop_inplace((fdt), (node_offset), (name), &(var), sizeof(var))
+#endif
+
+#ifndef fdt_setprop_var
+#define fdt_setprop_var(fdt, node_offset, name, var) \
+   fdt_setprop((fdt), (node_offset), (name), &(var), sizeof(var))
+#endif
+
 #endif
diff --git a/drivers/firmware/efi/libstub/fdt.c b/drivers/firmware/efi/libstub/fdt.c
index 0dc7b4987cc2..d474964b873b 100644
--- a/drivers/firmware/efi/libstub/fdt.c
+++ b/drivers/firmware/efi/libstub/fdt.c
@@ -26,10 +26,8 @@ static void fdt_update_cell_size(efi_system_table_t *sys_table, void *fdt)
offset = fdt_path_offset(fdt, "/");
/* Set the #address-cells and #size-cells values for an empty tree */
 
-   fdt_setprop_u32(fdt, offset, "#address-cells",
-   

[tip:perf/core] tools lib subcmd: Fix a few source code comment typos

2018-12-18 Thread tip-bot for Ingo Molnar
Commit-ID:  65c9fee2da2fbbedbba402996ddb412072e762fc
Gitweb: https://git.kernel.org/tip/65c9fee2da2fbbedbba402996ddb412072e762fc
Author: Ingo Molnar 
AuthorDate: Mon, 3 Dec 2018 11:22:00 +0100
Committer:  Arnaldo Carvalho de Melo 
CommitDate: Mon, 17 Dec 2018 14:56:51 -0300

tools lib subcmd: Fix a few source code comment typos

Go over the tools/ files that are maintained in Arnaldo's tree and
fix common typos: half of them were in comments, the other half
in JSON files.

No change in functionality intended.

Committer notes:

This was split from a larger patch as there is code that is,
additionally, maintained outside the kernel tree, so to ease
cherry-picking and/or backporting, split this into multiple patches.

Signed-off-by: Ingo Molnar 
Cc: Jiri Olsa 
Cc: Josh Poimboeuf 
Cc: Namhyung Kim 
Cc: Peter Zijlstra 
Link: http://lkml.kernel.org/r/20181203102200.ga104...@gmail.com
Signed-off-by: Arnaldo Carvalho de Melo 
---
 tools/lib/subcmd/parse-options.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tools/lib/subcmd/parse-options.h b/tools/lib/subcmd/parse-options.h
index 6ca2a8bfe716..af9def589863 100644
--- a/tools/lib/subcmd/parse-options.h
+++ b/tools/lib/subcmd/parse-options.h
@@ -71,7 +71,7 @@ typedef int parse_opt_cb(const struct option *, const char *arg, int unset);
  *
  * `argh`::
  *   token to explain the kind of argument this option wants. Keep it
- *   homogenous across the repository.
+ *   homogeneous across the repository.
  *
  * `help`::
  *   the short help associated to what the option does.
@@ -80,7 +80,7 @@ typedef int parse_opt_cb(const struct option *, const char *arg, int unset);
  *
  * `flags`::
  *   mask of parse_opt_option_flags.
- *   PARSE_OPT_OPTARG: says that the argument is optionnal (not for BOOLEANs)
+ *   PARSE_OPT_OPTARG: says that the argument is optional (not for BOOLEANs)
  *   PARSE_OPT_NOARG: says that this option takes no argument, for CALLBACKs
  *   PARSE_OPT_NONEG: says that this option cannot be negated
  *   PARSE_OPT_HIDDEN this option is skipped in the default usage, showed in
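
A hedged usage sketch of this option-parsing API, with hypothetical option
names (OPT_BOOLEAN, OPT_STRING, OPT_END and parse_options() are the entry
points from tools/lib/subcmd/parse-options.h):

    #include <subcmd/parse-options.h>

    static int force;
    static const char *output_name;

    static const char * const demo_usage[] = {
            "demo [<options>]",
            NULL
    };

    int main(int argc, const char **argv)
    {
            struct option options[] = {
                    OPT_BOOLEAN('f', "force", &force, "do not complain, just do it"),
                    OPT_STRING('o', "output", &output_name, "file", "output file name"),
                    OPT_END()
            };

            /* Returns the number of remaining non-option arguments */
            argc = parse_options(argc, argv, options, demo_usage, 0);
            return 0;
    }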


[tip:perf/core] perf tools: Fix diverse comment typos

2018-12-18 Thread tip-bot for Ingo Molnar
Commit-ID:  adba163441597ffb56141233a2ef722b75caca87
Gitweb: https://git.kernel.org/tip/adba163441597ffb56141233a2ef722b75caca87
Author: Ingo Molnar 
AuthorDate: Mon, 3 Dec 2018 11:22:00 +0100
Committer:  Arnaldo Carvalho de Melo 
CommitDate: Mon, 17 Dec 2018 14:56:47 -0300

perf tools: Fix diverse comment typos

Go over the tools/ files that are maintained in Arnaldo's tree and
fix common typos: half of them were in comments, the other half
in JSON files.

No change in functionality intended.

Committer notes:

This was split from a larger patch as there is code that is,
additionally, maintained outside the kernel tree, so to ease
cherry-picking and/or backporting, split this into multiple patches.

Just typos in comments, no need to backport, reducing the possibility of
backporting artifacts.

Signed-off-by: Ingo Molnar 
Cc: Jiri Olsa 
Cc: Namhyung Kim 
Cc: Peter Zijlstra 
Link: http://lkml.kernel.org/r/20181203102200.ga104...@gmail.com
Signed-off-by: Arnaldo Carvalho de Melo 
---
 tools/perf/arch/x86/tests/insn-x86.c | 2 +-
 tools/perf/builtin-top.c | 2 +-
 tools/perf/builtin-trace.c   | 2 +-
 tools/perf/tests/attr.c  | 2 +-
 tools/perf/util/annotate.c   | 2 +-
 tools/perf/util/header.c | 2 +-
 tools/perf/util/hist.c   | 2 +-
 tools/perf/util/jitdump.c| 2 +-
 tools/perf/util/machine.c| 2 +-
 tools/perf/util/probe-event.c| 4 ++--
 tools/perf/util/sort.c   | 2 +-
 11 files changed, 12 insertions(+), 12 deletions(-)

diff --git a/tools/perf/arch/x86/tests/insn-x86.c b/tools/perf/arch/x86/tests/insn-x86.c
index a5d24ae5810d..c3e5f4ab0d3e 100644
--- a/tools/perf/arch/x86/tests/insn-x86.c
+++ b/tools/perf/arch/x86/tests/insn-x86.c
@@ -170,7 +170,7 @@ static int test_data_set(struct test_data *dat_set, int x86_64)
  *
  * If the test passes %0 is returned, otherwise %-1 is returned.  Use the
  * verbose (-v) option to see all the instructions and whether or not they
- * decoded successfuly.
+ * decoded successfully.
  */
 int test__insn_x86(struct test *test __maybe_unused, int subtest __maybe_unused)
 {
diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c
index 1252d1759064..c59a3eb0d697 100644
--- a/tools/perf/builtin-top.c
+++ b/tools/perf/builtin-top.c
@@ -595,7 +595,7 @@ static void *display_thread_tui(void *arg)
 
/*
 * Initialize the uid_filter_str, in the future the TUI will allow
-* Zooming in/out UIDs. For now juse use whatever the user passed
+* Zooming in/out UIDs. For now just use whatever the user passed
 * via --uid.
 */
evlist__for_each_entry(top->evlist, pos) {
diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c
index a57a9ae1fd4b..a6aa4589ad50 100644
--- a/tools/perf/builtin-trace.c
+++ b/tools/perf/builtin-trace.c
@@ -2782,7 +2782,7 @@ static int trace__run(struct trace *trace, int argc, const char **argv)
 * Now that we already used evsel->attr to ask the kernel to setup the
 * events, lets reuse evsel->attr.sample_max_stack as the limit in
 * trace__resolve_callchain(), allowing per-event max-stack settings
-* to override an explicitely set --max-stack global setting.
+* to override an explicitly set --max-stack global setting.
 */
evlist__for_each_entry(evlist, evsel) {
if (evsel__has_callchain(evsel) &&
diff --git a/tools/perf/tests/attr.c b/tools/perf/tests/attr.c
index 05dfe11c2f9e..d8426547219b 100644
--- a/tools/perf/tests/attr.c
+++ b/tools/perf/tests/attr.c
@@ -182,7 +182,7 @@ int test__attr(struct test *test __maybe_unused, int subtest __maybe_unused)
char path_perf[PATH_MAX];
char path_dir[PATH_MAX];
 
-   /* First try developement tree tests. */
+   /* First try development tree tests. */
 if (!lstat("./tests", &st))
return run_dir("./tests", "./perf");
 
diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c
index f69d8e177fa3..51d291b0b81f 100644
--- a/tools/perf/util/annotate.c
+++ b/tools/perf/util/annotate.c
@@ -1772,7 +1772,7 @@ static int symbol__disassemble(struct symbol *sym, struct annotate_args *args)
while (!feof(file)) {
/*
 * The source code line number (lineno) needs to be kept in
-* accross calls to symbol__parse_objdump_line(), so that it
+* across calls to symbol__parse_objdump_line(), so that it
 * can associate it with the instructions till the next one.
 * See disasm_line__new() and struct disasm_line::line_nr.
 */
diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c
index 9cc81d48a908..4a64739c67e7 100644
--- a/tools/perf/util/header.c
+++ b/tools/perf/util/header.c
@@ -2798,7 +2798,7 @@ static int perf_header__adds_write(struct perf_header *header,
 lseek(fd, sec_start, SEEK_SET);

[tip:perf/core] perf bpf-loader: Fix debugging message typo

2018-12-18 Thread tip-bot for Ingo Molnar
Commit-ID:  e4a8b0af5121392da2d40204ee330fd9e88d0858
Gitweb: https://git.kernel.org/tip/e4a8b0af5121392da2d40204ee330fd9e88d0858
Author: Ingo Molnar 
AuthorDate: Mon, 3 Dec 2018 11:22:00 +0100
Committer:  Arnaldo Carvalho de Melo 
CommitDate: Mon, 17 Dec 2018 14:56:39 -0300

perf bpf-loader: Fix debugging message typo

Go over the tools/ files that are maintained in Arnaldo's tree and
fix common typos: half of them were in comments, the other half
in JSON files.

No change in functionality intended.

Committer notes:

This was split from a larger patch as there is code that is,
additionally, maintained outside the kernel tree, so to ease cherry
picking and/or backporting, split this into multiple patches.

This one has information that is presented to the user, albeit in debug
mode.

Signed-off-by: Ingo Molnar 
Cc: Jiri Olsa 
Cc: Namhyung Kim 
Cc: Peter Zijlstra 
Cc: Wang Nan 
Link: http://lkml.kernel.org/r/20181203102200.ga104...@gmail.com
Signed-off-by: Arnaldo Carvalho de Melo 
---
 tools/perf/util/bpf-loader.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tools/perf/util/bpf-loader.c b/tools/perf/util/bpf-loader.c
index 9a280647d829..2f3eb6d293ee 100644
--- a/tools/perf/util/bpf-loader.c
+++ b/tools/perf/util/bpf-loader.c
@@ -99,7 +99,7 @@ struct bpf_object *bpf__prepare_load(const char *filename, bool source)
if (err)
return ERR_PTR(-BPF_LOADER_ERRNO__COMPILE);
} else
-   pr_debug("bpf: successfull builtin compilation\n");
+   pr_debug("bpf: successful builtin compilation\n");
obj = bpf_object__open_buffer(obj_buf, obj_buf_sz, filename);
 
if (!IS_ERR_OR_NULL(obj) && llvm_param.dump_obj)


[tip:perf/core] perf tools Documentation: Fix diverse typos

2018-12-18 Thread tip-bot for Ingo Molnar
Commit-ID:  1a7ea3283f7d15d7ce76a30870c3ca648adf1fc4
Gitweb: https://git.kernel.org/tip/1a7ea3283f7d15d7ce76a30870c3ca648adf1fc4
Author: Ingo Molnar 
AuthorDate: Mon, 3 Dec 2018 11:22:00 +0100
Committer:  Arnaldo Carvalho de Melo 
CommitDate: Mon, 17 Dec 2018 14:56:36 -0300

perf tools Documentation: Fix diverse typos

Go over the tools/ files that are maintained in Arnaldo's tree and
fix common typos: half of them were in comments, the other half
in JSON files.

No change in functionality intended.

Committer notes:

This was split from a larger patch as there is code that is,
additionally, maintained outside the kernel tree, so to ease cherry
picking and/or backporting, split this into multiple patches.

In this particular case, it affects documentation, so may be interesting
to cherry pick as it is information that is presented to the user.

Signed-off-by: Ingo Molnar 
Cc: Jiri Olsa 
Cc: Namhyung Kim 
Cc: Peter Zijlstra 
Link: http://lkml.kernel.org/r/20181203102200.ga104...@gmail.com
Signed-off-by: Arnaldo Carvalho de Melo 
---
 tools/perf/Documentation/perf-list.txt   | 2 +-
 tools/perf/Documentation/perf-report.txt | 2 +-
 tools/perf/Documentation/perf-stat.txt   | 4 ++--
 3 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/tools/perf/Documentation/perf-list.txt b/tools/perf/Documentation/perf-list.txt
index 667c14e56031..138fb6e94b3c 100644
--- a/tools/perf/Documentation/perf-list.txt
+++ b/tools/perf/Documentation/perf-list.txt
@@ -172,7 +172,7 @@ like cycles and instructions and some software events.
 Other PMUs and global measurements are normally root only.
 Some event qualifiers, such as "any", are also root only.
 
-This can be overriden by setting the kernel.perf_event_paranoid
+This can be overridden by setting the kernel.perf_event_paranoid
 sysctl to -1, which allows non root to use these events.
 
 For accessing trace point events perf needs to have read access to
diff --git a/tools/perf/Documentation/perf-report.txt b/tools/perf/Documentation/perf-report.txt
index ed2bf37ab132..1a27bfe05039 100644
--- a/tools/perf/Documentation/perf-report.txt
+++ b/tools/perf/Documentation/perf-report.txt
@@ -252,7 +252,7 @@ OPTIONS
  Usually more convenient to use --branch-history for this.
 
value can be:
-   - percent: diplay overhead percent (default)
+   - percent: display overhead percent (default)
- period: display event period
- count: display event count
 
diff --git a/tools/perf/Documentation/perf-stat.txt b/tools/perf/Documentation/perf-stat.txt
index b10a90b6a718..4bc2085e5197 100644
--- a/tools/perf/Documentation/perf-stat.txt
+++ b/tools/perf/Documentation/perf-stat.txt
@@ -50,7 +50,7 @@ report::
  /sys/bus/event_source/devices//format/*
 
Note that the last two syntaxes support prefix and glob matching in
-   the PMU name to simplify creation of events accross multiple instances
+   the PMU name to simplify creation of events across multiple instances
of the same type of PMU in large systems (e.g. memory controller PMUs).
Multiple PMU instances are typical for uncore PMUs, so the prefix
'uncore_' is also ignored when performing this match.
@@ -277,7 +277,7 @@ echo 0 > /proc/sys/kernel/nmi_watchdog
 for best results. Otherwise the bottlenecks may be inconsistent
 on workload with changing phases.
 
-This enables --metric-only, unless overriden with --no-metric-only.
+This enables --metric-only, unless overridden with --no-metric-only.
 
 To interpret the results it is usually needed to know on which
 CPUs the workload runs on. If needed the CPUs can be forced using


[tip:perf/core] tools lib traceevent: Fix diverse typos in comments

2018-12-18 Thread tip-bot for Ingo Molnar
Commit-ID:  3e449f7c36c3ac49f140b5dc3c40693e551f47d2
Gitweb: https://git.kernel.org/tip/3e449f7c36c3ac49f140b5dc3c40693e551f47d2
Author: Ingo Molnar 
AuthorDate: Mon, 3 Dec 2018 11:22:00 +0100
Committer:  Arnaldo Carvalho de Melo 
CommitDate: Mon, 17 Dec 2018 14:56:34 -0300

tools lib traceevent: Fix diverse typos in comments

Go over the tools/ files that are maintained in Arnaldo's tree and
fix common typos: half of them were in comments, the other half
in JSON files.

No change in functionality intended.

Committer notes:

This was split from a larger patch as there is code that is,
additionally, maintained outside the kernel tree, so to ease cherry
picking and/or backporting, split this into multiple patches.

Signed-off-by: Ingo Molnar 
Cc: Jiri Olsa 
Cc: Namhyung Kim 
Cc: Peter Zijlstra 
Cc: Steven Rostedt (VMware) 
Cc: Tzvetomir Stoyanov 
Link: http://lkml.kernel.org/r/20181203102200.ga104...@gmail.com
Signed-off-by: Arnaldo Carvalho de Melo 
---
 tools/lib/traceevent/event-parse.c | 12 ++--
 tools/lib/traceevent/plugin_kvm.c  |  2 +-
 2 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/tools/lib/traceevent/event-parse.c b/tools/lib/traceevent/event-parse.c
index ffa656b868a9..a5ed291b8a9f 100644
--- a/tools/lib/traceevent/event-parse.c
+++ b/tools/lib/traceevent/event-parse.c
@@ -1145,7 +1145,7 @@ static enum tep_event_type read_token(char **tok)
 }
 
 /**
- * tep_read_token - access to utilites to use the pevent parser
+ * tep_read_token - access to utilities to use the pevent parser
  * @tok: The token to return
  *
  * This will parse tokens from the string given by
@@ -3258,7 +3258,7 @@ static int event_read_print(struct tep_event *event)
  * @name: the name of the common field to return
  *
  * Returns a common field from the event by the given @name.
- * This only searchs the common fields and not all field.
+ * This only searches the common fields and not all field.
  */
 struct tep_format_field *
 tep_find_common_field(struct tep_event *event, const char *name)
@@ -3302,7 +3302,7 @@ tep_find_field(struct tep_event *event, const char *name)
  * @name: the name of the field
  *
  * Returns a field by the given @name.
- * This searchs the common field names first, then
+ * This searches the common field names first, then
  * the non-common ones if a common one was not found.
  */
 struct tep_format_field *
@@ -3841,7 +3841,7 @@ static void print_bitmask_to_seq(struct tep_handle *pevent,
/*
 * data points to a bit mask of size bytes.
 * In the kernel, this is an array of long words, thus
-* endianess is very important.
+* endianness is very important.
 */
if (pevent->file_bigendian)
index = size - (len + 1);
@@ -5316,9 +5316,9 @@ pid_from_cmdlist(struct tep_handle *pevent, const char *comm, struct cmdline *ne
  * This returns the cmdline structure that holds a pid for a given
  * comm, or NULL if none found. As there may be more than one pid for
  * a given comm, the result of this call can be passed back into
- * a recurring call in the @next paramater, and then it will find the
+ * a recurring call in the @next parameter, and then it will find the
  * next pid.
- * Also, it does a linear seach, so it may be slow.
+ * Also, it does a linear search, so it may be slow.
  */
 struct cmdline *tep_data_pid_from_comm(struct tep_handle *pevent, const char *comm,
   struct cmdline *next)
diff --git a/tools/lib/traceevent/plugin_kvm.c b/tools/lib/traceevent/plugin_kvm.c
index 637be7c18476..754050eea467 100644
--- a/tools/lib/traceevent/plugin_kvm.c
+++ b/tools/lib/traceevent/plugin_kvm.c
@@ -387,7 +387,7 @@ static int kvm_mmu_print_role(struct trace_seq *s, struct tep_record *record,
 
/*
 * We can only use the structure if file is of the same
-* endianess.
+* endianness.
 */
if (tep_is_file_bigendian(event->pevent) ==
tep_is_host_bigendian(event->pevent)) {
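
The endianness checks touched above matter because trace files record data in
the byte order of the machine that produced them: raw structures can only be
reused when file and host byte order match, otherwise every multi-byte field
must be swapped. A small self-contained host-endianness probe:

    #include <stdio.h>
    #include <stdint.h>

    static int host_is_bigendian(void)
    {
            const uint32_t probe = 1;

            /* On a big-endian host the least significant byte comes last */
            return *(const unsigned char *)&probe == 0;
    }

    int main(void)
    {
            printf("host is %s-endian\n",
                   host_is_bigendian() ? "big" : "little");
            return 0;
    }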


[tip:perf/core] perf vendor events intel: Fix diverse typos

2018-12-18 Thread tip-bot for Ingo Molnar
Commit-ID:  b1d6f155e1bbb67778c17aba661fb4ea4e1a3641
Gitweb: https://git.kernel.org/tip/b1d6f155e1bbb67778c17aba661fb4ea4e1a3641
Author: Ingo Molnar 
AuthorDate: Mon, 3 Dec 2018 11:22:00 +0100
Committer:  Arnaldo Carvalho de Melo 
CommitDate: Mon, 17 Dec 2018 14:56:31 -0300

perf vendor events intel: Fix diverse typos

Go over the tools/ files that are maintained in Arnaldo's tree and
fix common typos: half of them were in comments, the other half
in JSON files.

( Care should be taken not to re-import these typos in the future,
  if the JSON files get updated by the vendor without fixing the typos. )

No change in functionality intended.

Committer notes:

This was split from a larger patch as there is code that is,
additionally, maintained outside the kernel tree, so to ease cherry
picking and/or backporting, split this into multiple patches.

Signed-off-by: Ingo Molnar 
Cc: Alexander Shishkin 
Cc: Andi Kleen 
Cc: Jiri Olsa 
Cc: Kan Liang 
Cc: Namhyung Kim 
Cc: Peter Zijlstra 
Link: http://lkml.kernel.org/r/20181203102200.ga104...@gmail.com
Signed-off-by: Arnaldo Carvalho de Melo 
---
 .../perf/pmu-events/arch/x86/broadwell/cache.json  |  4 +--
 .../pmu-events/arch/x86/broadwell/pipeline.json|  2 +-
 .../pmu-events/arch/x86/broadwellde/cache.json |  4 +--
 .../pmu-events/arch/x86/broadwellde/pipeline.json  |  2 +-
 .../perf/pmu-events/arch/x86/broadwellx/cache.json |  4 +--
 .../pmu-events/arch/x86/broadwellx/pipeline.json   |  2 +-
 tools/perf/pmu-events/arch/x86/jaketown/cache.json |  4 +--
 .../pmu-events/arch/x86/jaketown/pipeline.json |  2 +-
 .../pmu-events/arch/x86/knightslanding/cache.json  | 30 +++---
 .../pmu-events/arch/x86/sandybridge/cache.json |  4 +--
 .../pmu-events/arch/x86/sandybridge/pipeline.json  |  2 +-
 .../pmu-events/arch/x86/skylakex/uncore-other.json | 12 -
 12 files changed, 36 insertions(+), 36 deletions(-)

diff --git a/tools/perf/pmu-events/arch/x86/broadwell/cache.json b/tools/perf/pmu-events/arch/x86/broadwell/cache.json
index bba3152ec54a..0b080b0352d8 100644
--- a/tools/perf/pmu-events/arch/x86/broadwell/cache.json
+++ b/tools/perf/pmu-events/arch/x86/broadwell/cache.json
@@ -433,7 +433,7 @@
 },
 {
 "PEBS": "1",
-"PublicDescription": "This is a precise version (that is, uses PEBS) 
of the event that counts line-splitted load uops retired to the architected 
path. A line split is across 64B cache-line which includes a page split (4K).",
+"PublicDescription": "This is a precise version (that is, uses PEBS) 
of the event that counts line-split load uops retired to the architected path. 
A line split is across 64B cache-line which includes a page split (4K).",
 "EventCode": "0xD0",
 "Counter": "0,1,2,3",
 "UMask": "0x41",
@@ -445,7 +445,7 @@
 },
 {
 "PEBS": "1",
-"PublicDescription": "This is a precise version (that is, uses PEBS) 
of the event that counts line-splitted store uops retired to the architected 
path. A line split is across 64B cache-line which includes a page split (4K).",
+"PublicDescription": "This is a precise version (that is, uses PEBS) 
of the event that counts line-split store uops retired to the architected path. 
A line split is across 64B cache-line which includes a page split (4K).",
 "EventCode": "0xD0",
 "Counter": "0,1,2,3",
 "UMask": "0x42",
diff --git a/tools/perf/pmu-events/arch/x86/broadwell/pipeline.json b/tools/perf/pmu-events/arch/x86/broadwell/pipeline.json
index 97c5d0784c6c..999cf3066363 100644
--- a/tools/perf/pmu-events/arch/x86/broadwell/pipeline.json
+++ b/tools/perf/pmu-events/arch/x86/broadwell/pipeline.json
@@ -317,7 +317,7 @@
 "CounterHTOff": "0,1,2,3,4,5,6,7"
 },
 {
-"PublicDescription": "This event counts stalls occured due to changing 
prefix length (66, 67 or REX.W when they change the length of the decoded 
instruction). Occurrences counting is proportional to the number of prefixes in 
a 16B-line. This may result in the following penalties: three-cycle penalty for 
each LCP in a 16-byte chunk.",
+"PublicDescription": "This event counts stalls occurred due to 
changing prefix length (66, 67 or REX.W when they change the length of the 
decoded instruction). Occurrences counting is proportional to the number of 
prefixes in a 16B-line. This may result in the following penalties: three-cycle 
penalty for each LCP in a 16-byte chunk.",
 "EventCode": "0x87",
 "Counter": "0,1,2,3",
 "UMask": "0x1",
diff --git a/tools/perf/pmu-events/arch/x86/broadwellde/cache.json b/tools/perf/pmu-events/arch/x86/broadwellde/cache.json
index bf243fe2a0ec..4ad425312bdc 100644
--- a/tools/perf/pmu-events/arch/x86/broadwellde/cache.json
+++ b/tools/perf/pmu-events/arch/x86/broadwellde/cache.json
@@ -439,7 +439,7 @@
 "PEBS": "1",
 "Counter": "0,1,2,3",
 "EventName": "MEM_UOPS_RETIRED.SPLIT_LOADS",
-

[tip:perf/core] tools lib subcmd: Fix a few source code comment typos

2018-12-14 Thread tip-bot for Ingo Molnar
Commit-ID:  8cf0fe36de6a02845318a61a58e2d87d309bfc98
Gitweb: https://git.kernel.org/tip/8cf0fe36de6a02845318a61a58e2d87d309bfc98
Author: Ingo Molnar 
AuthorDate: Mon, 3 Dec 2018 11:22:00 +0100
Committer:  Arnaldo Carvalho de Melo 
CommitDate: Thu, 6 Dec 2018 14:12:31 -0300

tools lib subcmd: Fix a few source code comment typos

Go over the tools/ files that are maintained in Arnaldo's tree and
fix common typos: half of them were in comments, the other half
in JSON files.

No change in functionality intended.

Committer notes:

This was split from a larger patch as there is code that is,
additionally, maintained outside the kernel tree, so to ease
cherry-picking and/or backporting, split this into multiple patches.

Signed-off-by: Ingo Molnar 
Cc: Jiri Olsa 
Cc: Josh Poimboeuf 
Cc: Namhyung Kim 
Cc: Peter Zijlstra 
Link: http://lkml.kernel.org/r/20181203102200.ga104...@gmail.com
Signed-off-by: Arnaldo Carvalho de Melo 
---
 tools/lib/subcmd/parse-options.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tools/lib/subcmd/parse-options.h b/tools/lib/subcmd/parse-options.h
index 6ca2a8bfe716..af9def589863 100644
--- a/tools/lib/subcmd/parse-options.h
+++ b/tools/lib/subcmd/parse-options.h
@@ -71,7 +71,7 @@ typedef int parse_opt_cb(const struct option *, const char *arg, int unset);
  *
  * `argh`::
  *   token to explain the kind of argument this option wants. Keep it
- *   homogenous across the repository.
+ *   homogeneous across the repository.
  *
  * `help`::
  *   the short help associated to what the option does.
@@ -80,7 +80,7 @@ typedef int parse_opt_cb(const struct option *, const char *arg, int unset);
  *
  * `flags`::
  *   mask of parse_opt_option_flags.
- *   PARSE_OPT_OPTARG: says that the argument is optionnal (not for BOOLEANs)
+ *   PARSE_OPT_OPTARG: says that the argument is optional (not for BOOLEANs)
  *   PARSE_OPT_NOARG: says that this option takes no argument, for CALLBACKs
  *   PARSE_OPT_NONEG: says that this option cannot be negated
  *   PARSE_OPT_HIDDEN this option is skipped in the default usage, showed in


[tip:perf/core] perf vendor events intel: Fix diverse typos

2018-12-14 Thread tip-bot for Ingo Molnar
Commit-ID:  9512bca1ede7cba3a718d90db33973c556c69534
Gitweb: https://git.kernel.org/tip/9512bca1ede7cba3a718d90db33973c556c69534
Author: Ingo Molnar 
AuthorDate: Mon, 3 Dec 2018 11:22:00 +0100
Committer:  Arnaldo Carvalho de Melo 
CommitDate: Thu, 6 Dec 2018 14:12:30 -0300

perf vendor events intel: Fix diverse typos

Go over the tools/ files that are maintained in Arnaldo's tree and
fix common typos: half of them were in comments, the other half
in JSON files.

( Care should be taken not to re-import these typos in the future,
  if the JSON files get updated by the vendor without fixing the typos. )

No change in functionality intended.

Committer notes:

This was split from a larger patch as there is code that is,
additionally, maintained outside the kernel tree, so to ease cherry
picking and/or backporting, split this into multiple patches.

Signed-off-by: Ingo Molnar 
Cc: Alexander Shishkin 
Cc: Andi Kleen 
Cc: Jiri Olsa 
Cc: Kan Liang 
Cc: Namhyung Kim 
Cc: Peter Zijlstra 
Link: http://lkml.kernel.org/r/20181203102200.ga104...@gmail.com
Signed-off-by: Arnaldo Carvalho de Melo 
---
 .../perf/pmu-events/arch/x86/broadwell/cache.json  |  4 +--
 .../pmu-events/arch/x86/broadwell/pipeline.json|  2 +-
 .../pmu-events/arch/x86/broadwellde/cache.json |  4 +--
 .../pmu-events/arch/x86/broadwellde/pipeline.json  |  2 +-
 .../perf/pmu-events/arch/x86/broadwellx/cache.json |  4 +--
 .../pmu-events/arch/x86/broadwellx/pipeline.json   |  2 +-
 tools/perf/pmu-events/arch/x86/jaketown/cache.json |  4 +--
 .../pmu-events/arch/x86/jaketown/pipeline.json |  2 +-
 .../pmu-events/arch/x86/knightslanding/cache.json  | 30 +++---
 .../pmu-events/arch/x86/sandybridge/cache.json |  4 +--
 .../pmu-events/arch/x86/sandybridge/pipeline.json  |  2 +-
 .../pmu-events/arch/x86/skylakex/uncore-other.json | 12 -
 12 files changed, 36 insertions(+), 36 deletions(-)

diff --git a/tools/perf/pmu-events/arch/x86/broadwell/cache.json b/tools/perf/pmu-events/arch/x86/broadwell/cache.json
index bba3152ec54a..0b080b0352d8 100644
--- a/tools/perf/pmu-events/arch/x86/broadwell/cache.json
+++ b/tools/perf/pmu-events/arch/x86/broadwell/cache.json
@@ -433,7 +433,7 @@
 },
 {
 "PEBS": "1",
-"PublicDescription": "This is a precise version (that is, uses PEBS) 
of the event that counts line-splitted load uops retired to the architected 
path. A line split is across 64B cache-line which includes a page split (4K).",
+"PublicDescription": "This is a precise version (that is, uses PEBS) 
of the event that counts line-split load uops retired to the architected path. 
A line split is across 64B cache-line which includes a page split (4K).",
 "EventCode": "0xD0",
 "Counter": "0,1,2,3",
 "UMask": "0x41",
@@ -445,7 +445,7 @@
 },
 {
 "PEBS": "1",
-"PublicDescription": "This is a precise version (that is, uses PEBS) 
of the event that counts line-splitted store uops retired to the architected 
path. A line split is across 64B cache-line which includes a page split (4K).",
+"PublicDescription": "This is a precise version (that is, uses PEBS) 
of the event that counts line-split store uops retired to the architected path. 
A line split is across 64B cache-line which includes a page split (4K).",
 "EventCode": "0xD0",
 "Counter": "0,1,2,3",
 "UMask": "0x42",
diff --git a/tools/perf/pmu-events/arch/x86/broadwell/pipeline.json b/tools/perf/pmu-events/arch/x86/broadwell/pipeline.json
index 97c5d0784c6c..999cf3066363 100644
--- a/tools/perf/pmu-events/arch/x86/broadwell/pipeline.json
+++ b/tools/perf/pmu-events/arch/x86/broadwell/pipeline.json
@@ -317,7 +317,7 @@
 "CounterHTOff": "0,1,2,3,4,5,6,7"
 },
 {
-"PublicDescription": "This event counts stalls occured due to changing 
prefix length (66, 67 or REX.W when they change the length of the decoded 
instruction). Occurrences counting is proportional to the number of prefixes in 
a 16B-line. This may result in the following penalties: three-cycle penalty for 
each LCP in a 16-byte chunk.",
+"PublicDescription": "This event counts stalls occurred due to 
changing prefix length (66, 67 or REX.W when they change the length of the 
decoded instruction). Occurrences counting is proportional to the number of 
prefixes in a 16B-line. This may result in the following penalties: three-cycle 
penalty for each LCP in a 16-byte chunk.",
 "EventCode": "0x87",
 "Counter": "0,1,2,3",
 "UMask": "0x1",
diff --git a/tools/perf/pmu-events/arch/x86/broadwellde/cache.json b/tools/perf/pmu-events/arch/x86/broadwellde/cache.json
index bf243fe2a0ec..4ad425312bdc 100644
--- a/tools/perf/pmu-events/arch/x86/broadwellde/cache.json
+++ b/tools/perf/pmu-events/arch/x86/broadwellde/cache.json
@@ -439,7 +439,7 @@
 "PEBS": "1",
 "Counter": "0,1,2,3",
 "EventName": "MEM_UOPS_RETIRED.SPLIT_LOADS",
-

[tip:perf/core] perf tools: Fix diverse comment typos

2018-12-14 Thread tip-bot for Ingo Molnar
Commit-ID:  f04ae48fe61a13e3ea63c2761837f646bd1f6980
Gitweb: https://git.kernel.org/tip/f04ae48fe61a13e3ea63c2761837f646bd1f6980
Author: Ingo Molnar 
AuthorDate: Mon, 3 Dec 2018 11:22:00 +0100
Committer:  Arnaldo Carvalho de Melo 
CommitDate: Thu, 6 Dec 2018 14:12:31 -0300

perf tools: Fix diverse comment typos

Go over the tools/ files that are maintained in Arnaldo's tree and
fix common typos: half of them were in comments, the other half
in JSON files.

No change in functionality intended.

Committer notes:

This was split from a larger patch as there is code that is,
additionally, maintained outside the kernel tree, so to ease
cherry-picking and/or backporting, split this into multiple patches.

Just typos in comments, no need to backport, reducing the possibility of
backporting artifacts.

Signed-off-by: Ingo Molnar 
Cc: Jiri Olsa 
Cc: Namhyung Kim 
Cc: Peter Zijlstra 
Link: http://lkml.kernel.org/r/20181203102200.ga104...@gmail.com
Signed-off-by: Arnaldo Carvalho de Melo 
---
 tools/perf/arch/x86/tests/insn-x86.c | 2 +-
 tools/perf/builtin-top.c | 2 +-
 tools/perf/builtin-trace.c   | 2 +-
 tools/perf/tests/attr.c  | 2 +-
 tools/perf/util/annotate.c   | 2 +-
 tools/perf/util/header.c | 2 +-
 tools/perf/util/hist.c   | 2 +-
 tools/perf/util/jitdump.c| 2 +-
 tools/perf/util/machine.c| 2 +-
 tools/perf/util/probe-event.c| 4 ++--
 tools/perf/util/sort.c   | 2 +-
 11 files changed, 12 insertions(+), 12 deletions(-)

diff --git a/tools/perf/arch/x86/tests/insn-x86.c b/tools/perf/arch/x86/tests/insn-x86.c
index a5d24ae5810d..c3e5f4ab0d3e 100644
--- a/tools/perf/arch/x86/tests/insn-x86.c
+++ b/tools/perf/arch/x86/tests/insn-x86.c
@@ -170,7 +170,7 @@ static int test_data_set(struct test_data *dat_set, int x86_64)
  *
  * If the test passes %0 is returned, otherwise %-1 is returned.  Use the
  * verbose (-v) option to see all the instructions and whether or not they
- * decoded successfuly.
+ * decoded successfully.
  */
 int test__insn_x86(struct test *test __maybe_unused, int subtest __maybe_unused)
 {
diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c
index 1252d1759064..c59a3eb0d697 100644
--- a/tools/perf/builtin-top.c
+++ b/tools/perf/builtin-top.c
@@ -595,7 +595,7 @@ static void *display_thread_tui(void *arg)
 
/*
 * Initialize the uid_filter_str, in the future the TUI will allow
-* Zooming in/out UIDs. For now juse use whatever the user passed
+* Zooming in/out UIDs. For now just use whatever the user passed
 * via --uid.
 */
evlist__for_each_entry(top->evlist, pos) {
diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c
index a57a9ae1fd4b..a6aa4589ad50 100644
--- a/tools/perf/builtin-trace.c
+++ b/tools/perf/builtin-trace.c
@@ -2782,7 +2782,7 @@ static int trace__run(struct trace *trace, int argc, const char **argv)
 * Now that we already used evsel->attr to ask the kernel to setup the
 * events, lets reuse evsel->attr.sample_max_stack as the limit in
 * trace__resolve_callchain(), allowing per-event max-stack settings
-* to override an explicitely set --max-stack global setting.
+* to override an explicitly set --max-stack global setting.
 */
evlist__for_each_entry(evlist, evsel) {
if (evsel__has_callchain(evsel) &&
diff --git a/tools/perf/tests/attr.c b/tools/perf/tests/attr.c
index 05dfe11c2f9e..d8426547219b 100644
--- a/tools/perf/tests/attr.c
+++ b/tools/perf/tests/attr.c
@@ -182,7 +182,7 @@ int test__attr(struct test *test __maybe_unused, int subtest __maybe_unused)
char path_perf[PATH_MAX];
char path_dir[PATH_MAX];
 
-   /* First try developement tree tests. */
+   /* First try development tree tests. */
if (!lstat("./tests", &st))
return run_dir("./tests", "./perf");
 
diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c
index f69d8e177fa3..51d291b0b81f 100644
--- a/tools/perf/util/annotate.c
+++ b/tools/perf/util/annotate.c
@@ -1772,7 +1772,7 @@ static int symbol__disassemble(struct symbol *sym, struct annotate_args *args)
while (!feof(file)) {
/*
 * The source code line number (lineno) needs to be kept in
-* accross calls to symbol__parse_objdump_line(), so that it
+* across calls to symbol__parse_objdump_line(), so that it
 * can associate it with the instructions till the next one.
 * See disasm_line__new() and struct disasm_line::line_nr.
 */
diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c
index 9cc81d48a908..4a64739c67e7 100644
--- a/tools/perf/util/header.c
+++ b/tools/perf/util/header.c
@@ -2798,7 +2798,7 @@ static int perf_header__adds_write(struct perf_header *header,
lseek(fd, sec_start, 

[tip:perf/core] perf tools Documentation: Fix diverse typos

2018-12-14 Thread tip-bot for Ingo Molnar
Commit-ID:  e1eebe9cc3d548a2fbbd97d978d133801a348cc3
Gitweb: https://git.kernel.org/tip/e1eebe9cc3d548a2fbbd97d978d133801a348cc3
Author: Ingo Molnar 
AuthorDate: Mon, 3 Dec 2018 11:22:00 +0100
Committer:  Arnaldo Carvalho de Melo 
CommitDate: Thu, 6 Dec 2018 14:12:31 -0300

perf tools Documentation: Fix diverse typos

Go over the tools/ files that are maintained in Arnaldo's tree and
fix common typos: half of them were in comments, the other half
in JSON files.

No change in functionality intended.

Committer notes:

This was split from a larger patch as there is code that is,
additionally, maintained outside the kernel tree, so to ease
cherry-picking and/or backporting, split this into multiple patches.

In this particular case, it affects documentation, so it may be interesting
to cherry-pick as it is information that is presented to the user.

Signed-off-by: Ingo Molnar 
Cc: Jiri Olsa 
Cc: Namhyung Kim 
Cc: Peter Zijlstra 
Link: http://lkml.kernel.org/r/20181203102200.ga104...@gmail.com
Signed-off-by: Arnaldo Carvalho de Melo 
---
 tools/perf/Documentation/perf-list.txt   | 2 +-
 tools/perf/Documentation/perf-report.txt | 2 +-
 tools/perf/Documentation/perf-stat.txt   | 4 ++--
 3 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/tools/perf/Documentation/perf-list.txt b/tools/perf/Documentation/perf-list.txt
index 667c14e56031..138fb6e94b3c 100644
--- a/tools/perf/Documentation/perf-list.txt
+++ b/tools/perf/Documentation/perf-list.txt
@@ -172,7 +172,7 @@ like cycles and instructions and some software events.
 Other PMUs and global measurements are normally root only.
 Some event qualifiers, such as "any", are also root only.
 
-This can be overriden by setting the kernel.perf_event_paranoid
+This can be overridden by setting the kernel.perf_event_paranoid
 sysctl to -1, which allows non root to use these events.
 
 For accessing trace point events perf needs to have read access to
diff --git a/tools/perf/Documentation/perf-report.txt b/tools/perf/Documentation/perf-report.txt
index ed2bf37ab132..1a27bfe05039 100644
--- a/tools/perf/Documentation/perf-report.txt
+++ b/tools/perf/Documentation/perf-report.txt
@@ -252,7 +252,7 @@ OPTIONS
  Usually more convenient to use --branch-history for this.
 
value can be:
-   - percent: diplay overhead percent (default)
+   - percent: display overhead percent (default)
- period: display event period
- count: display event count
 
diff --git a/tools/perf/Documentation/perf-stat.txt b/tools/perf/Documentation/perf-stat.txt
index b10a90b6a718..4bc2085e5197 100644
--- a/tools/perf/Documentation/perf-stat.txt
+++ b/tools/perf/Documentation/perf-stat.txt
@@ -50,7 +50,7 @@ report::
  /sys/bus/event_source/devices//format/*
 
Note that the last two syntaxes support prefix and glob matching in
-   the PMU name to simplify creation of events accross multiple instances
+   the PMU name to simplify creation of events across multiple instances
of the same type of PMU in large systems (e.g. memory controller PMUs).
Multiple PMU instances are typical for uncore PMUs, so the prefix
'uncore_' is also ignored when performing this match.
@@ -277,7 +277,7 @@ echo 0 > /proc/sys/kernel/nmi_watchdog
 for best results. Otherwise the bottlenecks may be inconsistent
 on workload with changing phases.
 
-This enables --metric-only, unless overriden with --no-metric-only.
+This enables --metric-only, unless overridden with --no-metric-only.
 
 To interpret the results it is usually needed to know on which
 CPUs the workload runs on. If needed the CPUs can be forced using


[tip:perf/core] perf bpf-loader: Fix debugging message typo

2018-12-14 Thread tip-bot for Ingo Molnar
Commit-ID:  d401b02c41f6afcb8ed32479a016a20cbfd59d6f
Gitweb: https://git.kernel.org/tip/d401b02c41f6afcb8ed32479a016a20cbfd59d6f
Author: Ingo Molnar 
AuthorDate: Mon, 3 Dec 2018 11:22:00 +0100
Committer:  Arnaldo Carvalho de Melo 
CommitDate: Thu, 6 Dec 2018 14:12:31 -0300

perf bpf-loader: Fix debugging message typo

Go over the tools/ files that are maintained in Arnaldo's tree and
fix common typos: half of them were in comments, the other half
in JSON files.

No change in functionality intended.

Committer notes:

This was split from a larger patch as there is code that is,
additionally, maintained outside the kernel tree, so to ease
cherry-picking and/or backporting, split this into multiple patches.

This one has information that is presented to the user, albeit in debug
mode.

Signed-off-by: Ingo Molnar 
Cc: Jiri Olsa 
Cc: Namhyung Kim 
Cc: Peter Zijlstra 
Cc: Wang Nan 
Link: http://lkml.kernel.org/r/20181203102200.ga104...@gmail.com
Signed-off-by: Arnaldo Carvalho de Melo 
---
 tools/perf/util/bpf-loader.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tools/perf/util/bpf-loader.c b/tools/perf/util/bpf-loader.c
index 9a280647d829..2f3eb6d293ee 100644
--- a/tools/perf/util/bpf-loader.c
+++ b/tools/perf/util/bpf-loader.c
@@ -99,7 +99,7 @@ struct bpf_object *bpf__prepare_load(const char *filename, 
bool source)
if (err)
return ERR_PTR(-BPF_LOADER_ERRNO__COMPILE);
} else
-   pr_debug("bpf: successfull builtin compilation\n");
+   pr_debug("bpf: successful builtin compilation\n");
obj = bpf_object__open_buffer(obj_buf, obj_buf_sz, filename);
 
if (!IS_ERR_OR_NULL(obj) && llvm_param.dump_obj)


[tip:perf/core] tools lib traceevent: Fix diverse typos in comments

2018-12-14 Thread tip-bot for Ingo Molnar
Commit-ID:  0dac8c80c833e2f9f09b9d358c51c6359f1d306b
Gitweb: https://git.kernel.org/tip/0dac8c80c833e2f9f09b9d358c51c6359f1d306b
Author: Ingo Molnar 
AuthorDate: Mon, 3 Dec 2018 11:22:00 +0100
Committer:  Arnaldo Carvalho de Melo 
CommitDate: Thu, 6 Dec 2018 14:12:31 -0300

tools lib traceevent: Fix diverse typos in comments

Go over the tools/ files that are maintained in Arnaldo's tree and
fix common typos: half of them were in comments, the other half
in JSON files.

No change in functionality intended.

Committer notes:

This was split from a larger patch as there is code that is,
additionally, maintained outside the kernel tree, so to ease
cherry-picking and/or backporting, split this into multiple patches.

Signed-off-by: Ingo Molnar 
Cc: Jiri Olsa 
Cc: Namhyung Kim 
Cc: Peter Zijlstra 
Cc: Steven Rostedt (VMware) 
Cc: Tzvetomir Stoyanov 
Link: http://lkml.kernel.org/r/20181203102200.ga104...@gmail.com
Signed-off-by: Arnaldo Carvalho de Melo 
---
 tools/lib/traceevent/event-parse.c | 12 ++--
 tools/lib/traceevent/plugin_kvm.c  |  2 +-
 2 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/tools/lib/traceevent/event-parse.c b/tools/lib/traceevent/event-parse.c
index ffa656b868a9..a5ed291b8a9f 100644
--- a/tools/lib/traceevent/event-parse.c
+++ b/tools/lib/traceevent/event-parse.c
@@ -1145,7 +1145,7 @@ static enum tep_event_type read_token(char **tok)
 }
 
 /**
- * tep_read_token - access to utilites to use the pevent parser
+ * tep_read_token - access to utilities to use the pevent parser
  * @tok: The token to return
  *
  * This will parse tokens from the string given by
@@ -3258,7 +3258,7 @@ static int event_read_print(struct tep_event *event)
  * @name: the name of the common field to return
  *
  * Returns a common field from the event by the given @name.
- * This only searchs the common fields and not all field.
+ * This only searches the common fields and not all field.
  */
 struct tep_format_field *
 tep_find_common_field(struct tep_event *event, const char *name)
@@ -3302,7 +3302,7 @@ tep_find_field(struct tep_event *event, const char *name)
  * @name: the name of the field
  *
  * Returns a field by the given @name.
- * This searchs the common field names first, then
+ * This searches the common field names first, then
  * the non-common ones if a common one was not found.
  */
 struct tep_format_field *
@@ -3841,7 +3841,7 @@ static void print_bitmask_to_seq(struct tep_handle *pevent,
/*
 * data points to a bit mask of size bytes.
 * In the kernel, this is an array of long words, thus
-* endianess is very important.
+* endianness is very important.
 */
if (pevent->file_bigendian)
index = size - (len + 1);
@@ -5316,9 +5316,9 @@ pid_from_cmdlist(struct tep_handle *pevent, const char *comm, struct cmdline *next)
  * This returns the cmdline structure that holds a pid for a given
  * comm, or NULL if none found. As there may be more than one pid for
  * a given comm, the result of this call can be passed back into
- * a recurring call in the @next paramater, and then it will find the
+ * a recurring call in the @next parameter, and then it will find the
  * next pid.
- * Also, it does a linear seach, so it may be slow.
+ * Also, it does a linear search, so it may be slow.
  */
 struct cmdline *tep_data_pid_from_comm(struct tep_handle *pevent, const char *comm,
   struct cmdline *next)
diff --git a/tools/lib/traceevent/plugin_kvm.c b/tools/lib/traceevent/plugin_kvm.c
index 637be7c18476..754050eea467 100644
--- a/tools/lib/traceevent/plugin_kvm.c
+++ b/tools/lib/traceevent/plugin_kvm.c
@@ -387,7 +387,7 @@ static int kvm_mmu_print_role(struct trace_seq *s, struct tep_record *record,
 
/*
 * We can only use the structure if file is of the same
-* endianess.
+* endianness.
 */
if (tep_is_file_bigendian(event->pevent) ==
tep_is_host_bigendian(event->pevent)) {

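The tep_data_pid_from_comm() kernel-doc above describes cursor-style iteration
via the @next parameter; a minimal sketch of that pattern (assuming
libtraceevent's tep_cmdline_pid() accessor, which maps a cmdline entry back to
its pid):

#include <stdio.h>
#include "event-parse.h"

/* Print every pid recorded for a given comm, feeding each result back as @next. */
static void print_pids_for_comm(struct tep_handle *pevent, const char *comm)
{
	struct cmdline *cl = NULL;

	while ((cl = tep_data_pid_from_comm(pevent, comm, cl)) != NULL)
		printf("%s -> pid %d\n", comm, tep_cmdline_pid(pevent, cl));
}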

[tip:sched/core] sched/completions/Documentation: Clean up the document some more

2018-10-11 Thread tip-bot for Ingo Molnar
Commit-ID:  0c373344b5c1eaa9e186368a32a169a2802be3ca
Gitweb: https://git.kernel.org/tip/0c373344b5c1eaa9e186368a32a169a2802be3ca
Author: Ingo Molnar 
AuthorDate: Thu, 11 Oct 2018 10:36:23 +0200
Committer:  Ingo Molnar 
CommitDate: Thu, 11 Oct 2018 10:36:23 +0200

sched/completions/Documentation: Clean up the document some more

Refresh the document:

 - Remove unnecessary linguistic complexity and improve the clarity of the text

 - Improve the explanations all around

 - Remove unnecessary and stale version info

 - Fix whitespace noise

 - Make pseudo-code match kernel style

 - Fix minor syntax errors in pseudo-code

 - Use consistent denotation

 - Mark multi-CPU sequences more explicitly

 - Unbreak line breaks

 - Use quotes to refer to 'struct completion'

 - Use 'IRQ context' and 'IRQs' consistently

 - Improve grammar

 - etc.

Cc: John Garry 
Cc: Linus Torvalds 
Cc: Nicholas Mc Guire 
Cc: Peter Zijlstra 
Cc: Thomas Gleixner 
Cc: cor...@lwn.net
Cc: linux-...@vger.kernel.org
Link: http://lkml.kernel.org/r/1539183392-239389-1-git-send-email-john.ga...@huawei.com
Signed-off-by: Ingo Molnar 
---
 Documentation/scheduler/completion.txt | 233 ++---
 1 file changed, 125 insertions(+), 108 deletions(-)

diff --git a/Documentation/scheduler/completion.txt b/Documentation/scheduler/completion.txt
index 108bd0f264b3..91a11a668354 100644
--- a/Documentation/scheduler/completion.txt
+++ b/Documentation/scheduler/completion.txt
@@ -1,146 +1,161 @@
-completions - wait for completion handling
-===========================================
-
-This document was originally written based on 3.18.0 (linux-next)
+Completions - "wait for completion" barrier APIs
+================================================
 
 Introduction:
 -
 
-If you have one or more threads of execution that must wait for some process
+If you have one or more threads that must wait for some kernel activity
 to have reached a point or a specific state, completions can provide a
 race-free solution to this problem. Semantically they are somewhat like a
-pthread_barrier and have similar use-cases.
+pthread_barrier() and have similar use-cases.
 
 Completions are a code synchronization mechanism which is preferable to any
-misuse of locks. Any time you think of using yield() or some quirky
-msleep(1) loop to allow something else to proceed, you probably want to
-look into using one of the wait_for_completion*() calls instead. The
-advantage of using completions is clear intent of the code, but also more
-efficient code as both threads can continue until the result is actually
-needed.
-
-Completions are built on top of the generic event infrastructure in Linux,
-with the event reduced to a simple flag (appropriately called "done") in
-struct completion that tells the waiting threads of execution if they
-can continue safely.
-
-As completions are scheduling related, the code is found in
+misuse of locks/semaphores and busy-loops. Any time you think of using
+yield() or some quirky msleep(1) loop to allow something else to proceed,
+you probably want to look into using one of the wait_for_completion*()
+calls and complete() instead.
+
+The advantage of using completions is that they have a well defined, focused
+purpose which makes it very easy to see the intent of the code, but they
+also result in more efficient code as all threads can continue execution
+until the result is actually needed, and both the waiting and the signalling
+is highly efficient using low level scheduler sleep/wakeup facilities.
+
+Completions are built on top of the waitqueue and wakeup infrastructure of
+the Linux scheduler. The event the threads on the waitqueue are waiting for
+is reduced to a simple flag in 'struct completion', appropriately called "done".
+
+As completions are scheduling related, the code can be found in
 kernel/sched/completion.c.
 
 
 Usage:
 --
 
-There are three parts to using completions, the initialization of the
-struct completion, the waiting part through a call to one of the variants of
-wait_for_completion() and the signaling side through a call to complete()
-or complete_all(). Further there are some helper functions for checking the
-state of completions.
+There are three main parts to using completions:
+
+ - the initialization of the 'struct completion' synchronization object
+ - the waiting part through a call to one of the variants of wait_for_completion(),
+ - the signaling side through a call to complete() or complete_all().
+
+There are also some helper functions for checking the state of completions.
+Note that while initialization must happen first, the waiting and signaling
+part can happen in any order. I.e. it's entirely normal for a thread
+to have marked a completion as 'done' before another thread checks whether
+it has to wait for it.
 
-To use completions one needs to include <linux/completion.h> and
-create a variable of type struct completion. The structure used for
-handling of completions 

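The init/wait/signal split described in the refreshed text above maps onto very
little code; a minimal kernel-side sketch (hypothetical usage, not taken from
the patch):

#include <linux/completion.h>

static DECLARE_COMPLETION(setup_done);		/* initialization */

static int waiter_thread(void *unused)
{
	/* Waiting side: sleeps until the signaling side runs complete() */
	wait_for_completion(&setup_done);
	return 0;
}

static void setup_finished(void)
{
	/* Signaling side: wakes one waiter; complete_all() would wake them all */
	complete(&setup_done);
}
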
[tip:x86/asm] x86/segments: Introduce the 'CPUNODE' naming to better document the segment limit CPU/node NR trick

2018-10-08 Thread tip-bot for Ingo Molnar
Commit-ID:  22245bdf0ad805d6c29f82b6d5e977ee94bb2166
Gitweb: https://git.kernel.org/tip/22245bdf0ad805d6c29f82b6d5e977ee94bb2166
Author: Ingo Molnar 
AuthorDate: Mon, 8 Oct 2018 10:41:59 +0200
Committer:  Ingo Molnar 
CommitDate: Mon, 8 Oct 2018 10:45:02 +0200

x86/segments: Introduce the 'CPUNODE' naming to better document the segment limit CPU/node NR trick

We have a special segment descriptor entry in the GDT, whose sole purpose is to
encode the CPU and node numbers in its limit (size) field. There are user-space
instructions that allow the reading of the limit field, which gives us a really
fast way to read the CPU and node IDs from the vDSO for example.

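A user-space sketch of the trick (the selector and bit layout below mirror the
values this patch names GDT_ENTRY_CPUNODE and VDSO_CPUNODE_BITS; the program
itself is illustrative, not part of the patch):

#include <stdio.h>

#define CPUNODE_SEL	(15 * 8 + 3)	/* GDT entry 15, RPL 3 */
#define CPUNODE_BITS	12
#define CPUNODE_MASK	0xfff

int main(void)
{
	unsigned long limit;

	/* LSL reads the segment limit of the special GDT entry */
	asm volatile("lsl %1, %0" : "=r" (limit) : "r" ((unsigned long)CPUNODE_SEL));

	printf("cpu=%lu node=%lu\n",
	       limit & CPUNODE_MASK, (limit >> CPUNODE_BITS) & CPUNODE_MASK);
	return 0;
}
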
But the naming of related functionality does not make this clear, at all:

VDSO_CPU_SIZE
VDSO_CPU_MASK
__CPU_NUMBER_SEG
GDT_ENTRY_CPU_NUMBER
vdso_encode_cpu_node
vdso_read_cpu_node

There's a number of problems:

 - The 'VDSO_CPU_SIZE' doesn't really make it clear that these are number
   of bits, nor does it make it clear which 'CPU' this refers to, i.e.
   that this is about a GDT entry whose limit encodes the CPU and node number.

 - Furthermore, the 'CPU_NUMBER' naming is actively misleading as well,
   because the segment limit encodes not just the CPU number but the
   node ID as well ...

So use a better nomenclature all around: name everything related to this trick
as 'CPUNODE', to make it clear that this is something special, and add
_BITS to make it clear that these are number of bits, and propagate this to
every affected name:

VDSO_CPU_SIZE =>  VDSO_CPUNODE_BITS
VDSO_CPU_MASK =>  VDSO_CPUNODE_MASK
__CPU_NUMBER_SEG  =>  __CPUNODE_SEG
GDT_ENTRY_CPU_NUMBER  =>  GDT_ENTRY_CPUNODE
vdso_encode_cpu_node  =>  vdso_encode_cpunode
vdso_read_cpu_node=>  vdso_read_cpunode

This, beyond being less confusing, also makes it easier to grep for all related
functionality:

  $ git grep -i cpunode arch/x86

Also, while at it, fix "return is not a function" style sloppiness in vdso_encode_cpunode().

Cc: Andy Lutomirski 
Cc: Borislav Petkov 
Cc: Brian Gerst 
Cc: Chang S. Bae 
Cc: Dave Hansen 
Cc: Denys Vlasenko 
Cc: H. Peter Anvin 
Cc: Linus Torvalds 
Cc: Markus T Metzger 
Cc: Peter Zijlstra 
Cc: Ravi Shankar 
Cc: Rik van Riel 
Cc: Thomas Gleixner 
Cc: linux-kernel@vger.kernel.org
Link: http://lkml.kernel.org/r/1537312139-5580-2-git-send-email-chang.seok@intel.com
Signed-off-by: Ingo Molnar 
---
 arch/x86/entry/vdso/vgetcpu.c  |  2 +-
 arch/x86/include/asm/segment.h | 22 +++---
 arch/x86/kernel/cpu/common.c   |  4 ++--
 3 files changed, 14 insertions(+), 14 deletions(-)

diff --git a/arch/x86/entry/vdso/vgetcpu.c b/arch/x86/entry/vdso/vgetcpu.c
index de78fc9cd963..edd214f5264d 100644
--- a/arch/x86/entry/vdso/vgetcpu.c
+++ b/arch/x86/entry/vdso/vgetcpu.c
@@ -13,7 +13,7 @@
 notrace long
 __vdso_getcpu(unsigned *cpu, unsigned *node, struct getcpu_cache *unused)
 {
-   vdso_read_cpu_node(cpu, node);
+   vdso_read_cpunode(cpu, node);
return 0;
 }
 
diff --git a/arch/x86/include/asm/segment.h b/arch/x86/include/asm/segment.h
index 4d1f6cc62e13..a314087add07 100644
--- a/arch/x86/include/asm/segment.h
+++ b/arch/x86/include/asm/segment.h
@@ -186,7 +186,7 @@
 #define GDT_ENTRY_TLS_MIN  12
 #define GDT_ENTRY_TLS_MAX  14
 
-#define GDT_ENTRY_CPU_NUMBER   15
+#define GDT_ENTRY_CPUNODE  15
 
 /*
  * Number of entries in the GDT table:
@@ -206,7 +206,7 @@
 #define __USER_DS  (GDT_ENTRY_DEFAULT_USER_DS*8 + 3)
 #define __USER32_DS__USER_DS
 #define __USER_CS  (GDT_ENTRY_DEFAULT_USER_CS*8 + 3)
-#define __CPU_NUMBER_SEG   (GDT_ENTRY_CPU_NUMBER*8 + 3)
+#define __CPUNODE_SEG  (GDT_ENTRY_CPUNODE*8 + 3)
 
 #endif
 
@@ -227,24 +227,24 @@
 #ifdef CONFIG_X86_64
 
 /* Bit size and mask of CPU number stored in the per CPU data (and TSC_AUX) */
-#define VDSO_CPU_SIZE  12
-#define VDSO_CPU_MASK  0xfff
+#define VDSO_CPUNODE_BITS  12
+#define VDSO_CPUNODE_MASK  0xfff
 
 #ifndef __ASSEMBLY__
 
 /* Helper functions to store/load CPU and node numbers */
 
-static inline unsigned long vdso_encode_cpu_node(int cpu, unsigned long node)
+static inline unsigned long vdso_encode_cpunode(int cpu, unsigned long node)
 {
-   return ((node << VDSO_CPU_SIZE) | cpu);
+   return (node << VDSO_CPUNODE_BITS) | cpu;
 }
 
-static inline void vdso_read_cpu_node(unsigned *cpu, unsigned *node)
+static inline void vdso_read_cpunode(unsigned *cpu, unsigned *node)
 {
unsigned int p;
 
/*
-* Load CPU and node number from GDT.  LSL is faster than RDTSCP
+* Load CPU and node number from the GDT.  LSL is faster than RDTSCP
 * and works on all CPUs.  This is volatile so that it orders
 * correctly with respect to 

[tip:x86/asm] x86/fsgsbase/64: Clean up various details

2018-10-08 Thread tip-bot for Ingo Molnar
Commit-ID:  ec3a94188df7d28b374868d9a2a0face910e62ab
Gitweb: https://git.kernel.org/tip/ec3a94188df7d28b374868d9a2a0face910e62ab
Author: Ingo Molnar 
AuthorDate: Mon, 8 Oct 2018 10:41:59 +0200
Committer:  Ingo Molnar 
CommitDate: Mon, 8 Oct 2018 10:45:04 +0200

x86/fsgsbase/64: Clean up various details

So:

 - use 'extern' consistently for APIs

 - fix weird header guard

 - clarify code comments

 - reorder APIs by type

Cc: Andy Lutomirski 
Cc: Borislav Petkov 
Cc: Brian Gerst 
Cc: Chang S. Bae 
Cc: Dave Hansen 
Cc: Denys Vlasenko 
Cc: H. Peter Anvin 
Cc: Linus Torvalds 
Cc: Markus T Metzger 
Cc: Peter Zijlstra 
Cc: Ravi Shankar 
Cc: Rik van Riel 
Cc: Thomas Gleixner 
Cc: linux-kernel@vger.kernel.org
Link: http://lkml.kernel.org/r/1537312139-5580-2-git-send-email-chang.seok@intel.com
Signed-off-by: Ingo Molnar 
---
 arch/x86/entry/vdso/vgetcpu.c   |  1 +
 arch/x86/include/asm/fsgsbase.h | 22 --
 2 files changed, 13 insertions(+), 10 deletions(-)

diff --git a/arch/x86/entry/vdso/vgetcpu.c b/arch/x86/entry/vdso/vgetcpu.c
index edd214f5264d..f86ab0ae1777 100644
--- a/arch/x86/entry/vdso/vgetcpu.c
+++ b/arch/x86/entry/vdso/vgetcpu.c
@@ -14,6 +14,7 @@ notrace long
 __vdso_getcpu(unsigned *cpu, unsigned *node, struct getcpu_cache *unused)
 {
vdso_read_cpunode(cpu, node);
+
return 0;
 }
 
diff --git a/arch/x86/include/asm/fsgsbase.h b/arch/x86/include/asm/fsgsbase.h
index 5e9cbcce318a..eb377b6e9eed 100644
--- a/arch/x86/include/asm/fsgsbase.h
+++ b/arch/x86/include/asm/fsgsbase.h
@@ -1,6 +1,6 @@
 /* SPDX-License-Identifier: GPL-2.0 */
 #ifndef _ASM_FSGSBASE_H
-#define _ASM_FSGSBASE_H 1
+#define _ASM_FSGSBASE_H
 
 #ifndef __ASSEMBLY__
 
@@ -9,14 +9,15 @@
 #include 
 
 /*
- * Read/write a task's fsbase or gsbase. This returns the value that
+ * Read/write a task's FSBASE or GSBASE. This returns the value that
  * the FS/GS base would have (if the task were to be resumed). These
- * work on current or on a different non-running task.
+ * work on the current task or on a non-running (typically stopped
+ * ptrace child) task.
  */
-unsigned long x86_fsbase_read_task(struct task_struct *task);
-unsigned long x86_gsbase_read_task(struct task_struct *task);
-int x86_fsbase_write_task(struct task_struct *task, unsigned long fsbase);
-int x86_gsbase_write_task(struct task_struct *task, unsigned long gsbase);
+extern unsigned long x86_fsbase_read_task(struct task_struct *task);
+extern unsigned long x86_gsbase_read_task(struct task_struct *task);
+extern int x86_fsbase_write_task(struct task_struct *task, unsigned long fsbase);
+extern int x86_gsbase_write_task(struct task_struct *task, unsigned long gsbase);
 
 /* Helper functions for reading/writing FS/GS base */
 
@@ -25,20 +26,21 @@ static inline unsigned long x86_fsbase_read_cpu(void)
unsigned long fsbase;
 
rdmsrl(MSR_FS_BASE, fsbase);
+
return fsbase;
 }
 
-void x86_fsbase_write_cpu(unsigned long fsbase);
-
 static inline unsigned long x86_gsbase_read_cpu_inactive(void)
 {
unsigned long gsbase;
 
rdmsrl(MSR_KERNEL_GS_BASE, gsbase);
+
return gsbase;
 }
 
-void x86_gsbase_write_cpu_inactive(unsigned long gsbase);
+extern void x86_fsbase_write_cpu(unsigned long fsbase);
+extern void x86_gsbase_write_cpu_inactive(unsigned long gsbase);
 
 #endif /* CONFIG_X86_64 */
 


[tip:efi/core] efi/x86: Clean up the eboot code

2018-07-15 Thread tip-bot for Ingo Molnar
Commit-ID:  90a2186b7df183c2fd35f724d0d16a0c10fac9b2
Gitweb: https://git.kernel.org/tip/90a2186b7df183c2fd35f724d0d16a0c10fac9b2
Author: Ingo Molnar 
AuthorDate: Wed, 11 Jul 2018 11:40:33 +0200
Committer:  Ingo Molnar 
CommitDate: Mon, 16 Jul 2018 00:43:05 +0200

efi/x86: Clean up the eboot code

Various small cleanups:

 - Standardize printk messages:

 'alloc' => 'allocate'
 'mem'   => 'memory'

   also put variable names in printk messages between quotes.

 - Align mass-assignments vertically for better readability

 - Break multi-line function prototypes at the name where possible,
   not in the middle of the parameter list

 - Use a newline before return statements consistently.

 - Use curly braces in a balanced fashion.

 - Remove stray newlines.

No change in functionality.

Signed-off-by: Ingo Molnar 
Signed-off-by: Ard Biesheuvel 
Cc: Linus Torvalds 
Cc: Matt Fleming 
Cc: Peter Zijlstra 
Cc: Thomas Gleixner 
Cc: linux-...@vger.kernel.org
Link: http://lkml.kernel.org/r/20180711094040.12506-2-ard.biesheu...@linaro.org
Signed-off-by: Ingo Molnar 
---
 arch/x86/boot/compressed/eboot.c | 245 ---
 1 file changed, 125 insertions(+), 120 deletions(-)

diff --git a/arch/x86/boot/compressed/eboot.c b/arch/x86/boot/compressed/eboot.c
index e98522ea6f09..9f6813493945 100644
--- a/arch/x86/boot/compressed/eboot.c
+++ b/arch/x86/boot/compressed/eboot.c
@@ -34,9 +34,9 @@ static void setup_boot_services##bits(struct efi_config *c)   \
\
table = (typeof(table))sys_table;   \
\
-   c->runtime_services = table->runtime;   \
-   c->boot_services = table->boottime; \
-   c->text_output = table->con_out;\
+   c->runtime_services = table->runtime;   \
+   c->boot_services= table->boottime;  \
+   c->text_output  = table->con_out;   \
 }
 BOOT_SERVICES(32);
 BOOT_SERVICES(64);
@@ -64,6 +64,7 @@ static inline efi_status_t __open_volume32(void *__image, void **__fh)
efi_printk(sys_table, "Failed to open volume\n");
 
*__fh = fh;
+
return status;
 }
 
@@ -90,6 +91,7 @@ static inline efi_status_t __open_volume64(void *__image, void **__fh)
efi_printk(sys_table, "Failed to open volume\n");
 
*__fh = fh;
+
return status;
 }
 
@@ -134,16 +136,16 @@ __setup_efi_pci(efi_pci_io_protocol_t *pci, struct pci_setup_rom **__rom)
 
status = efi_call_early(allocate_pool, EFI_LOADER_DATA, size, &rom);
if (status != EFI_SUCCESS) {
-   efi_printk(sys_table, "Failed to alloc mem for rom\n");
+   efi_printk(sys_table, "Failed to allocate memory for 'rom'\n");
return status;
}
 
memset(rom, 0, sizeof(*rom));
 
-   rom->data.type = SETUP_PCI;
-   rom->data.len = size - sizeof(struct setup_data);
-   rom->data.next = 0;
-   rom->pcilen = pci->romsize;
+   rom->data.type  = SETUP_PCI;
+   rom->data.len   = size - sizeof(struct setup_data);
+   rom->data.next  = 0;
+   rom->pcilen = pci->romsize;
*__rom = rom;
 
status = efi_call_proto(efi_pci_io_protocol, pci.read, pci,
@@ -180,8 +182,7 @@ free_struct:
 }
 
 static void
-setup_efi_pci32(struct boot_params *params, void **pci_handle,
-   unsigned long size)
+setup_efi_pci32(struct boot_params *params, void **pci_handle, unsigned long size)
 {
efi_pci_io_protocol_t *pci = NULL;
efi_guid_t pci_proto = EFI_PCI_IO_PROTOCOL_GUID;
@@ -220,13 +221,11 @@ setup_efi_pci32(struct boot_params *params, void **pci_handle,
params->hdr.setup_data = (unsigned long)rom;
 
data = (struct setup_data *)rom;
-
}
 }
 
 static void
-setup_efi_pci64(struct boot_params *params, void **pci_handle,
-   unsigned long size)
+setup_efi_pci64(struct boot_params *params, void **pci_handle, unsigned long size)
 {
efi_pci_io_protocol_t *pci = NULL;
efi_guid_t pci_proto = EFI_PCI_IO_PROTOCOL_GUID;
@@ -265,7 +264,6 @@ setup_efi_pci64(struct boot_params *params, void **pci_handle,
params->hdr.setup_data = (unsigned long)rom;
 
data = (struct setup_data *)rom;
-
}
 }
 
@@ -295,7 +293,7 @@ static void setup_efi_pci(struct boot_params *params)
size, (void **)&pci_handle);
 
if (status != EFI_SUCCESS) {
-   efi_printk(sys_table, "Failed to alloc mem for pci_handle\n");
+   efi_printk(sys_table, "Failed to allocate memory for 'pci_handle'\n");
return;
  

[tip:x86/cache] x86/intel_rdt: Simplify index type

2018-06-21 Thread tip-bot for Ingo Molnar
Commit-ID:  a449a5325528c1ef2c5efae0fa422bf1c4a270e6
Gitweb: https://git.kernel.org/tip/a449a5325528c1ef2c5efae0fa422bf1c4a270e6
Author: Ingo Molnar 
AuthorDate: Tue, 19 Jun 2018 23:19:27 -0700
Committer:  Ingo Molnar 
CommitDate: Thu, 21 Jun 2018 11:21:35 +0200

x86/intel_rdt: Simplify index type

Simplify this pattern:

 #ifdef CONFIG_X86_64
u64 i;
 #else
u32 i;
 #endif

... to the more natural and shorter one:

unsigned long i;

No change in functionality.

Acked-by: Thomas Gleixner 
Cc: Reinette Chatre 
Cc: Linus Torvalds 
Cc: Peter Zijlstra 
Cc: fenghua...@intel.com
Cc: h...@zytor.com
Cc: tony.l...@intel.com
Fixes: 0438fb1aebf4 ("x86/intel_rdt: Pseudo-lock region creation/removal core")
Link: https://lkml.kernel.org/r/5773274f9947c4d8becbabd2655bd1628f060147.1529474468.git.reinette.cha...@intel.com
Signed-off-by: Ingo Molnar 
---
 arch/x86/kernel/cpu/intel_rdt_pseudo_lock.c | 18 +++---
 1 file changed, 3 insertions(+), 15 deletions(-)

diff --git a/arch/x86/kernel/cpu/intel_rdt_pseudo_lock.c b/arch/x86/kernel/cpu/intel_rdt_pseudo_lock.c
index a1670e50d6ce..df68972d5e3e 100644
--- a/arch/x86/kernel/cpu/intel_rdt_pseudo_lock.c
+++ b/arch/x86/kernel/cpu/intel_rdt_pseudo_lock.c
@@ -415,11 +415,7 @@ static int pseudo_lock_fn(void *_rdtgrp)
struct rdtgroup *rdtgrp = _rdtgrp;
struct pseudo_lock_region *plr = rdtgrp->plr;
u32 rmid_p, closid_p;
-#ifdef CONFIG_X86_64
-   u64 i;
-#else
-   u32 i;
-#endif
+   unsigned long i;
 #ifdef CONFIG_KASAN
/*
 * The registers used for local register variables are also used
@@ -874,11 +870,7 @@ static int measure_cycles_lat_fn(void *_plr)
 {
struct pseudo_lock_region *plr = _plr;
u64 start, end;
-#ifdef CONFIG_X86_64
-   u64 i;
-#else
-   u32 i;
-#endif
+   unsigned long i;
 #ifdef CONFIG_KASAN
/*
 * The registers used for local register variables are also used
@@ -932,11 +924,7 @@ static int measure_cycles_perf_fn(void *_plr)
struct pseudo_lock_region *plr = _plr;
unsigned long long l2_hits, l2_miss;
u64 l2_hit_bits, l2_miss_bits;
-#ifdef CONFIG_X86_64
-   u64 i;
-#else
-   u32 i;
-#endif
+   unsigned long i;
 #ifdef CONFIG_KASAN
/*
 * The registers used for local register variables are also used


[tip:x86/urgent] x86/mpx/selftests: Adjust the self-test to fresh distros that export the MPX ABI

2018-05-14 Thread tip-bot for Ingo Molnar
Commit-ID:  73bb4d6cd192b8629c5125aaada9892d9fc986b6
Gitweb: https://git.kernel.org/tip/73bb4d6cd192b8629c5125aaada9892d9fc986b6
Author: Ingo Molnar 
AuthorDate: Mon, 14 May 2018 10:59:08 +0200
Committer:  Ingo Molnar 
CommitDate: Mon, 14 May 2018 11:14:45 +0200

x86/mpx/selftests: Adjust the self-test to fresh distros that export the MPX ABI

Fix this warning:

  mpx-mini-test.c:422:0: warning: "SEGV_BNDERR" redefined

Cc: Dave Hansen 
Cc: Linus Torvalds 
Cc: Peter Zijlstra 
Cc: Thomas Gleixner 
Cc: a...@linux-foundation.org
Cc: dave.han...@intel.com
Cc: linux...@kvack.org
Cc: linux...@us.ibm.com
Cc: m...@ellerman.id.au
Cc: shake...@google.com
Cc: sh...@kernel.org
Link: http://lkml.kernel.org/r/20180514085908.ga12...@gmail.com
Signed-off-by: Ingo Molnar 
---
 tools/testing/selftests/x86/mpx-mini-test.c | 7 +--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/tools/testing/selftests/x86/mpx-mini-test.c 
b/tools/testing/selftests/x86/mpx-mini-test.c
index 9c0325e1ea68..50f7e9272481 100644
--- a/tools/testing/selftests/x86/mpx-mini-test.c
+++ b/tools/testing/selftests/x86/mpx-mini-test.c
@@ -368,6 +368,11 @@ static int expected_bnd_index = -1;
 uint64_t shadow_plb[NR_MPX_BOUNDS_REGISTERS][2]; /* shadow MPX bound registers 
*/
 unsigned long shadow_map[NR_MPX_BOUNDS_REGISTERS];
 
+/* Failed address bound checks: */
+#ifndef SEGV_BNDERR
+# define SEGV_BNDERR   3
+#endif
+
 /*
  * The kernel is supposed to provide some information about the bounds
  * exception in the siginfo.  It should match what we have in the bounds
@@ -419,8 +424,6 @@ void handler(int signum, siginfo_t *si, void *vucontext)
br_count++;
dprintf1("#BR 0x%jx (total seen: %d)\n", status, br_count);
 
-#define SEGV_BNDERR 3  /* failed address bound checks */
-
dprintf2("Saw a #BR! status 0x%jx at %016lx br_reason: %jx\n",
status, ip, br_reason);
dprintf2("si_signo: %d\n", si->si_signo);


[tip:x86/urgent] x86/pkeys/selftests: Adjust the self-test to fresh distros that export the pkeys ABI

2018-05-14 Thread tip-bot for Ingo Molnar
Commit-ID:  0fb96620dce351608aa82eed5942e2f58b07beda
Gitweb: https://git.kernel.org/tip/0fb96620dce351608aa82eed5942e2f58b07beda
Author: Ingo Molnar 
AuthorDate: Mon, 14 May 2018 10:56:23 +0200
Committer:  Ingo Molnar 
CommitDate: Mon, 14 May 2018 11:14:45 +0200

x86/pkeys/selftests: Adjust the self-test to fresh distros that export the 
pkeys ABI

Ubuntu 18.04 started exporting pkeys details in header files, resulting
in build failures and warnings in the pkeys self-tests:

  protection_keys.c:232:0: warning: "SEGV_BNDERR" redefined
  protection_keys.c:387:5: error: conflicting types for ‘pkey_get’
  protection_keys.c:409:5: error: conflicting types for ‘pkey_set’
  ...

Fix these namespace conflicts and double definitions, and also
clean up the ABI definitions to make it all a bit more readable.
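
For illustration only (not part of the commit): the conflicting-types
errors come from glibc now declaring pkey_get()/pkey_set() itself, so
the self-test's helpers are renamed with a hw_ prefix and made static.
A standalone sketch (simplified PKRU handling, invented sample value):

  #include <stdio.h>

  #ifndef PKEY_DISABLE_ACCESS
  # define PKEY_DISABLE_ACCESS   0x1
  #endif
  #ifndef PKEY_DISABLE_WRITE
  # define PKEY_DISABLE_WRITE    0x2
  #endif

  /* Static and hw_-prefixed, so it cannot clash with a pkey_get()
   * prototype exported by newer C library headers:                */
  static unsigned int hw_pkey_get(unsigned int pkru, int pkey)
  {
          /* Each pkey owns two rights bits in the PKRU image: */
          return (pkru >> (pkey * 2)) &
                 (PKEY_DISABLE_ACCESS | PKEY_DISABLE_WRITE);
  }

  int main(void)
  {
          printf("%#x\n", hw_pkey_get(0xc, 1));   /* prints 0x3 */
          return 0;
  }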

Cc: Dave Hansen 
Cc: Linus Torvalds 
Cc: Peter Zijlstra 
Cc: Thomas Gleixner 
Cc: a...@linux-foundation.org
Cc: dave.han...@intel.com
Cc: linux...@kvack.org
Cc: linux...@us.ibm.com
Cc: m...@ellerman.id.au
Cc: shake...@google.com
Cc: sh...@kernel.org
Link: http://lkml.kernel.org/r/20180514085623.gb7...@gmail.com
Signed-off-by: Ingo Molnar 
---
 tools/testing/selftests/x86/protection_keys.c | 67 ---
 1 file changed, 41 insertions(+), 26 deletions(-)

diff --git a/tools/testing/selftests/x86/protection_keys.c 
b/tools/testing/selftests/x86/protection_keys.c
index f15aa5a76fe3..bbe80a5c31c7 100644
--- a/tools/testing/selftests/x86/protection_keys.c
+++ b/tools/testing/selftests/x86/protection_keys.c
@@ -191,26 +191,30 @@ void lots_o_noops_around_write(int *write_to_me)
 #ifdef __i386__
 
 #ifndef SYS_mprotect_key
-# define SYS_mprotect_key 380
+# define SYS_mprotect_key  380
 #endif
+
 #ifndef SYS_pkey_alloc
-# define SYS_pkey_alloc 381
-# define SYS_pkey_free  382
+# define SYS_pkey_alloc381
+# define SYS_pkey_free 382
 #endif
-#define REG_IP_IDX REG_EIP
-#define si_pkey_offset 0x14
+
+#define REG_IP_IDX REG_EIP
+#define si_pkey_offset 0x14
 
 #else
 
 #ifndef SYS_mprotect_key
-# define SYS_mprotect_key 329
+# define SYS_mprotect_key  329
 #endif
+
 #ifndef SYS_pkey_alloc
-# define SYS_pkey_alloc 330
-# define SYS_pkey_free  331
+# define SYS_pkey_alloc330
+# define SYS_pkey_free 331
 #endif
-#define REG_IP_IDX REG_RIP
-#define si_pkey_offset 0x20
+
+#define REG_IP_IDX REG_RIP
+#define si_pkey_offset 0x20
 
 #endif
 
@@ -225,8 +229,14 @@ void dump_mem(void *dumpme, int len_bytes)
}
 }
 
-#define SEGV_BNDERR 3  /* failed address bound checks */
-#define SEGV_PKUERR 4
+/* Failed address bound checks: */
+#ifndef SEGV_BNDERR
+# define SEGV_BNDERR   3
+#endif
+
+#ifndef SEGV_PKUERR
+# define SEGV_PKUERR   4
+#endif
 
 static char *si_code_str(int si_code)
 {
@@ -393,10 +403,15 @@ pid_t fork_lazy_child(void)
return forkret;
 }
 
-#define PKEY_DISABLE_ACCESS0x1
-#define PKEY_DISABLE_WRITE 0x2
+#ifndef PKEY_DISABLE_ACCESS
+# define PKEY_DISABLE_ACCESS   0x1
+#endif
+
+#ifndef PKEY_DISABLE_WRITE
+# define PKEY_DISABLE_WRITE0x2
+#endif
 
-u32 pkey_get(int pkey, unsigned long flags)
+static u32 hw_pkey_get(int pkey, unsigned long flags)
 {
u32 mask = (PKEY_DISABLE_ACCESS|PKEY_DISABLE_WRITE);
u32 pkru = __rdpkru();
@@ -418,7 +433,7 @@ u32 pkey_get(int pkey, unsigned long flags)
return masked_pkru;
 }
 
-int pkey_set(int pkey, unsigned long rights, unsigned long flags)
+static int hw_pkey_set(int pkey, unsigned long rights, unsigned long flags)
 {
u32 mask = (PKEY_DISABLE_ACCESS|PKEY_DISABLE_WRITE);
u32 old_pkru = __rdpkru();
@@ -452,15 +467,15 @@ void pkey_disable_set(int pkey, int flags)
pkey, flags);
pkey_assert(flags & (PKEY_DISABLE_ACCESS | PKEY_DISABLE_WRITE));
 
-   pkey_rights = pkey_get(pkey, syscall_flags);
+   pkey_rights = hw_pkey_get(pkey, syscall_flags);
 
-   dprintf1("%s(%d) pkey_get(%d): %x\n", __func__,
+   dprintf1("%s(%d) hw_pkey_get(%d): %x\n", __func__,
pkey, pkey, pkey_rights);
pkey_assert(pkey_rights >= 0);
 
pkey_rights |= flags;
 
-   ret = pkey_set(pkey, pkey_rights, syscall_flags);
+   ret = hw_pkey_set(pkey, pkey_rights, syscall_flags);
assert(!ret);
/*pkru and flags have the same format */
shadow_pkru |= flags << (pkey * 2);
@@ -468,8 +483,8 @@ void pkey_disable_set(int pkey, int flags)
 
pkey_assert(ret >= 0);
 
-   pkey_rights = pkey_get(pkey, syscall_flags);
-   dprintf1("%s(%d) pkey_get(%d): %x\n", __func__,
+   pkey_rights = hw_pkey_get(pkey, syscall_flags);
+   dprintf1("%s(%d) hw_pkey_get(%d): %x\n", __func__,
pkey, pkey, pkey_rights);
 
dprintf1("%s(%d) pkru: 0x%x\n", __func__, pkey, rdpkru());
@@ -483,24 +498,24 @@ void pkey_disable_clear(int pkey, int flags)
 {
unsigned long 


[tip:locking/core] locking/atomics: Shorten the __atomic_op() defines to __op()

2018-05-06 Thread tip-bot for Ingo Molnar
Commit-ID:  ad6812db385540eb2457c945a8e95fc9095b706c
Gitweb: https://git.kernel.org/tip/ad6812db385540eb2457c945a8e95fc9095b706c
Author: Ingo Molnar 
AuthorDate: Sat, 5 May 2018 12:48:58 +0200
Committer:  Ingo Molnar 
CommitDate: Sat, 5 May 2018 15:23:55 +0200

locking/atomics: Shorten the __atomic_op() defines to __op()

The __atomic prefix is somewhat of a misnomer, because not all
APIs we use with these macros have an atomic_ prefix.

This also reduces the length of the longest lines in the header,
making them more readable on PeterZ's terminals.

No change in functionality.
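
For illustration only (not part of the commit), a user-space toy model
of what these wrapper macros do: paste the op name onto _relaxed and
wrap ordering around it (real barriers replaced by a compiler barrier):

  #include <stdio.h>

  static int fetch_add_relaxed(int *p, int i)
  {
          int old = *p;
          *p += i;
          return old;
  }

  #define barrier() __asm__ __volatile__("" ::: "memory")

  /* Same shape as __op_fence(): full ordering around the relaxed op: */
  #define __op_fence(op, args...)                         \
  ({                                                      \
          typeof(op##_relaxed(args)) __ret;               \
          barrier();                                      \
          __ret = op##_relaxed(args);                     \
          barrier();                                      \
          __ret;                                          \
  })

  #define fetch_add(...) __op_fence(fetch_add, __VA_ARGS__)

  int main(void)
  {
          int v = 40;
          int old = fetch_add(&v, 2);
          printf("%d -> %d\n", old, v);   /* prints 40 -> 42 */
          return 0;
  }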

Cc: Andrew Morton 
Cc: Linus Torvalds 
Cc: Mark Rutland 
Cc: Paul E. McKenney 
Cc: Peter Zijlstra 
Cc: Thomas Gleixner 
Cc: Will Deacon 
Cc: aryabi...@virtuozzo.com
Cc: boqun.f...@gmail.com
Cc: catalin.mari...@arm.com
Cc: dvyu...@google.com
Cc: linux-arm-ker...@lists.infradead.org
Link: http://lkml.kernel.org/r/20180505104858.ap4bfv6ip2vpr...@gmail.com
Signed-off-by: Ingo Molnar 
---
 arch/alpha/include/asm/atomic.h|   4 +-
 arch/powerpc/include/asm/cmpxchg.h |   8 +-
 arch/riscv/include/asm/atomic.h|   4 +-
 include/linux/atomic.h | 204 +++--
 4 files changed, 111 insertions(+), 109 deletions(-)

diff --git a/arch/alpha/include/asm/atomic.h b/arch/alpha/include/asm/atomic.h
index 767bfdd42992..786edb5f16c4 100644
--- a/arch/alpha/include/asm/atomic.h
+++ b/arch/alpha/include/asm/atomic.h
@@ -21,8 +21,8 @@
  * barriered versions. To avoid redundant back-to-back fences, we can
  * define the _acquire and _fence versions explicitly.
  */
-#define __atomic_op_acquire(op, args...)   op##_relaxed(args)
-#define __atomic_op_fence  __atomic_op_release
+#define __op_acquire(op, args...)  op##_relaxed(args)
+#define __op_fence __op_release
 
 #define ATOMIC_INIT(i) { (i) }
 #define ATOMIC64_INIT(i)   { (i) }
diff --git a/arch/powerpc/include/asm/cmpxchg.h 
b/arch/powerpc/include/asm/cmpxchg.h
index e27a612b957f..dc5a5426d683 100644
--- a/arch/powerpc/include/asm/cmpxchg.h
+++ b/arch/powerpc/include/asm/cmpxchg.h
@@ -13,14 +13,14 @@
  * a "bne-" instruction at the end, so an isync is enough as a acquire barrier
  * on the platform without lwsync.
  */
-#define __atomic_op_acquire(op, args...)   \
+#define __op_acquire(op, args...)  \
 ({ \
typeof(op##_relaxed(args)) __ret  = op##_relaxed(args); \
__asm__ __volatile__(PPC_ACQUIRE_BARRIER "" : : : "memory");\
__ret;  \
 })
 
-#define __atomic_op_release(op, args...)   \
+#define __op_release(op, args...)  \
 ({ \
__asm__ __volatile__(PPC_RELEASE_BARRIER "" : : : "memory");\
op##_relaxed(args); \
@@ -531,7 +531,7 @@ __cmpxchg_acquire(void *ptr, unsigned long old, unsigned 
long new,
sizeof(*(ptr)));\
 })
 
-#define cmpxchg_release(...) __atomic_op_release(cmpxchg, __VA_ARGS__)
+#define cmpxchg_release(...) __op_release(cmpxchg, __VA_ARGS__)
 
 #ifdef CONFIG_PPC64
 #define cmpxchg64(ptr, o, n)   \
@@ -555,7 +555,7 @@ __cmpxchg_acquire(void *ptr, unsigned long old, unsigned 
long new,
cmpxchg_acquire((ptr), (o), (n));   \
 })
 
-#define cmpxchg64_release(...) __atomic_op_release(cmpxchg64, __VA_ARGS__)
+#define cmpxchg64_release(...) __op_release(cmpxchg64, __VA_ARGS__)
 
 #else
 #include 
diff --git a/arch/riscv/include/asm/atomic.h b/arch/riscv/include/asm/atomic.h
index 855115ace98c..992c0aff9554 100644
--- a/arch/riscv/include/asm/atomic.h
+++ b/arch/riscv/include/asm/atomic.h
@@ -25,14 +25,14 @@
 
 #define ATOMIC_INIT(i) { (i) }
 
-#define __atomic_op_acquire(op, args...)   \
+#define __op_acquire(op, args...)  \
 ({ \
typeof(op##_relaxed(args)) __ret  = op##_relaxed(args); \
__asm__ __volatile__(RISCV_ACQUIRE_BARRIER "" ::: "memory");\
__ret;  \
 })
 
-#define __atomic_op_release(op, args...)   \
+#define __op_release(op, args...)  \
 ({ \
__asm__ __volatile__(RISCV_RELEASE_BARRIER "" ::: "memory");\
op##_relaxed(args); \
diff --git a/include/linux/atomic.h b/include/linux/atomic.h
index 1176cf7c6f03..f32ff6d9e4d2 

[tip:locking/core] locking/atomics: Combine the atomic_andnot() and atomic64_andnot() API definitions

2018-05-06 Thread tip-bot for Ingo Molnar
Commit-ID:  7b9b2e57c7edaeac5404f39c5974ff227540d41e
Gitweb: https://git.kernel.org/tip/7b9b2e57c7edaeac5404f39c5974ff227540d41e
Author: Ingo Molnar 
AuthorDate: Sat, 5 May 2018 10:54:45 +0200
Committer:  Ingo Molnar 
CommitDate: Sat, 5 May 2018 15:22:45 +0200

locking/atomics: Combine the atomic_andnot() and atomic64_andnot() API 
definitions

The atomic_andnot() and atomic64_andnot() APIs are defined in 4 separate
groups spread out in the atomic.h header:

 #ifdef atomic_andnot
 ...
 #endif /* atomic_andnot */
 ...
 #ifndef atomic_andnot
 ...
 #endif
 ...
 #ifdef atomic64_andnot
 ...
 #endif /* atomic64_andnot */
 ...
 #ifndef atomic64_andnot
 ...
 #endif

Combine and unify them into two groups:

 #ifdef atomic_andnot
 #else
 #endif

 ...

 #ifdef atomic64_andnot
 #else
 #endif

So that one API group is defined in a single place within the header.
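
For illustration only (not part of the commit): when an architecture
defines no atomic_andnot() of its own, the #else branch derives the
whole group from atomic_and(), because andnot is an AND with the
complemented mask. A plain-integer model of that arithmetic:

  #include <stdio.h>

  static unsigned int andnot(unsigned int val, unsigned int mask)
  {
          return val & ~mask;     /* clear the bits set in mask */
  }

  int main(void)
  {
          printf("%#x\n", andnot(0xff, 0x0f));    /* prints 0xf0 */
          return 0;
  }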

Cc: Andrew Morton 
Cc: Linus Torvalds 
Cc: Mark Rutland 
Cc: Paul E. McKenney 
Cc: Peter Zijlstra 
Cc: Thomas Gleixner 
Cc: Will Deacon 
Cc: aryabi...@virtuozzo.com
Cc: boqun.f...@gmail.com
Cc: catalin.mari...@arm.com
Cc: dvyu...@google.com
Cc: linux-arm-ker...@lists.infradead.org
Link: http://lkml.kernel.org/r/20180505085445.cmdnqh6xpnpfo...@gmail.com
Signed-off-by: Ingo Molnar 
---
 include/linux/atomic.h | 72 +-
 1 file changed, 36 insertions(+), 36 deletions(-)

diff --git a/include/linux/atomic.h b/include/linux/atomic.h
index 352ecc72d7f5..1176cf7c6f03 100644
--- a/include/linux/atomic.h
+++ b/include/linux/atomic.h
@@ -205,22 +205,6 @@
 # endif
 #endif
 
-#ifdef atomic_andnot
-
-#ifndef atomic_fetch_andnot_relaxed
-# define atomic_fetch_andnot_relaxed   atomic_fetch_andnot
-# define atomic_fetch_andnot_acquire   atomic_fetch_andnot
-# define atomic_fetch_andnot_release   atomic_fetch_andnot
-#else
-# ifndef atomic_fetch_andnot
-#  define atomic_fetch_andnot(...) 
__atomic_op_fence(atomic_fetch_andnot, __VA_ARGS__)
-#  define atomic_fetch_andnot_acquire(...) 
__atomic_op_acquire(atomic_fetch_andnot, __VA_ARGS__)
-#  define atomic_fetch_andnot_release(...) 
__atomic_op_release(atomic_fetch_andnot, __VA_ARGS__)
-# endif
-#endif
-
-#endif /* atomic_andnot */
-
 #ifndef atomic_fetch_xor_relaxed
 # define atomic_fetch_xor_relaxed  atomic_fetch_xor
 # define atomic_fetch_xor_acquire  atomic_fetch_xor
@@ -338,7 +322,22 @@ static inline int atomic_add_unless(atomic_t *v, int a, 
int u)
 # define atomic_inc_not_zero(v)atomic_add_unless((v), 
1, 0)
 #endif
 
-#ifndef atomic_andnot
+#ifdef atomic_andnot
+
+#ifndef atomic_fetch_andnot_relaxed
+# define atomic_fetch_andnot_relaxed   atomic_fetch_andnot
+# define atomic_fetch_andnot_acquire   atomic_fetch_andnot
+# define atomic_fetch_andnot_release   atomic_fetch_andnot
+#else
+# ifndef atomic_fetch_andnot
+#  define atomic_fetch_andnot(...) 
__atomic_op_fence(atomic_fetch_andnot, __VA_ARGS__)
+#  define atomic_fetch_andnot_acquire(...) 
__atomic_op_acquire(atomic_fetch_andnot, __VA_ARGS__)
+#  define atomic_fetch_andnot_release(...) 
__atomic_op_release(atomic_fetch_andnot, __VA_ARGS__)
+# endif
+#endif
+
+#else /* !atomic_andnot: */
+
 static inline void atomic_andnot(int i, atomic_t *v)
 {
atomic_and(~i, v);
@@ -363,7 +362,8 @@ static inline int atomic_fetch_andnot_release(int i, 
atomic_t *v)
 {
return atomic_fetch_and_release(~i, v);
 }
-#endif
+
+#endif /* !atomic_andnot */
 
 /**
  * atomic_inc_not_zero_hint - increment if not null
@@ -600,22 +600,6 @@ static inline int atomic_dec_if_positive(atomic_t *v)
 # endif
 #endif
 
-#ifdef atomic64_andnot
-
-#ifndef atomic64_fetch_andnot_relaxed
-# define atomic64_fetch_andnot_relaxed atomic64_fetch_andnot
-# define atomic64_fetch_andnot_acquire atomic64_fetch_andnot
-# define atomic64_fetch_andnot_release atomic64_fetch_andnot
-#else
-# ifndef atomic64_fetch_andnot
-#  define atomic64_fetch_andnot(...)   
__atomic_op_fence(atomic64_fetch_andnot, __VA_ARGS__)
-#  define atomic64_fetch_andnot_acquire(...)   
__atomic_op_acquire(atomic64_fetch_andnot, __VA_ARGS__)
-#  define atomic64_fetch_andnot_release(...)   
__atomic_op_release(atomic64_fetch_andnot, __VA_ARGS__)
-# endif
-#endif
-
-#endif /* atomic64_andnot */
-
 #ifndef atomic64_fetch_xor_relaxed
 # define atomic64_fetch_xor_relaxedatomic64_fetch_xor
 # define atomic64_fetch_xor_acquireatomic64_fetch_xor
@@ -672,7 +656,22 @@ static inline int atomic_dec_if_positive(atomic_t *v)
 # define atomic64_try_cmpxchg_release  atomic64_try_cmpxchg
 #endif
 
-#ifndef atomic64_andnot
+#ifdef atomic64_andnot
+
+#ifndef atomic64_fetch_andnot_relaxed
+# define atomic64_fetch_andnot_relaxed atomic64_fetch_andnot
+# define atomic64_fetch_andnot_acquire atomic64_fetch_andnot
+# define atomic64_fetch_andnot_release atomic64_fetch_andnot

[tip:locking/core] locking/atomics: Simplify the op definitions in atomic.h some more

2018-05-06 Thread tip-bot for Ingo Molnar
Commit-ID:  87d655a48dfe74293f72dc001ed042142cf00d44
Gitweb: https://git.kernel.org/tip/87d655a48dfe74293f72dc001ed042142cf00d44
Author: Ingo Molnar 
AuthorDate: Sat, 5 May 2018 10:36:35 +0200
Committer:  Ingo Molnar 
CommitDate: Sat, 5 May 2018 15:22:44 +0200

locking/atomics: Simplify the op definitions in atomic.h some more

Before:

 #ifndef atomic_fetch_dec_relaxed
 # ifndef atomic_fetch_dec
 #  define atomic_fetch_dec(v)  atomic_fetch_sub(1, (v))
 #  define atomic_fetch_dec_relaxed(v)  atomic_fetch_sub_relaxed(1, (v))
 #  define atomic_fetch_dec_acquire(v)  atomic_fetch_sub_acquire(1, (v))
 #  define atomic_fetch_dec_release(v)  atomic_fetch_sub_release(1, (v))
 # else
 #  define atomic_fetch_dec_relaxed atomic_fetch_dec
 #  define atomic_fetch_dec_acquire atomic_fetch_dec
 #  define atomic_fetch_dec_release atomic_fetch_dec
 # endif
 #else
 # ifndef atomic_fetch_dec_acquire
 #  define atomic_fetch_dec_acquire(...)
__atomic_op_acquire(atomic_fetch_dec, __VA_ARGS__)
 # endif
 # ifndef atomic_fetch_dec_release
 #  define atomic_fetch_dec_release(...)
__atomic_op_release(atomic_fetch_dec, __VA_ARGS__)
 # endif
 # ifndef atomic_fetch_dec
 #  define atomic_fetch_dec(...)
__atomic_op_fence(atomic_fetch_dec, __VA_ARGS__)
 # endif
 #endif

After:

 #ifndef atomic_fetch_dec_relaxed
 # ifndef atomic_fetch_dec
 #  define atomic_fetch_dec(v)  atomic_fetch_sub(1, (v))
 #  define atomic_fetch_dec_relaxed(v)  atomic_fetch_sub_relaxed(1, (v))
 #  define atomic_fetch_dec_acquire(v)  atomic_fetch_sub_acquire(1, (v))
 #  define atomic_fetch_dec_release(v)  atomic_fetch_sub_release(1, (v))
 # else
 #  define atomic_fetch_dec_relaxed atomic_fetch_dec
 #  define atomic_fetch_dec_acquire atomic_fetch_dec
 #  define atomic_fetch_dec_release atomic_fetch_dec
 # endif
 #else
 # ifndef atomic_fetch_dec
 #  define atomic_fetch_dec(...)
__atomic_op_fence(atomic_fetch_dec, __VA_ARGS__)
 #  define atomic_fetch_dec_acquire(...)
__atomic_op_acquire(atomic_fetch_dec, __VA_ARGS__)
 #  define atomic_fetch_dec_release(...)
__atomic_op_release(atomic_fetch_dec, __VA_ARGS__)
 # endif
 #endif

The idea is that because we already group these APIs by certain defines
such as atomic_fetch_dec_relaxed and atomic_fetch_dec in the primary
branches, we can do the same in the secondary branch as well.

( Also remove some unnecessarily duplicate comments, as the API
  group defines are now pretty much self-documenting. )

No change in functionality.
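
For illustration only (not part of the commit), the grouping idea as a
self-contained toy with invented names: when the strongest variant is
absent, the whole group is defined together in a single branch instead
of one #ifndef per variant:

  #include <stdio.h>

  #define fetch_dec_relaxed(v)    ((*(v))--)

  #ifndef fetch_dec
  /* One branch defines the whole API group at once: */
  # define fetch_dec(v)           fetch_dec_relaxed(v)
  # define fetch_dec_acquire(v)   fetch_dec_relaxed(v)
  # define fetch_dec_release(v)   fetch_dec_relaxed(v)
  #endif

  int main(void)
  {
          int v = 3;
          int old = fetch_dec(&v);
          printf("%d %d\n", old, v);      /* prints 3 2 */
          return 0;
  }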

Cc: Andrew Morton 
Cc: Linus Torvalds 
Cc: Mark Rutland 
Cc: Paul E. McKenney 
Cc: Peter Zijlstra 
Cc: Thomas Gleixner 
Cc: Will Deacon 
Cc: aryabi...@virtuozzo.com
Cc: boqun.f...@gmail.com
Cc: catalin.mari...@arm.com
Cc: dvyu...@google.com
Cc: linux-arm-ker...@lists.infradead.org
Link: http://lkml.kernel.org/r/20180505083635.622xmcvb42dw5...@gmail.com
Signed-off-by: Ingo Molnar 
---
 include/linux/atomic.h | 312 ++---
 1 file changed, 62 insertions(+), 250 deletions(-)

diff --git a/include/linux/atomic.h b/include/linux/atomic.h
index 12f4ad559ab1..352ecc72d7f5 100644
--- a/include/linux/atomic.h
+++ b/include/linux/atomic.h
@@ -71,98 +71,66 @@
 })
 #endif
 
-/* atomic_add_return_relaxed() et al: */
-
 #ifndef atomic_add_return_relaxed
 # define atomic_add_return_relaxed atomic_add_return
 # define atomic_add_return_acquire atomic_add_return
 # define atomic_add_return_release atomic_add_return
 #else
-# ifndef atomic_add_return_acquire
-#  define atomic_add_return_acquire(...)   
__atomic_op_acquire(atomic_add_return, __VA_ARGS__)
-# endif
-# ifndef atomic_add_return_release
-#  define atomic_add_return_release(...)   
__atomic_op_release(atomic_add_return, __VA_ARGS__)
-# endif
 # ifndef atomic_add_return
 #  define atomic_add_return(...)   
__atomic_op_fence(atomic_add_return, __VA_ARGS__)
+#  define atomic_add_return_acquire(...)   
__atomic_op_acquire(atomic_add_return, __VA_ARGS__)
+#  define atomic_add_return_release(...)   
__atomic_op_release(atomic_add_return, __VA_ARGS__)
 # endif
 #endif
 
-/* atomic_inc_return_relaxed() et al: */
-
 #ifndef atomic_inc_return_relaxed
 # define atomic_inc_return_relaxed atomic_inc_return
 # define atomic_inc_return_acquire atomic_inc_return
 # define atomic_inc_return_release atomic_inc_return
 #else
-# ifndef atomic_inc_return_acquire
-#  define atomic_inc_return_acquire(...)   
__atomic_op_acquire(atomic_inc_return, __VA_ARGS__)
-# endif
-# ifndef atomic_inc_return_release
-#  define atomic_inc_return_release(...)   
__atomic_op_release(atomic_inc_return, __VA_ARGS__)
-# endif
 # ifndef atomic_inc_return
 #  define atomic_inc_return(...)   

[tip:locking/core] locking/atomics: Clean up the atomic.h maze of #defines

2018-05-06 Thread tip-bot for Ingo Molnar
Commit-ID:  a2d636a4bfd5e9b31215e5d1913e7fe0d0c0970a
Gitweb: https://git.kernel.org/tip/a2d636a4bfd5e9b31215e5d1913e7fe0d0c0970a
Author: Ingo Molnar 
AuthorDate: Sat, 5 May 2018 10:11:00 +0200
Committer:  Ingo Molnar 
CommitDate: Sat, 5 May 2018 15:22:44 +0200

locking/atomics: Clean up the atomic.h maze of #defines

Use structured defines to make it all much more readable.

Before:

 #ifndef atomic_fetch_dec_relaxed

 #ifndef atomic_fetch_dec
 #define atomic_fetch_dec(v)atomic_fetch_sub(1, (v))
 #define atomic_fetch_dec_relaxed(v)atomic_fetch_sub_relaxed(1, (v))
 #define atomic_fetch_dec_acquire(v)atomic_fetch_sub_acquire(1, (v))
 #define atomic_fetch_dec_release(v)atomic_fetch_sub_release(1, (v))
 #else /* atomic_fetch_dec */
 #define atomic_fetch_dec_relaxed   atomic_fetch_dec
 #define atomic_fetch_dec_acquire   atomic_fetch_dec
 #define atomic_fetch_dec_release   atomic_fetch_dec
 #endif /* atomic_fetch_dec */

 #else /* atomic_fetch_dec_relaxed */

 #ifndef atomic_fetch_dec_acquire
 #define atomic_fetch_dec_acquire(...)  \
__atomic_op_acquire(atomic_fetch_dec, __VA_ARGS__)
 #endif

 #ifndef atomic_fetch_dec_release
 #define atomic_fetch_dec_release(...)  \
__atomic_op_release(atomic_fetch_dec, __VA_ARGS__)
 #endif

 #ifndef atomic_fetch_dec
 #define atomic_fetch_dec(...)  \
__atomic_op_fence(atomic_fetch_dec, __VA_ARGS__)
 #endif
 #endif /* atomic_fetch_dec_relaxed */

After:

 #ifndef atomic_fetch_dec_relaxed
 # ifndef atomic_fetch_dec
 #  define atomic_fetch_dec(v)  atomic_fetch_sub(1, (v))
 #  define atomic_fetch_dec_relaxed(v)  atomic_fetch_sub_relaxed(1, (v))
 #  define atomic_fetch_dec_acquire(v)  atomic_fetch_sub_acquire(1, (v))
 #  define atomic_fetch_dec_release(v)  atomic_fetch_sub_release(1, (v))
 # else
 #  define atomic_fetch_dec_relaxed atomic_fetch_dec
 #  define atomic_fetch_dec_acquire atomic_fetch_dec
 #  define atomic_fetch_dec_release atomic_fetch_dec
 # endif
 #else
 # ifndef atomic_fetch_dec_acquire
 #  define atomic_fetch_dec_acquire(...)
__atomic_op_acquire(atomic_fetch_dec, __VA_ARGS__)
 # endif
 # ifndef atomic_fetch_dec_release
 #  define atomic_fetch_dec_release(...)
__atomic_op_release(atomic_fetch_dec, __VA_ARGS__)
 # endif
 # ifndef atomic_fetch_dec
 #  define atomic_fetch_dec(...)
__atomic_op_fence(atomic_fetch_dec, __VA_ARGS__)
 # endif
 #endif

Beyond the linecount reduction this also makes it easier to follow
the various conditions.

Also clean up a few other minor details and make the code more
consistent throughout.

No change in functionality.

Cc: Andrew Morton 
Cc: Linus Torvalds 
Cc: Mark Rutland 
Cc: Paul E. McKenney 
Cc: Peter Zijlstra 
Cc: Thomas Gleixner 
Cc: Will Deacon 
Cc: aryabi...@virtuozzo.com
Cc: boqun.f...@gmail.com
Cc: catalin.mari...@arm.com
Cc: dvyu...@google.com
Cc: linux-arm-ker...@lists.infradead.org
Link: http://lkml.kernel.org/r/20180505081100.nsyrqrpzq2vd2...@gmail.com
Signed-off-by: Ingo Molnar 
---
 include/linux/atomic.h | 1275 +---
 1 file changed, 543 insertions(+), 732 deletions(-)

diff --git a/include/linux/atomic.h b/include/linux/atomic.h
index 01ce3997cb42..12f4ad559ab1 100644
--- a/include/linux/atomic.h
+++ b/include/linux/atomic.h
@@ -24,11 +24,11 @@
  */
 
 #ifndef atomic_read_acquire
-#define  atomic_read_acquire(v)smp_load_acquire(&(v)->counter)
+# define atomic_read_acquire(v)
smp_load_acquire(&(v)->counter)
 #endif
 
 #ifndef atomic_set_release
-#define  atomic_set_release(v, i)  smp_store_release(&(v)->counter, (i))
+# define atomic_set_release(v, i)  
smp_store_release(&(v)->counter, (i))
 #endif
 
 /*
@@ -71,454 +71,351 @@
 })
 #endif
 
-/* atomic_add_return_relaxed */
-#ifndef atomic_add_return_relaxed
-#define  atomic_add_return_relaxed atomic_add_return
-#define  atomic_add_return_acquire atomic_add_return
-#define  atomic_add_return_release atomic_add_return
-
-#else /* atomic_add_return_relaxed */
-
-#ifndef atomic_add_return_acquire
-#define  atomic_add_return_acquire(...)
\
-   __atomic_op_acquire(atomic_add_return, __VA_ARGS__)
-#endif
+/* atomic_add_return_relaxed() et al: */
 
-#ifndef atomic_add_return_release
-#define  atomic_add_return_release(...)
\
-   __atomic_op_release(atomic_add_return, __VA_ARGS__)
-#endif
-
-#ifndef atomic_add_return
-#define  atomic_add_return(...)
\
-   __atomic_op_fence(atomic_add_return, __VA_ARGS__)
-#endif
-#endif /* atomic_add_return_relaxed */
+#ifndef atomic_add_return_relaxed
+# define atomic_add_return_relaxed atomic_add_return
+# 


[tip:perf/urgent] tools/headers: Synchronize kernel ABI headers, v4.17-rc1

2018-04-21 Thread tip-bot for Ingo Molnar
Commit-ID:  e2f73a1828e9ffd2765ce1726b9a9c6e022e3cd6
Gitweb: https://git.kernel.org/tip/e2f73a1828e9ffd2765ce1726b9a9c6e022e3cd6
Author: Ingo Molnar 
AuthorDate: Mon, 16 Apr 2018 08:18:22 +0200
Committer:  Arnaldo Carvalho de Melo 
CommitDate: Tue, 17 Apr 2018 09:47:39 -0300

tools/headers: Synchronize kernel ABI headers, v4.17-rc1

Sync the following tooling headers with the latest kernel version:

  tools/arch/arm/include/uapi/asm/kvm.h
- New ABI: KVM_REG_ARM_*

  tools/arch/x86/include/asm/required-features.h
- Removal of NEED_LA57 dependency

  tools/arch/x86/include/uapi/asm/kvm.h
- New KVM ABI: KVM_SYNC_X86_*

  tools/include/uapi/asm-generic/mman-common.h
- New ABI: MAP_FIXED_NOREPLACE flag

  tools/include/uapi/linux/bpf.h
- New ABI: BPF_F_SEQ_NUMBER functions

  tools/include/uapi/linux/if_link.h
- New ABI: IFLA tun and rmnet support

  tools/include/uapi/linux/kvm.h
- New ABI: hyperv eventfd and CONN_ID_MASK support plus header cleanups

  tools/include/uapi/sound/asound.h
- New ABI: SNDRV_PCM_FORMAT_FIRST PCM format specifier

  tools/perf/arch/x86/entry/syscalls/syscall_64.tbl
- The x86 system call table description changed due to the ptregs changes 
and the renames, in:

d5a00528b58c: syscalls/core, syscalls/x86: Rename struct pt_regs-based 
sys_*() to __x64_sys_*()
5ac9efa3c50d: syscalls/core, syscalls/x86: Clean up compat syscall stub 
naming convention
ebeb8c82ffaf: syscalls/x86: Use 'struct pt_regs' based syscall calling 
for IA32_EMULATION and x32

Also fix the x86 syscall table warning:

  -Warning: Kernel ABI header at 'tools/arch/x86/entry/syscalls/syscall_64.tbl' 
differs from latest version at 'arch/x86/entry/syscalls/syscall_64.tbl'
  +Warning: Kernel ABI header at 
'tools/perf/arch/x86/entry/syscalls/syscall_64.tbl' differs from latest version 
at 'arch/x86/entry/syscalls/syscall_64.tbl'

None of these changes impact existing tooling code, so we only have to copy the 
kernel version.

Signed-off-by: Ingo Molnar 
Cc: Adrian Hunter 
Cc: Alexander Potapenko 
Cc: Alexander Shishkin 
Cc: Alexei Starovoitov 
Cc: Alexey Budankov 
Cc: Andi Kleen 
Cc: Andrey Ryabinin 
Cc: Andy Lutomirski 
Cc: Arnd Bergmann 
Cc: Brian Robbins 
Cc: Clark Williams 
Cc: Daniel Borkmann 
Cc: David Ahern 
Cc: Dmitriy Vyukov  
Cc: Heiko Carstens 
Cc: Hendrik Brueckner 
Cc: Jesper Dangaard Brouer 
Cc: Jin Yao 
Cc: Jiri Olsa 
Cc: Josh Poimboeuf 
Cc: Kan Liang 
Cc: Kim Phillips 
Cc: Linus Torvalds 
Cc: Li Zhijian 
Cc: Mark Rutland 
Cc: Martin Liška 
Cc: Martin Schwidefsky 
Cc: Matthias Kaehlcke 
Cc: Miguel Bernal Marin 
Cc: Namhyung Kim 
Cc: Naveen N. Rao 
Cc: Peter Zijlstra 
Cc: Ravi Bangoria 
Cc: Sandipan Das 
Cc: Stephane Eranian 
Cc: Stephen Rothwell 
Cc: Takuya Yamamoto 
Cc: Thomas Gleixner 
Cc: Thomas Richter 
Cc: Wang Nan 
Cc: William Cohen 
Cc: Yonghong Song 
Link: http://lkml.kernel.org/r/20180416064024.ofjtrz5yuu3yk...@gmail.com
Signed-off-by: Arnaldo Carvalho de Melo 
---
 tools/arch/arm/include/uapi/asm/kvm.h |   9 +
 tools/arch/x86/include/asm/required-features.h|   8 +-
 tools/arch/x86/include/uapi/asm/kvm.h |  19 +-
 tools/include/uapi/asm-generic/mman-common.h  |   3 +
 tools/include/uapi/linux/bpf.h|   1 +
 tools/include/uapi/linux/if_link.h|  39 ++
 tools/include/uapi/linux/kvm.h|  21 +-
 tools/include/uapi/sound/asound.h |   1 +
 tools/perf/arch/x86/Makefile  |   2 +-
 tools/perf/arch/x86/entry/syscalls/syscall_64.tbl | 712 +++---
 10 files changed, 451 insertions(+), 364 deletions(-)

diff --git a/tools/arch/arm/include/uapi/asm/kvm.h 
b/tools/arch/arm/include/uapi/asm/kvm.h
index 6edd177bb1c7..2ba95d6fe852 100644
--- a/tools/arch/arm/include/uapi/asm/kvm.h
+++ b/tools/arch/arm/include/uapi/asm/kvm.h
@@ -135,6 +135,15 @@ struct kvm_arch_memory_slot {
 #define KVM_REG_ARM_CRM_SHIFT  7
 #define KVM_REG_ARM_32_CRN_MASK0x7800
 #define KVM_REG_ARM_32_CRN_SHIFT   11
+/*
+ * For KVM currently all guest registers are nonsecure, but we reserve a bit
+ * in the encoding to distinguish secure from nonsecure for AArch32 system
+ * registers that are banked by security. This is 1 for the secure banked
+ * register, and 0 for the nonsecure banked register or if the register is
+ * not banked by security.
+ */
+#define KVM_REG_ARM_SECURE_MASK0x1000
+#define KVM_REG_ARM_SECURE_SHIFT   28
 
 #define ARM_CP15_REG_SHIFT_MASK(x,n) \
(((x) << KVM_REG_ARM_ ## n ## _SHIFT) & KVM_REG_ARM_ ## n ## _MASK)
diff --git a/tools/arch/x86/include/asm/required-features.h 
b/tools/arch/x86/include/asm/required-features.h
index fb3a6de7440b..6847d85400a8 100644
--- a/tools/arch/x86/include/asm/required-features.h
+++ b/tools/arch/x86/include/asm/required-features.h
@@ -53,12 +53,6 @@
 # define NEED_MOVBE0
 #endif
 
-#ifdef CONFIG_X86_5LEVEL
-# define 

[tip:perf/core] perf report: Show zero counters as well in 'perf report --stat'

2018-03-20 Thread tip-bot for Ingo Molnar
Commit-ID:  39ce7fb31530c6d4648919e03e16c5e9286a5940
Gitweb: https://git.kernel.org/tip/39ce7fb31530c6d4648919e03e16c5e9286a5940
Author: Ingo Molnar 
AuthorDate: Wed, 7 Mar 2018 16:24:30 +0100
Committer:  Arnaldo Carvalho de Melo 
CommitDate: Fri, 16 Mar 2018 13:55:36 -0300

perf report: Show zero counters as well in 'perf report --stat'

When using 'perf report --stat' recently, it was not clear to me from the
output whether a particular statistics field (LOST_SAMPLES) was absent or
just zero:

  fomalhaut:~> perf report --stat

  Aggregated stats:
   TOTAL events: 495984
MMAP events: 85
COMM events:   3389
EXIT events:   1605
THROTTLE events:  2
  UNTHROTTLE events:  2
FORK events:   3377
  SAMPLE events: 472629
   MMAP2 events:  14753
  FINISHED_ROUND events:139
  THREAD_MAP events:  1
 CPU_MAP events:  1
   TIME_CONV events:  1

I had to check the output several times to ascertain that I wasn't
misreading it, that the field hadn't changed and that I hadn't misremembered
the name. In fact I had to look into the perf source to make sure that zero
fields are indeed not shown.

With the patch applied:

  fomalhaut:~> perf report --stat

  Aggregated stats:
   TOTAL events: 495984
MMAP events: 85
LOST events:  0
COMM events:   3389
EXIT events:   1605
THROTTLE events:  2
  UNTHROTTLE events:  2
FORK events:   3377
READ events:  0
  SAMPLE events: 472629
   MMAP2 events:  14753
 AUX events:  0
ITRACE_START events:  0
LOST_SAMPLES events:  0
  SWITCH events:  0
 SWITCH_CPU_WIDE events:  0
  NAMESPACES events:  0
ATTR events:  0
  EVENT_TYPE events:  0
TRACING_DATA events:  0
BUILD_ID events:  0
  FINISHED_ROUND events:139
ID_INDEX events:  0
   AUXTRACE_INFO events:  0
AUXTRACE events:  0
  AUXTRACE_ERROR events:  0
  THREAD_MAP events:  1
 CPU_MAP events:  1
 STAT_CONFIG events:  0
STAT events:  0
  STAT_ROUND events:  0
EVENT_UPDATE events:  0
   TIME_CONV events:  1
 FEATURE events:  0

It's pretty clear at a glance that LOST_SAMPLES is present but zero.

The original output can still be obtained via:

  fomalhaut:~> perf report --stat | grep -vw 0

  Aggregated stats:
   TOTAL events: 495984
MMAP events: 85
COMM events:   3389
EXIT events:   1605
THROTTLE events:  2
  UNTHROTTLE events:  2
FORK events:   3377
  SAMPLE events: 472629
   MMAP2 events:  14753
  FINISHED_ROUND events:139
  THREAD_MAP events:  1
 CPU_MAP events:  1
   TIME_CONV events:  1

So I don't think there's any real loss in functionality.

Signed-off-by: Ingo Molnar 
Acked-by: Jiri Olsa 
Link: http://lkml.kernel.org/r/20180307152430.7e5h7e657b7bg...@gmail.com
Signed-off-by: Arnaldo Carvalho de Melo 
---
 tools/perf/ui/stdio/hist.c | 6 +-
 1 file changed, 1 insertion(+), 5 deletions(-)

diff --git a/tools/perf/ui/stdio/hist.c b/tools/perf/ui/stdio/hist.c
index 25dd1e0ecc58..6832fcb2e6ff 100644
--- a/tools/perf/ui/stdio/hist.c
+++ b/tools/perf/ui/stdio/hist.c
@@ -840,15 +840,11 @@ size_t events_stats__fprintf(struct events_stats *stats, FILE *fp)
for (i = 0; i < PERF_RECORD_HEADER_MAX; ++i) {
const char *name;
 
-   if (stats->nr_events[i] == 0)
-   continue;
-
name = perf_event__name(i);
if (!strcmp(name, "UNKNOWN"))
continue;
 
-   ret += fprintf(fp, "%16s events: %10d\n", name,
-  stats->nr_events[i]);
+   ret += fprintf(fp, "%16s events: %10d\n", name, stats->nr_events[i]);
}
 
return ret;
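
To see the resulting behavior in isolation, here is a minimal standalone
sketch in plain C - not perf's code, with invented record names and counts -
that prints every known record type, zero or not, and skips only genuinely
unknown entries:

  #include <stdio.h>
  #include <string.h>

  #define RECORD_MAX 5

  static const char *record_name(int i)
  {
          /* Toy stand-in for perf_event__name(). */
          static const char * const names[RECORD_MAX] = {
                  "MMAP", "LOST", "COMM", "SAMPLE", "UNKNOWN",
          };
          return names[i];
  }

  int main(void)
  {
          int nr_events[RECORD_MAX] = { 85, 0, 3389, 472629, 7 };
          int i;

          for (i = 0; i < RECORD_MAX; i++) {
                  const char *name = record_name(i);

                  /* The pre-patch loop also skipped zero counts here:
                   *     if (nr_events[i] == 0) continue;
                   */
                  if (!strcmp(name, "UNKNOWN"))
                          continue;

                  printf("%16s events: %10d\n", name, nr_events[i]);
          }
          return 0;
  }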


[tip:irq/core] softirq: Consolidate common code in tasklet_[hi]_action()

2018-03-09 Thread tip-bot for Ingo Molnar
Commit-ID:  82b691bedf05f258f1c86c96ee574b0d7795c0a1
Gitweb: https://git.kernel.org/tip/82b691bedf05f258f1c86c96ee574b0d7795c0a1
Author: Ingo Molnar 
AuthorDate: Tue, 27 Feb 2018 17:48:08 +0100
Committer:  Thomas Gleixner 
CommitDate: Fri, 9 Mar 2018 11:50:55 +0100

softirq: Consolidate common code in tasklet_[hi]_action()

tasklet_action() + tasklet_hi_action() are almost identical.  Move the
common code from both functions into tasklet_action_common() and let
both functions invoke it with different arguments.

[ bigeasy: Split out from RT's "tasklet: Prevent tasklets from going
   into infinite spin in RT" and added a commit message ]

Signed-off-by: Ingo Molnar 
Signed-off-by: Steven Rostedt 
Signed-off-by: Thomas Gleixner 
Signed-off-by: Sebastian Andrzej Siewior 
Cc: Julia Cartwright 
Link: https://lkml.kernel.org/r/20180227164808.10093-3-bige...@linutronix.de

---
 kernel/softirq.c | 54 +++---
 1 file changed, 15 insertions(+), 39 deletions(-)

diff --git a/kernel/softirq.c b/kernel/softirq.c
index 2394b009994f..177de3640c78 100644
--- a/kernel/softirq.c
+++ b/kernel/softirq.c
@@ -490,14 +490,16 @@ void __tasklet_hi_schedule(struct tasklet_struct *t)
 }
 EXPORT_SYMBOL(__tasklet_hi_schedule);
 
-static __latent_entropy void tasklet_action(struct softirq_action *a)
+static void tasklet_action_common(struct softirq_action *a,
+ struct tasklet_head *tl_head,
+ unsigned int softirq_nr)
 {
struct tasklet_struct *list;
 
local_irq_disable();
-   list = __this_cpu_read(tasklet_vec.head);
-   __this_cpu_write(tasklet_vec.head, NULL);
-   __this_cpu_write(tasklet_vec.tail, this_cpu_ptr(&tasklet_vec.head));
+   list = tl_head->head;
+   tl_head->head = NULL;
+   tl_head->tail = &tl_head->head;
local_irq_enable();
 
while (list) {
@@ -519,47 +521,21 @@ static __latent_entropy void tasklet_action(struct softirq_action *a)
 
local_irq_disable();
t->next = NULL;
-   *__this_cpu_read(tasklet_vec.tail) = t;
-   __this_cpu_write(tasklet_vec.tail, &(t->next));
-   __raise_softirq_irqoff(TASKLET_SOFTIRQ);
+   *tl_head->tail = t;
+   tl_head->tail = &t->next;
+   __raise_softirq_irqoff(softirq_nr);
local_irq_enable();
}
 }
 
-static __latent_entropy void tasklet_hi_action(struct softirq_action *a)
+static __latent_entropy void tasklet_action(struct softirq_action *a)
 {
-   struct tasklet_struct *list;
-
-   local_irq_disable();
-   list = __this_cpu_read(tasklet_hi_vec.head);
-   __this_cpu_write(tasklet_hi_vec.head, NULL);
-   __this_cpu_write(tasklet_hi_vec.tail, this_cpu_ptr(&tasklet_hi_vec.head));
-   local_irq_enable();
-
-   while (list) {
-   struct tasklet_struct *t = list;
-
-   list = list->next;
-
-   if (tasklet_trylock(t)) {
-   if (!atomic_read(&t->count)) {
-   if (!test_and_clear_bit(TASKLET_STATE_SCHED,
-   &t->state))
-   BUG();
-   t->func(t->data);
-   tasklet_unlock(t);
-   continue;
-   }
-   tasklet_unlock(t);
-   }
+   tasklet_action_common(a, this_cpu_ptr(&tasklet_vec), TASKLET_SOFTIRQ);
+}
 
-   local_irq_disable();
-   t->next = NULL;
-   *__this_cpu_read(tasklet_hi_vec.tail) = t;
-   __this_cpu_write(tasklet_hi_vec.tail, &(t->next));
-   __raise_softirq_irqoff(HI_SOFTIRQ);
-   local_irq_enable();
-   }
+static __latent_entropy void tasklet_hi_action(struct softirq_action *a)
+{
+   tasklet_action_common(a, this_cpu_ptr(&tasklet_hi_vec), HI_SOFTIRQ);
 }
 
 void tasklet_init(struct tasklet_struct *t,
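
The consolidation pattern itself, reduced to a minimal standalone sketch
(plain userspace C, not kernel code - the list type and names below are
invented): two near-identical drain functions become thin wrappers around
one helper that takes the list head and a queue number:

  #include <stdio.h>

  struct item {
          struct item *next;
          void (*func)(struct item *);
  };

  struct queue {
          struct item *head;
          struct item **tail;
  };

  /* The common body: detach the whole list, then run each item. */
  static void drain_common(struct queue *q, unsigned int nr)
  {
          struct item *list = q->head;

          q->head = NULL;
          q->tail = &q->head;

          while (list) {
                  struct item *t = list;

                  list = list->next;
                  printf("queue %u: running item\n", nr);
                  t->func(t);
          }
  }

  static struct queue vec    = { .tail = &vec.head };
  static struct queue hi_vec = { .tail = &hi_vec.head };

  /* The two former near-duplicates are now one-line wrappers. */
  static void drain(void)    { drain_common(&vec, 0); }
  static void drain_hi(void) { drain_common(&hi_vec, 1); }

  static void hello(struct item *t) { (void)t; puts("hello"); }

  int main(void)
  {
          struct item a = { .func = hello };

          *vec.tail = &a;
          vec.tail = &a.next;
          drain();
          drain_hi();
          return 0;
  }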


[tip:irq/core] softirq: Consolidate common code in __tasklet_[hi]_schedule()

2018-03-09 Thread tip-bot for Ingo Molnar
Commit-ID:  6498ddad301c7a94162915d06d1efe2e5d20f6dc
Gitweb: https://git.kernel.org/tip/6498ddad301c7a94162915d06d1efe2e5d20f6dc
Author: Ingo Molnar 
AuthorDate: Tue, 27 Feb 2018 17:48:07 +0100
Committer:  Thomas Gleixner 
CommitDate: Fri, 9 Mar 2018 11:50:55 +0100

softirq: Consolidate common code in __tasklet_[hi]_schedule()

__tasklet_schedule() and __tasklet_hi_schedule() are almost identical.
Move the common code from both functions into __tasklet_schedule_common()
and let both functions invoke it with different arguments.

[ bigeasy: Split out from RT's "tasklet: Prevent tasklets from going
   into infinite spin in RT" and added a commit message. Use
   this_cpu_ptr(headp) in __tasklet_schedule_common() as suggested
   by Julia Cartwright ]

Signed-off-by: Ingo Molnar 
Signed-off-by: Steven Rostedt 
Signed-off-by: Thomas Gleixner 
Signed-off-by: Sebastian Andrzej Siewior 
Cc: Julia Cartwright 
Link: https://lkml.kernel.org/r/20180227164808.10093-2-bige...@linutronix.de

---
 kernel/softirq.c | 28 
 1 file changed, 16 insertions(+), 12 deletions(-)

diff --git a/kernel/softirq.c b/kernel/softirq.c
index 24d243ef8e71..2394b009994f 100644
--- a/kernel/softirq.c
+++ b/kernel/softirq.c
@@ -460,29 +460,33 @@ struct tasklet_head {
 static DEFINE_PER_CPU(struct tasklet_head, tasklet_vec);
 static DEFINE_PER_CPU(struct tasklet_head, tasklet_hi_vec);
 
-void __tasklet_schedule(struct tasklet_struct *t)
+static void __tasklet_schedule_common(struct tasklet_struct *t,
+ struct tasklet_head __percpu *headp,
+ unsigned int softirq_nr)
 {
+   struct tasklet_head *head;
unsigned long flags;
 
local_irq_save(flags);
+   head = this_cpu_ptr(headp);
t->next = NULL;
-   *__this_cpu_read(tasklet_vec.tail) = t;
-   __this_cpu_write(tasklet_vec.tail, &(t->next));
-   raise_softirq_irqoff(TASKLET_SOFTIRQ);
+   *head->tail = t;
+   head->tail = &(t->next);
+   raise_softirq_irqoff(softirq_nr);
local_irq_restore(flags);
 }
+
+void __tasklet_schedule(struct tasklet_struct *t)
+{
+   __tasklet_schedule_common(t, &tasklet_vec,
+ TASKLET_SOFTIRQ);
+}
 EXPORT_SYMBOL(__tasklet_schedule);
 
 void __tasklet_hi_schedule(struct tasklet_struct *t)
 {
-   unsigned long flags;
-
-   local_irq_save(flags);
-   t->next = NULL;
-   *__this_cpu_read(tasklet_hi_vec.tail) = t;
-   __this_cpu_write(tasklet_hi_vec.tail,  &(t->next));
-   raise_softirq_irqoff(HI_SOFTIRQ);
-   local_irq_restore(flags);
+   __tasklet_schedule_common(t, &tasklet_hi_vec,
+ HI_SOFTIRQ);
 }
 EXPORT_SYMBOL(__tasklet_hi_schedule);
 


[tip:x86/pti] x86/entry/64: Fix CR3 restore in paranoid_exit()

2018-02-14 Thread tip-bot for Ingo Molnar
Commit-ID:  e48657573481a5dff7cfdc3d57005c80aa816500
Gitweb: https://git.kernel.org/tip/e48657573481a5dff7cfdc3d57005c80aa816500
Author: Ingo Molnar 
AuthorDate: Wed, 14 Feb 2018 08:39:11 +0100
Committer:  Ingo Molnar 
CommitDate: Thu, 15 Feb 2018 01:15:54 +0100

x86/entry/64: Fix CR3 restore in paranoid_exit()

Josh Poimboeuf noticed the following bug:

 "The paranoid exit code only restores the saved CR3 when it switches back
  to the user GS.  However, even in the kernel GS case, it's possible that
  it needs to restore a user CR3, if for example, the paranoid exception
  occurred in the syscall exit path between SWITCH_TO_USER_CR3_STACK and
  SWAPGS."

Josh also confirmed via targeted testing that it's possible to hit this bug.

Fix the bug by also restoring CR3 in the paranoid_exit_no_swapgs branch.

The reason we haven't seen this bug reported by users yet is probably because
"paranoid" entry points are limited to the following cases:

 idtentry double_fault   do_double_fault  has_error_code=1  paranoid=2
 idtentry debug          do_debug         has_error_code=0  paranoid=1  shift_ist=DEBUG_STACK
 idtentry int3           do_int3          has_error_code=0  paranoid=1  shift_ist=DEBUG_STACK
 idtentry machine_check  do_mce           has_error_code=0  paranoid=1

Amongst those entry points, machine_check is the only one that will interrupt
an IRQS-off critical section asynchronously - and machine check events are rare.

The other main asynchronous entries are NMI entries, which can be very
high-frequency with perf profiling, but they are special: they don't use the
'idtentry' macro, are open-coded instead, and restore the user CR3
unconditionally - so they don't have this bug.

Reported-and-tested-by: Josh Poimboeuf 
Reviewed-by: Andy Lutomirski 
Acked-by: Thomas Gleixner 
Cc: Arjan van de Ven 
Cc: Borislav Petkov 
Cc: Dan Williams 
Cc: Dave Hansen 
Cc: David Woodhouse 
Cc: Greg Kroah-Hartman 
Cc: Linus Torvalds 
Cc: Peter Zijlstra 
Link: http://lkml.kernel.org/r/20180214073910.boevmg65upbk3...@gmail.com
Signed-off-by: Ingo Molnar 
---
 arch/x86/entry/entry_64.S | 1 +
 1 file changed, 1 insertion(+)

diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
index 1c5420420..4fd9044 100644
--- a/arch/x86/entry/entry_64.S
+++ b/arch/x86/entry/entry_64.S
@@ -1168,6 +1168,7 @@ ENTRY(paranoid_exit)
jmp .Lparanoid_exit_restore
 .Lparanoid_exit_no_swapgs:
TRACE_IRQS_IRETQ_DEBUG
+   RESTORE_CR3 scratch_reg=%rbx save_reg=%r14
 .Lparanoid_exit_restore:
jmp restore_regs_and_return_to_kernel
 END(paranoid_exit)
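
The control flow at issue can be reduced to a toy model (plain C, not the
real entry code - the struct and field names are invented): paranoid entry
saves the interrupted CR3, and both exit branches, with and without SWAPGS,
have to restore it:

  #include <assert.h>
  #include <stdbool.h>

  struct ctx {
          bool entry_did_swapgs;  /* did paranoid entry execute SWAPGS? */
          int  cr3;               /* 0 = kernel CR3, 1 = user CR3 */
          int  saved_cr3;         /* CR3 captured at paranoid entry */
  };

  static void paranoid_exit(struct ctx *c)
  {
          if (c->entry_did_swapgs) {
                  /* SWAPGS branch: CR3 was always restored here. */
                  c->cr3 = c->saved_cr3;
                  return;
          }
          /* no_swapgs branch: this restore is what the patch adds. */
          c->cr3 = c->saved_cr3;
  }

  int main(void)
  {
          /*
           * The buggy window: a paranoid exception lands between
           * SWITCH_TO_USER_CR3_STACK and SWAPGS on the syscall exit
           * path, so GS is still the kernel's while the saved CR3 is
           * the user's.
           */
          struct ctx c = { .entry_did_swapgs = false, .cr3 = 0, .saved_cr3 = 1 };

          paranoid_exit(&c);
          assert(c.cr3 == 1); /* would fail without the added restore */
          return 0;
  }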


[tip:x86/pti] x86/entry/64: Fix CR3 restore in paranoid_exit()

2018-02-14 Thread tip-bot for Ingo Molnar
Commit-ID:  48753793350974b7afe9598fd1dc46b2f1f47c2d
Gitweb: https://git.kernel.org/tip/48753793350974b7afe9598fd1dc46b2f1f47c2d
Author: Ingo Molnar 
AuthorDate: Wed, 14 Feb 2018 08:39:11 +0100
Committer:  Ingo Molnar 
CommitDate: Thu, 15 Feb 2018 00:28:03 +0100

x86/entry/64: Fix CR3 restore in paranoid_exit()

Josh Poimboeuf noticed the following bug:

 "The paranoid exit code only restores the saved CR3 when it switches back
  to the user GS.  However, even in the kernel GS case, it's possible that
  it needs to restore a user CR3, if for example, the paranoid exception
  occurred in the syscall exit path between SWITCH_TO_USER_CR3_STACK and
  SWAPGS."

Josh also confirmed via targeted testing that it's possible to hit this bug.

Fix the bug by also restoring CR3 in the paranoid_exit_no_swapgs branch.

The reason we haven't seen this bug reported by users yet is probably because
"paranoid" entry points are limited to the following cases:

 idtentry double_fault   do_double_fault  has_error_code=1  paranoid=2
 idtentry debug          do_debug         has_error_code=0  paranoid=1  shift_ist=DEBUG_STACK
 idtentry int3           do_int3          has_error_code=0  paranoid=1  shift_ist=DEBUG_STACK
 idtentry machine_check  do_mce           has_error_code=0  paranoid=1

Amongst those entry points, machine_check is the only one that will interrupt
an IRQS-off critical section asynchronously - and machine check events are rare.

The other main asynchronous entries are NMI entries, which can be very
high-frequency with perf profiling, but they are special: they don't use the
'idtentry' macro, are open-coded instead, and restore the user CR3
unconditionally - so they don't have this bug.

Reported-and-tested-by: Josh Poimboeuf 
Reviewed-by: Andy Lutomirski 
Acked-by: Thomas Gleixner 
Cc: Arjan van de Ven 
Cc: Borislav Petkov 
Cc: Dan Williams 
Cc: Dave Hansen 
Cc: David Woodhouse 
Cc: Greg Kroah-Hartman 
Cc: Linus Torvalds 
Cc: Peter Zijlstra 
Link: http://lkml.kernel.org/r/20180214073910.boevmg65upbk3...@gmail.com
Signed-off-by: Ingo Molnar 
---
 arch/x86/entry/entry_64.S | 1 +
 1 file changed, 1 insertion(+)

diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
index 1c5420420..4fd9044 100644
--- a/arch/x86/entry/entry_64.S
+++ b/arch/x86/entry/entry_64.S
@@ -1168,6 +1168,7 @@ ENTRY(paranoid_exit)
jmp .Lparanoid_exit_restore
 .Lparanoid_exit_no_swapgs:
TRACE_IRQS_IRETQ_DEBUG
+   RESTORE_CR3 scratch_reg=%rbx save_reg=%r14
 .Lparanoid_exit_restore:
jmp restore_regs_and_return_to_kernel
 END(paranoid_exit)


[tip:x86/pti] selftests/x86: Disable tests requiring 32-bit support on pure 64-bit systems

2018-02-13 Thread tip-bot for Ingo Molnar
Commit-ID:  d09a04140a61e82e938534b0c3983e8d90d5c54d
Gitweb: https://git.kernel.org/tip/d09a04140a61e82e938534b0c3983e8d90d5c54d
Author: Ingo Molnar 
AuthorDate: Tue, 13 Feb 2018 09:15:19 +0100
Committer:  Ingo Molnar 
CommitDate: Tue, 13 Feb 2018 09:15:19 +0100

selftests/x86: Disable tests requiring 32-bit support on pure 64-bit systems

The ldt_gdt and ptrace_syscall selftests, even in their 64-bit variant, use
hard-coded 32-bit syscall numbers and call "int $0x80".

This will fail on 64-bit systems with CONFIG_IA32_EMULATION=y disabled.

Therefore, do not build these tests if we cannot build 32-bit binaries
(which should be a good approximation for CONFIG_IA32_EMULATION=y being 
enabled).

Signed-off-by: Dominik Brodowski 
Cc: Andy Lutomirski 
Cc: Dmitry Safonov 
Cc: Linus Torvalds 
Cc: Peter Zijlstra 
Cc: Thomas Gleixner 
Cc: linux-kselft...@vger.kernel.org
Cc: sh...@kernel.org
Link: http://lkml.kernel.org/r/2018021013.16888-6-li...@dominikbrodowski.net
Signed-off-by: Ingo Molnar 
---
 tools/testing/selftests/x86/Makefile | 20 +---
 1 file changed, 13 insertions(+), 7 deletions(-)

diff --git a/tools/testing/selftests/x86/Makefile b/tools/testing/selftests/x86/Makefile
index 73b8ef6..aa6e2d7 100644
--- a/tools/testing/selftests/x86/Makefile
+++ b/tools/testing/selftests/x86/Makefile
@@ -5,16 +5,26 @@ include ../lib.mk
 
 .PHONY: all all_32 all_64 warn_32bit_failure clean
 
-TARGETS_C_BOTHBITS := single_step_syscall sysret_ss_attrs syscall_nt ptrace_syscall test_mremap_vdso \
-			check_initial_reg_state sigreturn ldt_gdt iopl mpx-mini-test ioperm \
+UNAME_M := $(shell uname -m)
+CAN_BUILD_I386 := $(shell ./check_cc.sh $(CC) trivial_32bit_program.c -m32)
+CAN_BUILD_X86_64 := $(shell ./check_cc.sh $(CC) trivial_64bit_program.c)
+
+TARGETS_C_BOTHBITS := single_step_syscall sysret_ss_attrs syscall_nt test_mremap_vdso \
+			check_initial_reg_state sigreturn iopl mpx-mini-test ioperm \
 			protection_keys test_vdso test_vsyscall
 TARGETS_C_32BIT_ONLY := entry_from_vm86 syscall_arg_fault test_syscall_vdso unwind_vdso \
 			test_FCMOV test_FCOMI test_FISTTP \
 			vdso_restorer
 TARGETS_C_64BIT_ONLY := fsgsbase sysret_rip
+# Some selftests require 32bit support enabled also on 64bit systems
+TARGETS_C_32BIT_NEEDED := ldt_gdt ptrace_syscall
 
-TARGETS_C_32BIT_ALL := $(TARGETS_C_BOTHBITS) $(TARGETS_C_32BIT_ONLY)
+TARGETS_C_32BIT_ALL := $(TARGETS_C_BOTHBITS) $(TARGETS_C_32BIT_ONLY) $(TARGETS_C_32BIT_NEEDED)
 TARGETS_C_64BIT_ALL := $(TARGETS_C_BOTHBITS) $(TARGETS_C_64BIT_ONLY)
+ifeq ($(CAN_BUILD_I386)$(CAN_BUILD_X86_64),11)
+TARGETS_C_64BIT_ALL += $(TARGETS_C_32BIT_NEEDED)
+endif
+
 BINARIES_32 := $(TARGETS_C_32BIT_ALL:%=%_32)
 BINARIES_64 := $(TARGETS_C_64BIT_ALL:%=%_64)
 
@@ -23,10 +33,6 @@ BINARIES_64 := $(patsubst %,$(OUTPUT)/%,$(BINARIES_64))
 
 CFLAGS := -O2 -g -std=gnu99 -pthread -Wall -no-pie
 
-UNAME_M := $(shell uname -m)
-CAN_BUILD_I386 := $(shell ./check_cc.sh $(CC) trivial_32bit_program.c -m32)
-CAN_BUILD_X86_64 := $(shell ./check_cc.sh $(CC) trivial_64bit_program.c)
-
 ifeq ($(CAN_BUILD_I386),1)
 all: all_32
 TEST_PROGS += $(BINARIES_32)


[tip:x86/pti] selftests/x86: Do not rely on "int $0x80" in single_step_syscall.c

2018-02-13 Thread tip-bot for Ingo Molnar
Commit-ID:  9e2b962d35d746bca8b51902807553191ccb06b2
Gitweb: https://git.kernel.org/tip/9e2b962d35d746bca8b51902807553191ccb06b2
Author: Ingo Molnar 
AuthorDate: Tue, 13 Feb 2018 09:13:21 +0100
Committer:  Ingo Molnar 
CommitDate: Tue, 13 Feb 2018 09:13:21 +0100

selftests/x86: Do not rely on "int $0x80" in single_step_syscall.c

On 64-bit builds, we should not rely on "int $0x80" working (it only does if
CONFIG_IA32_EMULATION=y is enabled). To keep the "Set TF and check int80"
test running on 64-bit installs with CONFIG_IA32_EMULATION=y enabled, build
this test only if we can also build 32-bit binaries (which should be a
good approximation for that).

Signed-off-by: Dominik Brodowski 
Cc: Andy Lutomirski 
Cc: Dmitry Safonov 
Cc: Linus Torvalds 
Cc: Peter Zijlstra 
Cc: Thomas Gleixner 
Cc: linux-kselft...@vger.kernel.org
Cc: sh...@kernel.org
Link: http://lkml.kernel.org/r/2018021013.16888-5-li...@dominikbrodowski.net
Signed-off-by: Ingo Molnar 
---
 tools/testing/selftests/x86/Makefile  | 2 ++
 tools/testing/selftests/x86/single_step_syscall.c | 5 -
 2 files changed, 6 insertions(+), 1 deletion(-)

diff --git a/tools/testing/selftests/x86/Makefile b/tools/testing/selftests/x86/Makefile
index 91fbfa8..73b8ef6 100644
--- a/tools/testing/selftests/x86/Makefile
+++ b/tools/testing/selftests/x86/Makefile
@@ -30,11 +30,13 @@ CAN_BUILD_X86_64 := $(shell ./check_cc.sh $(CC) trivial_64bit_program.c)
 ifeq ($(CAN_BUILD_I386),1)
 all: all_32
 TEST_PROGS += $(BINARIES_32)
+EXTRA_CFLAGS += -DCAN_BUILD_32
 endif
 
 ifeq ($(CAN_BUILD_X86_64),1)
 all: all_64
 TEST_PROGS += $(BINARIES_64)
+EXTRA_CFLAGS += -DCAN_BUILD_64
 endif
 
 all_32: $(BINARIES_32)
diff --git a/tools/testing/selftests/x86/single_step_syscall.c b/tools/testing/selftests/x86/single_step_syscall.c
index a48da95..ddfdd63 100644
--- a/tools/testing/selftests/x86/single_step_syscall.c
+++ b/tools/testing/selftests/x86/single_step_syscall.c
@@ -119,7 +119,9 @@ static void check_result(void)
 
 int main()
 {
+#ifdef CAN_BUILD_32
int tmp;
+#endif
 
sethandler(SIGTRAP, sigtrap, 0);
 
@@ -139,12 +141,13 @@ int main()
  : : "c" (post_nop) : "r11");
check_result();
 #endif
-
+#ifdef CAN_BUILD_32
printf("[RUN]\tSet TF and check int80\n");
set_eflags(get_eflags() | X86_EFLAGS_TF);
asm volatile ("int $0x80" : "=a" (tmp) : "a" (SYS_getpid)
: INT80_CLOBBERS);
check_result();
+#endif
 
/*
 * This test is particularly interesting if fast syscalls use
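
As a rough standalone analogue of the guarded "Set TF and check int80" step
(a sketch, not the selftest itself: it single-steps a NOP via EFLAGS.TF
instead of executing "int $0x80", so it runs even without IA32 emulation;
x86-64 Linux and glibc's REG_EFL are assumed):

  #define _GNU_SOURCE
  #include <signal.h>
  #include <stdio.h>
  #include <ucontext.h>

  #define X86_EFLAGS_TF 0x100UL

  static volatile sig_atomic_t got_trap;

  static void sigtrap(int sig, siginfo_t *info, void *ctx_void)
  {
          ucontext_t *ctx = ctx_void;

          (void)sig; (void)info;
          got_trap = 1;
          /* Clear TF in the saved flags so single-stepping stops on return. */
          ctx->uc_mcontext.gregs[REG_EFL] &= ~X86_EFLAGS_TF;
  }

  int main(void)
  {
          struct sigaction sa = { .sa_sigaction = sigtrap,
                                  .sa_flags = SA_SIGINFO };
          unsigned long flags;

          sigemptyset(&sa.sa_mask);
          if (sigaction(SIGTRAP, &sa, NULL))
                  return 1;

          /* Set TF: the instruction after popf runs, then the CPU traps. */
          asm volatile ("pushf; pop %0" : "=r" (flags));
          flags |= X86_EFLAGS_TF;
          asm volatile ("push %0; popf; nop" : : "r" (flags) : "cc");

          printf("%s\n", got_trap ? "[OK]\tgot single-step SIGTRAP"
                                  : "[FAIL]\tno trap");
          return !got_trap;
  }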


[tip:x86/urgent] x86/Kconfig: Further simplify the NR_CPUS config

2018-02-11 Thread tip-bot for Ingo Molnar
Commit-ID:  aec6487e994d2f625197970a56a4aac40c2c7547
Gitweb: https://git.kernel.org/tip/aec6487e994d2f625197970a56a4aac40c2c7547
Author: Ingo Molnar 
AuthorDate: Sat, 10 Feb 2018 12:36:29 +0100
Committer:  Ingo Molnar 
CommitDate: Sun, 11 Feb 2018 11:51:34 +0100

x86/Kconfig: Further simplify the NR_CPUS config

Clean up various aspects of the x86 CONFIG_NR_CPUS configuration switches:

- Rename the three CONFIG_NR_CPUS related variables to create a common
  namespace for them:

RANGE_BEGIN_CPUS => NR_CPUS_RANGE_BEGIN
RANGE_END_CPUS   => NR_CPUS_RANGE_END
DEF_CONFIG_CPUS  => NR_CPUS_DEFAULT

- Align them vertically, such as:

config NR_CPUS_RANGE_END
int
depends on X86_64
default 8192 if  SMP && ( MAXSMP ||  CPUMASK_OFFSTACK)
default  512 if  SMP && (!MAXSMP && !CPUMASK_OFFSTACK)
default1 if !SMP

- Update help text, add more comments.

Test results:

 # i386 allnoconfig:
 CONFIG_NR_CPUS_RANGE_BEGIN=1
 CONFIG_NR_CPUS_RANGE_END=1
 CONFIG_NR_CPUS_DEFAULT=1
 CONFIG_NR_CPUS=1

 # i386 defconfig:
 CONFIG_NR_CPUS_RANGE_BEGIN=2
 CONFIG_NR_CPUS_RANGE_END=8
 CONFIG_NR_CPUS_DEFAULT=8
 CONFIG_NR_CPUS=8

 # i386 allyesconfig:
 CONFIG_NR_CPUS_RANGE_BEGIN=2
 CONFIG_NR_CPUS_RANGE_END=64
 CONFIG_NR_CPUS_DEFAULT=32
 CONFIG_NR_CPUS=32

 # x86_64 allnoconfig:
 CONFIG_NR_CPUS_RANGE_BEGIN=1
 CONFIG_NR_CPUS_RANGE_END=1
 CONFIG_NR_CPUS_DEFAULT=1
 CONFIG_NR_CPUS=1

 # x86_64 defconfig:
 CONFIG_NR_CPUS_RANGE_BEGIN=2
 CONFIG_NR_CPUS_RANGE_END=512
 CONFIG_NR_CPUS_DEFAULT=64
 CONFIG_NR_CPUS=64

 # x86_64 allyesconfig:
 CONFIG_NR_CPUS_RANGE_BEGIN=8192
 CONFIG_NR_CPUS_RANGE_END=8192
 CONFIG_NR_CPUS_DEFAULT=8192
 CONFIG_NR_CPUS=8192

Acked-by: Randy Dunlap 
Acked-by: Linus Torvalds 
Cc: Peter Zijlstra 
Cc: Peter Zijlstra 
Cc: Thomas Gleixner 
Link: http://lkml.kernel.org/r/20180210113629.jcv6su3r4suun...@gmail.com
Signed-off-by: Ingo Molnar 
---
 arch/x86/Kconfig | 66 ++--
 1 file changed, 40 insertions(+), 26 deletions(-)

diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 9d921b7..a528c14 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -949,52 +949,66 @@ config MAXSMP
  Enable maximum number of CPUS and NUMA Nodes for this architecture.
  If unsure, say N.
 
-config RANGE_END_CPUS
+#
+# The maximum number of CPUs supported:
+#
+# The main config value is NR_CPUS, which defaults to NR_CPUS_DEFAULT,
+# and which can be configured interactively in the
+# [NR_CPUS_RANGE_BEGIN ... NR_CPUS_RANGE_END] range.
+#
+# The ranges are different on 32-bit and 64-bit kernels, depending on
+# hardware capabilities and scalability features of the kernel.
+#
+# ( If MAXSMP is enabled we just use the highest possible value and disable
+#   interactive configuration. )
+#
+
+config NR_CPUS_RANGE_BEGIN
int
-   depends on X86_32
-   default 8 if SMP && !X86_BIGSMP
-   default 64 if SMP && X86_BIGSMP
-   default 1 if !SMP
+   default NR_CPUS_RANGE_END if MAXSMP
+   default1 if !SMP
+   default2
 
-config RANGE_END_CPUS
+config NR_CPUS_RANGE_END
int
-   depends on X86_64
-   default 512 if SMP && !MAXSMP && !CPUMASK_OFFSTACK
-   default 8192 if SMP && (MAXSMP || CPUMASK_OFFSTACK)
-   default 1 if !SMP
+   depends on X86_32
+   default   64 if  SMP &&  X86_BIGSMP
+   default8 if  SMP && !X86_BIGSMP
+   default1 if !SMP
 
-config RANGE_BEGIN_CPUS
+config NR_CPUS_RANGE_END
int
-   default 1 if !SMP
-   default RANGE_END_CPUS if MAXSMP
-   default 2
+   depends on X86_64
+   default 8192 if  SMP && ( MAXSMP ||  CPUMASK_OFFSTACK)
+   default  512 if  SMP && (!MAXSMP && !CPUMASK_OFFSTACK)
+   default1 if !SMP
 
-config DEF_CONFIG_CPUS
+config NR_CPUS_DEFAULT
int
depends on X86_32
-   default 1 if !SMP
-   default 32 if X86_BIGSMP
-   default 8 if SMP
+   default   32 if  X86_BIGSMP
+   default8 if  SMP
+   default1 if !SMP
 
-config DEF_CONFIG_CPUS
+config NR_CPUS_DEFAULT
int
depends on X86_64
-   default 1 if !SMP
-   default 8192 if MAXSMP
-   default 64 if SMP
+   default 8192 if  MAXSMP
+   default   64 if  SMP
+   default1 if !SMP
 
 config NR_CPUS
int "Maximum number of CPUs" if SMP && !MAXSMP
-   range RANGE_BEGIN_CPUS RANGE_END_CPUS
-   default DEF_CONFIG_CPUS
+   range NR_CPUS_RANGE_BEGIN NR_CPUS_RANGE_END
+   default NR_CPUS_DEFAULT
---help---
  This allows you to specify the maximum number of CPUs which this
  kernel will support.  If CPUMASK_OFFSTACK is enabled, the maximum
  supported value is 8192, otherwise the maximum value is 512.  The
  minimum value which makes sense is 2.
 
- This is purely to save memory - each supported CPU adds
- approximately eight kilobytes to the kernel image.

[tip:perf/core] perf/x86/msr: Clean up the code

2018-01-06 Thread tip-bot for Ingo Molnar
Commit-ID:  9128d3ed9de3882c83b927eb553d5d44c84505f5
Gitweb: https://git.kernel.org/tip/9128d3ed9de3882c83b927eb553d5d44c84505f5
Author: Ingo Molnar 
AuthorDate: Fri, 5 Jan 2018 08:18:52 -0800
Committer:  Ingo Molnar 
CommitDate: Sat, 6 Jan 2018 12:18:40 +0100

perf/x86/msr: Clean up the code

Recent changes made a bit of an inconsistent mess out of arch/x86/events/msr.c,
fix it:

 - re-align the initialization tables to be vertically aligned and readable 
again

 - harmonize comment style in terms of punctuation, capitalization and spelling

 - use curly braces for multi-condition branches

 - remove extra newlines

 - simplify the code a bit

Cc: Alexander Shishkin 
Cc: Arnaldo Carvalho de Melo 
Cc: Jiri Olsa 
Cc: Linus Torvalds 
Cc: Peter Zijlstra 
Cc: Stephane Eranian 
Cc: Thomas Gleixner 
Cc: Vince Weaver 
Cc: kan.li...@intel.com
Link: http://lkml.kernel.org/r/1515169132-3980-1-git-send-email-eran...@google.com
Signed-off-by: Ingo Molnar 
---
 arch/x86/events/msr.c | 61 ---
 1 file changed, 29 insertions(+), 32 deletions(-)

diff --git a/arch/x86/events/msr.c b/arch/x86/events/msr.c
index 0be15b9..18e2628 100644
--- a/arch/x86/events/msr.c
+++ b/arch/x86/events/msr.c
@@ -36,7 +36,6 @@ static bool test_therm_status(int idx)
return boot_cpu_has(X86_FEATURE_DTHERM);
 }
 
-
 static bool test_intel(int idx)
 {
if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL ||
@@ -103,28 +102,28 @@ struct perf_msr {
bool(*test)(int idx);
 };
 
-PMU_EVENT_ATTR_STRING(tsc,evattr_tsc,"event=0x00");
-PMU_EVENT_ATTR_STRING(aperf,  evattr_aperf,  "event=0x01");
-PMU_EVENT_ATTR_STRING(mperf,  evattr_mperf,  "event=0x02");
-PMU_EVENT_ATTR_STRING(pperf,  evattr_pperf,  "event=0x03");
-PMU_EVENT_ATTR_STRING(smi,evattr_smi,"event=0x04");
-PMU_EVENT_ATTR_STRING(ptsc,   evattr_ptsc,   "event=0x05");
-PMU_EVENT_ATTR_STRING(irperf, evattr_irperf, "event=0x06");
-PMU_EVENT_ATTR_STRING(cpu_thermal_margin, evattr_therm, "event=0x07");
-PMU_EVENT_ATTR_STRING(cpu_thermal_margin.snapshot, evattr_therm_snap, "1");
-PMU_EVENT_ATTR_STRING(cpu_thermal_margin.unit, evattr_therm_unit, "C");
+PMU_EVENT_ATTR_STRING(tsc,				evattr_tsc,		"event=0x00"	);
+PMU_EVENT_ATTR_STRING(aperf,				evattr_aperf,		"event=0x01"	);
+PMU_EVENT_ATTR_STRING(mperf,				evattr_mperf,		"event=0x02"	);
+PMU_EVENT_ATTR_STRING(pperf,				evattr_pperf,		"event=0x03"	);
+PMU_EVENT_ATTR_STRING(smi,				evattr_smi,		"event=0x04"	);
+PMU_EVENT_ATTR_STRING(ptsc,				evattr_ptsc,		"event=0x05"	);
+PMU_EVENT_ATTR_STRING(irperf,				evattr_irperf,		"event=0x06"	);
+PMU_EVENT_ATTR_STRING(cpu_thermal_margin,		evattr_therm,		"event=0x07"	);
+PMU_EVENT_ATTR_STRING(cpu_thermal_margin.snapshot,	evattr_therm_snap,	"1"		);
+PMU_EVENT_ATTR_STRING(cpu_thermal_margin.unit,		evattr_therm_unit,	"C"		);
 
 static struct perf_msr msr[] = {
-	[PERF_MSR_TSC]    = { 0,                     &evattr_tsc,        NULL,              },
-	[PERF_MSR_APERF]  = { MSR_IA32_APERF,        &evattr_aperf,      test_aperfmperf,   },
-	[PERF_MSR_MPERF]  = { MSR_IA32_MPERF,        &evattr_mperf,      test_aperfmperf,   },
-	[PERF_MSR_PPERF]  = { MSR_PPERF,             &evattr_pperf,      test_intel,        },
-	[PERF_MSR_SMI]    = { MSR_SMI_COUNT,         &evattr_smi,        test_intel,        },
-	[PERF_MSR_PTSC]   = { MSR_F15H_PTSC,         &evattr_ptsc,       test_ptsc,         },
-	[PERF_MSR_IRPERF] = { MSR_F17H_IRPERF,       &evattr_irperf,     test_irperf,       },
-	[PERF_MSR_THERM]  = { MSR_IA32_THERM_STATUS, &evattr_therm,      test_therm_status, },
-	[PERF_MSR_THERM_SNAP] = { MSR_IA32_THERM_STATUS, &evattr_therm_snap, test_therm_status, },
-	[PERF_MSR_THERM_UNIT] = { MSR_IA32_THERM_STATUS, &evattr_therm_unit, test_therm_status, },
+	[PERF_MSR_TSC]		= { 0,				&evattr_tsc,		NULL,			},
+	[PERF_MSR_APERF]	= { MSR_IA32_APERF,		&evattr_aperf,		test_aperfmperf,	},
+	[PERF_MSR_MPERF]	= { MSR_IA32_MPERF,		&evattr_mperf,		test_aperfmperf,	},
+	[PERF_MSR_PPERF]	= { MSR_PPERF,			&evattr_pperf,		test_intel,		},
+	[PERF_MSR_SMI]		= { MSR_SMI_COUNT,		&evattr_smi,		test_intel,		},
+	[PERF_MSR_PTSC]		= { MSR_F15H_PTSC,		&evattr_ptsc,		test_ptsc,		},
+	[PERF_MSR_IRPERF]	= { MSR_F17H_IRPERF,		&evattr_irperf,		test_irperf,		},
+	[PERF_MSR_THERM]	= { MSR_IA32_THERM_STATUS,	&evattr_therm,		test_therm_status,	},
+	[PERF_MSR_THERM_SNAP]	= { MSR_IA32_THERM_STATUS,	&evattr_therm_snap,

[tip:perf/core] tools headers: Follow the upstream UAPI header version 100%

2017-12-06 Thread tip-bot for Ingo Molnar
Commit-ID:  3f27bb5f00dc10609c2704cd39a130c8155a8510
Gitweb: https://git.kernel.org/tip/3f27bb5f00dc10609c2704cd39a130c8155a8510
Author: Ingo Molnar 
AuthorDate: Tue, 21 Nov 2017 09:41:11 +0100
Committer:  Arnaldo Carvalho de Melo 
CommitDate: Wed, 29 Nov 2017 18:17:59 -0300

tools headers: Follow the upstream UAPI header version 100%

Remove this from check-headers.sh:

  opts="--ignore-blank-lines --ignore-space-change"

as the easiest policy is to just follow the upstream UAPI header version 100%.
Pure space-only changes are comparatively rare.

Signed-off-by: Ingo Molnar 
Cc: Adrian Hunter 
Link: http://lkml.kernel.org/r/20171121084111.y6p5zwqso2cbm...@gmail.com
Signed-off-by: Arnaldo Carvalho de Melo 
---
 tools/perf/check-headers.sh | 1 -
 1 file changed, 1 deletion(-)

diff --git a/tools/perf/check-headers.sh b/tools/perf/check-headers.sh
index 77406d2..e66a8a7 100755
--- a/tools/perf/check-headers.sh
+++ b/tools/perf/check-headers.sh
@@ -45,7 +45,6 @@ include/uapi/asm-generic/mman-common.h
 
 check () {
   file=$1
-  opts="--ignore-blank-lines --ignore-space-change"
 
   shift
   while [ -n "$*" ]; do


[tip:x86/asm] x86/cpufeatures: Re-tabulate the X86_FEATURE definitions

2017-11-07 Thread tip-bot for Ingo Molnar
Commit-ID:  acbc845ffefd9fb70466182cd8555a26189462b2
Gitweb: https://git.kernel.org/tip/acbc845ffefd9fb70466182cd8555a26189462b2
Author: Ingo Molnar 
AuthorDate: Tue, 31 Oct 2017 13:17:22 +0100
Committer:  Ingo Molnar 
CommitDate: Tue, 7 Nov 2017 10:57:47 +0100

x86/cpufeatures: Re-tabulate the X86_FEATURE definitions

Over the years asm/cpufeatures.h has become somewhat of a mess: the original
tabulation style was too narrow, while x86 feature names also kept growing
in length, creating frequent field width overflows.

Re-tabulate it to make it wider and easier to read/modify. Also harmonize
the tabulation of the other defines in this file to match it.

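For readers new to this header, the reason the tabulation matters is that
every X86_FEATURE value packs a 32-bit-word index and a bit position into a
single integer via the word*32+bit idiom. A hedged userspace sketch of that
encoding follows; the capability array and the two demo feature values are
illustrative, not the kernel's:

#include <stdio.h>

#define DEMO_NCAPINTS		18	/* N 32-bit words worth of info */

/* word*32+bit, mirroring the X86_FEATURE_* convention: */
#define DEMO_FEATURE_FPU	( 0*32+ 0)
#define DEMO_FEATURE_SYSCALL	( 1*32+11)

/* Test bit 'f' in a capability bitmap split into 32-bit words: */
static int demo_has(const unsigned int *caps, int f)
{
	return (caps[f / 32] >> (f % 32)) & 1;
}

int main(void)
{
	unsigned int caps[DEMO_NCAPINTS] = { 0 };

	/* Set one feature bit the way CPU setup code would: */
	caps[DEMO_FEATURE_SYSCALL / 32] |= 1u << (DEMO_FEATURE_SYSCALL % 32);

	printf("FPU:     %d\n", demo_has(caps, DEMO_FEATURE_FPU));
	printf("SYSCALL: %d\n", demo_has(caps, DEMO_FEATURE_SYSCALL));
	return 0;
}
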
Cc: Andrew Morton 
Cc: Andy Lutomirski 
Cc: Andy Lutomirski 
Cc: Borislav Petkov 
Cc: Brian Gerst 
Cc: Denys Vlasenko 
Cc: Josh Poimboeuf 
Cc: Linus Torvalds 
Cc: Peter Zijlstra 
Cc: Thomas Gleixner 
Link: http://lkml.kernel.org/r/20171031121723.28524-3-mi...@kernel.org
Signed-off-by: Ingo Molnar 
---
 arch/x86/include/asm/cpufeatures.h | 508 ++---
 1 file changed, 254 insertions(+), 254 deletions(-)

diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h
index 7437073..ad1b835 100644
--- a/arch/x86/include/asm/cpufeatures.h
+++ b/arch/x86/include/asm/cpufeatures.h
@@ -13,8 +13,8 @@
 /*
  * Defines x86 CPU feature bits
  */
-#define NCAPINTS   18  /* N 32-bit words worth of info */
-#define NBUGINTS   1   /* N 32-bit bug flags */
+#define NCAPINTS			18	   /* N 32-bit words worth of info */
+#define NBUGINTS   1  /* N 32-bit bug flags */
 
 /*
  * Note: If the comment begins with a quoted string, that string is used
@@ -28,163 +28,163 @@
  */
 
 /* Intel-defined CPU features, CPUID level 0x0001 (edx), word 0 */
-#define X86_FEATURE_FPU		( 0*32+ 0) /* Onboard FPU */
-#define X86_FEATURE_VME		( 0*32+ 1) /* Virtual Mode Extensions */
-#define X86_FEATURE_DE		( 0*32+ 2) /* Debugging Extensions */
-#define X86_FEATURE_PSE		( 0*32+ 3) /* Page Size Extensions */
-#define X86_FEATURE_TSC		( 0*32+ 4) /* Time Stamp Counter */
-#define X86_FEATURE_MSR		( 0*32+ 5) /* Model-Specific Registers */
-#define X86_FEATURE_PAE		( 0*32+ 6) /* Physical Address Extensions */
-#define X86_FEATURE_MCE		( 0*32+ 7) /* Machine Check Exception */
-#define X86_FEATURE_CX8		( 0*32+ 8) /* CMPXCHG8 instruction */
-#define X86_FEATURE_APIC	( 0*32+ 9) /* Onboard APIC */
-#define X86_FEATURE_SEP		( 0*32+11) /* SYSENTER/SYSEXIT */
-#define X86_FEATURE_MTRR	( 0*32+12) /* Memory Type Range Registers */
-#define X86_FEATURE_PGE		( 0*32+13) /* Page Global Enable */
-#define X86_FEATURE_MCA		( 0*32+14) /* Machine Check Architecture */
-#define X86_FEATURE_CMOV	( 0*32+15) /* CMOV instructions */
+#define X86_FEATURE_FPU			( 0*32+ 0) /* Onboard FPU */
+#define X86_FEATURE_VME			( 0*32+ 1) /* Virtual Mode Extensions */
+#define X86_FEATURE_DE			( 0*32+ 2) /* Debugging Extensions */
+#define X86_FEATURE_PSE			( 0*32+ 3) /* Page Size Extensions */
+#define X86_FEATURE_TSC			( 0*32+ 4) /* Time Stamp Counter */
+#define X86_FEATURE_MSR			( 0*32+ 5) /* Model-Specific Registers */
+#define X86_FEATURE_PAE			( 0*32+ 6) /* Physical Address Extensions */
+#define X86_FEATURE_MCE			( 0*32+ 7) /* Machine Check Exception */
+#define X86_FEATURE_CX8			( 0*32+ 8) /* CMPXCHG8 instruction */
+#define X86_FEATURE_APIC		( 0*32+ 9) /* Onboard APIC */
+#define X86_FEATURE_SEP			( 0*32+11) /* SYSENTER/SYSEXIT */
+#define X86_FEATURE_MTRR		( 0*32+12) /* Memory Type Range Registers */
+#define X86_FEATURE_PGE			( 0*32+13) /* Page Global Enable */
+#define X86_FEATURE_MCA			( 0*32+14) /* Machine Check Architecture */
+#define X86_FEATURE_CMOV		( 0*32+15) /* CMOV instructions */
					   /* (plus FCMOVcc, FCOMI with FPU) */
-#define X86_FEATURE_PAT		( 0*32+16) /* Page Attribute Table */
-#define X86_FEATURE_PSE36	( 0*32+17) /* 36-bit PSEs */
-#define X86_FEATURE_PN		( 0*32+18) /* Processor serial number */
-#define X86_FEATURE_CLFLUSH	( 0*32+19) /* CLFLUSH instruction */
-#define X86_FEATURE_DS		( 0*32+21) /* "dts" Debug Store */
-#define X86_FEATURE_ACPI	( 0*32+22) /* ACPI via MSR */
-#define X86_FEATURE_MMX		( 0*32+23) /* Multimedia Extensions */
-#define X86_FEATURE_FXSR	( 0*32+24) /* FXSAVE/FXRSTOR, CR4.OSFXSR */
-#define X86_FEATURE_XMM		( 0*32+25) /* "sse" */
-#define X86_FEATURE_XMM2	( 0*32+26)

[tip:x86/asm] x86/cpufeatures: Fix various details in the feature definitions

2017-11-07 Thread tip-bot for Ingo Molnar
Commit-ID:  f3a624e901c633593156f7b00ca743a6204a29bc
Gitweb: https://git.kernel.org/tip/f3a624e901c633593156f7b00ca743a6204a29bc
Author: Ingo Molnar 
AuthorDate: Tue, 31 Oct 2017 13:17:23 +0100
Committer:  Ingo Molnar 
CommitDate: Tue, 7 Nov 2017 10:57:47 +0100

x86/cpufeatures: Fix various details in the feature definitions

Kept this commit separate from the re-tabulation changes, to make
the changes easier to review:

 - add better explanation for entries with no explanation
 - fix/enhance the text of some of the entries
 - fix the vertical alignment of some of the feature number definitions
 - fix inconsistent capitalization
 - ... and lots of other small details

i.e. make it all more of a coherent unit, instead of a patchwork of years of 
additions.

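One convention touched by these fixes is the quoted-string comment rule
described in the header's note: a leading "name" in the comment replaces the
macro name in /proc/cpuinfo, and "" hides the flag entirely. A small userspace
sketch of that selection logic follows, with an invented table; this is not
the kernel's cpuinfo code:

#include <stdio.h>
#include <string.h>

struct demo_flag {
	const char *macro_name;	/* lower-cased macro name           */
	const char *display;	/* quoted string from the comment,
				 * or NULL if the comment had none  */
};

static const struct demo_flag flags[] = {
	{ "fpu", NULL  },	/* no quoted string: show the plain name */
	{ "xmm", "sse" },	/* "sse": show the override instead      */
	{ "k8",  ""    },	/* "": hidden from /proc/cpuinfo         */
};

int main(void)
{
	for (unsigned int i = 0; i < sizeof(flags) / sizeof(flags[0]); i++) {
		const struct demo_flag *f = &flags[i];

		if (f->display && !strcmp(f->display, ""))
			continue;	/* empty string: do not display */
		printf("%s ", f->display ? f->display : f->macro_name);
	}
	printf("\n");
	return 0;
}
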
Cc: Andrew Morton 
Cc: Andy Lutomirski 
Cc: Andy Lutomirski 
Cc: Borislav Petkov 
Cc: Brian Gerst 
Cc: Denys Vlasenko 
Cc: Josh Poimboeuf 
Cc: Linus Torvalds 
Cc: Peter Zijlstra 
Cc: Thomas Gleixner 
Link: http://lkml.kernel.org/r/20171031121723.28524-4-mi...@kernel.org
Signed-off-by: Ingo Molnar 
---
 arch/x86/include/asm/cpufeatures.h | 149 ++---
 1 file changed, 74 insertions(+), 75 deletions(-)

diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h
index ad1b835..cdf5be8 100644
--- a/arch/x86/include/asm/cpufeatures.h
+++ b/arch/x86/include/asm/cpufeatures.h
@@ -20,14 +20,12 @@
  * Note: If the comment begins with a quoted string, that string is used
  * in /proc/cpuinfo instead of the macro name.  If the string is "",
  * this feature bit is not displayed in /proc/cpuinfo at all.
- */
-
-/*
+ *
  * When adding new features here that depend on other features,
- * please update the table in kernel/cpu/cpuid-deps.c
+ * please update the table in kernel/cpu/cpuid-deps.c as well.
  */
 
-/* Intel-defined CPU features, CPUID level 0x0001 (edx), word 0 */
+/* Intel-defined CPU features, CPUID level 0x0001 (EDX), word 0 */
 #define X86_FEATURE_FPU			( 0*32+ 0) /* Onboard FPU */
 #define X86_FEATURE_VME			( 0*32+ 1) /* Virtual Mode Extensions */
 #define X86_FEATURE_DE			( 0*32+ 2) /* Debugging Extensions */
@@ -42,8 +40,7 @@
 #define X86_FEATURE_MTRR		( 0*32+12) /* Memory Type Range Registers */
 #define X86_FEATURE_PGE			( 0*32+13) /* Page Global Enable */
 #define X86_FEATURE_MCA			( 0*32+14) /* Machine Check Architecture */
-#define X86_FEATURE_CMOV		( 0*32+15) /* CMOV instructions */
-					   /* (plus FCMOVcc, FCOMI with FPU) */
+#define X86_FEATURE_CMOV		( 0*32+15) /* CMOV instructions (plus FCMOVcc, FCOMI with FPU) */
 #define X86_FEATURE_PAT			( 0*32+16) /* Page Attribute Table */
 #define X86_FEATURE_PSE36		( 0*32+17) /* 36-bit PSEs */
 #define X86_FEATURE_PN			( 0*32+18) /* Processor serial number */
@@ -63,15 +60,15 @@
 /* AMD-defined CPU features, CPUID level 0x8001, word 1 */
 /* Don't duplicate feature flags which are redundant with Intel! */
 #define X86_FEATURE_SYSCALL( 1*32+11) /* SYSCALL/SYSRET */
-#define X86_FEATURE_MP ( 1*32+19) /* MP Capable. */
+#define X86_FEATURE_MP ( 1*32+19) /* MP Capable */
 #define X86_FEATURE_NX ( 1*32+20) /* Execute Disable */
 #define X86_FEATURE_MMXEXT ( 1*32+22) /* AMD MMX extensions */
 #define X86_FEATURE_FXSR_OPT		( 1*32+25) /* FXSAVE/FXRSTOR optimizations */
 #define X86_FEATURE_GBPAGES		( 1*32+26) /* "pdpe1gb" GB pages */
 #define X86_FEATURE_RDTSCP		( 1*32+27) /* RDTSCP */
-#define X86_FEATURE_LM			( 1*32+29) /* Long Mode (x86-64) */
-#define X86_FEATURE_3DNOWEXT		( 1*32+30) /* AMD 3DNow! extensions */
-#define X86_FEATURE_3DNOW		( 1*32+31) /* 3DNow! */
+#define X86_FEATURE_LM			( 1*32+29) /* Long Mode (x86-64, 64-bit support) */
+#define X86_FEATURE_3DNOWEXT   ( 1*32+30) /* AMD 3DNow extensions */
+#define X86_FEATURE_3DNOW  ( 1*32+31) /* 3DNow */
 
 /* Transmeta-defined CPU features, CPUID level 0x80860001, word 2 */
 #define X86_FEATURE_RECOVERY   ( 2*32+ 0) /* CPU in recovery mode */
@@ -84,66 +81,67 @@
 #define X86_FEATURE_K6_MTRR		( 3*32+ 1) /* AMD K6 nonstandard MTRRs */
 #define X86_FEATURE_CYRIX_ARR		( 3*32+ 2) /* Cyrix ARRs (= MTRRs) */
 #define X86_FEATURE_CENTAUR_MCR	( 3*32+ 3) /* Centaur MCRs (= MTRRs) */
-/* cpu types for specific tunings: */
+
+/* CPU types for specific tunings: */
 #define X86_FEATURE_K8 ( 3*32+ 4) /* "" Opteron, Athlon64 */
 #define X86_FEATURE_K7 ( 3*32+ 5) /* "" Athlon */
 #define X86_FEATURE_P3 ( 3*32+ 6) /* "" P3 */
 #define X86_FEATURE_P4 ( 3*32+ 7) /* "" P4 */
 #define 

[tip:locking/urgent] locking/lockdep: Disable cross-release features for now

2017-10-14 Thread tip-bot for Ingo Molnar
Commit-ID:  b483cf3bc249d7af706390efa63d6671e80d1c09
Gitweb: https://git.kernel.org/tip/b483cf3bc249d7af706390efa63d6671e80d1c09
Author: Ingo Molnar 
AuthorDate: Sat, 14 Oct 2017 09:26:59 +0200
Committer:  Ingo Molnar 
CommitDate: Sat, 14 Oct 2017 12:50:26 +0200

locking/lockdep: Disable cross-release features for now

Johan Hovold reported a big lockdep slowdown on his system, caused by lockdep:

> I had noticed that the BeagleBone Black boot time appeared to have
> increased significantly with 4.14 and yesterday I finally had time to
> investigate it.
>
> Boot time (from "Linux version" to login prompt) had in fact doubled
> since 4.13 where it took 17 seconds (with my current config) compared to
> the 35 seconds I now see with 4.14-rc4.
>
> A quick bisect pointed to lockdep and specifically the following commit:
>
>   28a903f63ec0 ("locking/lockdep: Handle non(or multi)-acquisition of a crosslock")

Because the final v4.14 release is close, disable the cross-release lockdep
features for now.

Bisected-by: Johan Hovold 
Debugged-by: Johan Hovold 
Reported-by: Johan Hovold 
Cc: Arnd Bergmann 
Cc: Byungchul Park 
Cc: Linus Torvalds 
Cc: Peter Zijlstra 
Cc: Thomas Gleixner 
Cc: Tony Lindgren 
Cc: kernel-t...@lge.com
Cc: linux-arm-ker...@lists.infradead.org
Cc: linux...@kvack.org
Cc: linux-o...@vger.kernel.org
Link: http://lkml.kernel.org/r/20171014072659.f2yr6mhm5ha3e...@gmail.com
Signed-off-by: Ingo Molnar 
---
 lib/Kconfig.debug | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index 2689b7c..e270584 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -1092,8 +1092,8 @@ config PROVE_LOCKING
select DEBUG_MUTEXES
select DEBUG_RT_MUTEXES if RT_MUTEXES
select DEBUG_LOCK_ALLOC
-   select LOCKDEP_CROSSRELEASE
-   select LOCKDEP_COMPLETIONS
+   select LOCKDEP_CROSSRELEASE if BROKEN
+   select LOCKDEP_COMPLETIONS if BROKEN
select TRACE_IRQFLAGS
default n
help


[tip:perf/urgent] tools include: Sync kernel ABI headers with tooling headers

2017-09-29 Thread tip-bot for Ingo Molnar
Commit-ID:  549a3976523c69a0245c0a310210c824a0b26e35
Gitweb: https://git.kernel.org/tip/549a3976523c69a0245c0a310210c824a0b26e35
Author: Ingo Molnar 
AuthorDate: Wed, 13 Sep 2017 09:38:23 +0200
Committer:  Arnaldo Carvalho de Melo 
CommitDate: Mon, 25 Sep 2017 10:39:44 -0300

tools include: Sync kernel ABI headers with tooling headers

Time for a sync with ABI/uapi headers with the upcoming v4.14 kernel.

None of the ABI changes require any source code level changes to our
existing in-kernel tooling code:

  - tools/arch/s390/include/uapi/asm/kvm.h:

  New KVM_S390_VM_TOD_EXT ABI, not used by in-kernel tooling.

  - tools/arch/x86/include/asm/cpufeatures.h:
tools/arch/x86/include/asm/disabled-features.h:

  New PCID, SME and VGIF x86 CPU feature bits defined.

  - tools/include/asm-generic/hugetlb_encode.h:
tools/include/uapi/asm-generic/mman-common.h:
tools/include/uapi/linux/mman.h:

  Two new madvise() flags, plus a hugetlb system call mmap flags
  restructuring/extension changes.

  - tools/include/uapi/drm/drm.h:
tools/include/uapi/drm/i915_drm.h:

  New drm_syncobj_create flags definitions, new drm_syncobj_wait
  and drm_syncobj_array ABIs. DRM_I915_PERF_* calls and a new
  I915_PARAM_HAS_EXEC_FENCE_ARRAY ABI for the Intel driver.

  - tools/include/uapi/linux/bpf.h:

  New bpf_sock fields (::mark and ::priority), new XDP_REDIRECT
  action, new kvm_ppc_smmu_info fields (::data_keys, instr_keys)

Signed-off-by: Ingo Molnar 
Cc: Adrian Hunter 
Cc: David Ahern 
Cc: Jiri Olsa 
Cc: Milian Wolff 
Cc: Namhyung Kim 
Cc: Peter Zijlstra 
Cc: Taeung Song 
Cc: Wang Nan 
Cc: Yao Jin 
Link: http://lkml.kernel.org/r/20170913073823.lxmi4c7ejqlfa...@gmail.com
Signed-off-by: Arnaldo Carvalho de Melo 
---
 tools/arch/s390/include/uapi/asm/kvm.h |  6 +++
 tools/arch/x86/include/asm/cpufeatures.h   |  2 +
 tools/arch/x86/include/asm/disabled-features.h |  4 +-
 .../include}/asm-generic/hugetlb_encode.h  |  0
 tools/include/uapi/asm-generic/mman-common.h   | 14 ++
 tools/include/uapi/drm/drm.h   | 22 ++
 tools/include/uapi/drm/i915_drm.h  | 51 +-
 tools/include/uapi/linux/bpf.h | 32 +-
 tools/include/uapi/linux/kvm.h |  3 +-
 tools/include/uapi/linux/mman.h| 24 +-
 10 files changed, 130 insertions(+), 28 deletions(-)

diff --git a/tools/arch/s390/include/uapi/asm/kvm.h b/tools/arch/s390/include/uapi/asm/kvm.h
index 69d09c3..cd7359e 100644
--- a/tools/arch/s390/include/uapi/asm/kvm.h
+++ b/tools/arch/s390/include/uapi/asm/kvm.h
@@ -88,6 +88,12 @@ struct kvm_s390_io_adapter_req {
 /* kvm attributes for KVM_S390_VM_TOD */
 #define KVM_S390_VM_TOD_LOW0
 #define KVM_S390_VM_TOD_HIGH   1
+#define KVM_S390_VM_TOD_EXT2
+
+struct kvm_s390_vm_tod_clock {
+   __u8  epoch_idx;
+   __u64 tod;
+};
 
 /* kvm attributes for KVM_S390_VM_CPU_MODEL */
 /* processor related attributes are r/w */
diff --git a/tools/arch/x86/include/asm/cpufeatures.h b/tools/arch/x86/include/asm/cpufeatures.h
index 8ea315a1..2519c6c 100644
--- a/tools/arch/x86/include/asm/cpufeatures.h
+++ b/tools/arch/x86/include/asm/cpufeatures.h
@@ -196,6 +196,7 @@
 
 #define X86_FEATURE_HW_PSTATE  ( 7*32+ 8) /* AMD HW-PState */
 #define X86_FEATURE_PROC_FEEDBACK ( 7*32+ 9) /* AMD ProcFeedbackInterface */
+#define X86_FEATURE_SME			( 7*32+10) /* AMD Secure Memory Encryption */
 
 #define X86_FEATURE_INTEL_PPIN		( 7*32+14) /* Intel Processor Inventory Number */
 #define X86_FEATURE_INTEL_PT   ( 7*32+15) /* Intel Processor Trace */
@@ -287,6 +288,7 @@
 #define X86_FEATURE_PFTHRESHOLD (15*32+12) /* pause filter threshold */
 #define X86_FEATURE_AVIC   (15*32+13) /* Virtual Interrupt Controller */
 #define X86_FEATURE_V_VMSAVE_VMLOAD (15*32+15) /* Virtual VMSAVE VMLOAD */
+#define X86_FEATURE_VGIF   (15*32+16) /* Virtual GIF */
 
 /* Intel-defined CPU features, CPUID level 0x0007:0 (ecx), word 16 */
 #define X86_FEATURE_AVX512VBMI		(16*32+ 1) /* AVX512 Vector Bit Manipulation instructions */
diff --git a/tools/arch/x86/include/asm/disabled-features.h b/tools/arch/x86/include/asm/disabled-features.h
index 5dff775..c10c912 100644
--- a/tools/arch/x86/include/asm/disabled-features.h
+++ b/tools/arch/x86/include/asm/disabled-features.h
@@ -21,11 +21,13 @@
 # define DISABLE_K6_MTRR   (1<<(X86_FEATURE_K6_MTRR & 31))
 # define DISABLE_CYRIX_ARR (1<<(X86_FEATURE_CYRIX_ARR & 31))
 # define DISABLE_CENTAUR_MCR   (1<<(X86_FEATURE_CENTAUR_MCR & 31))
+# define DISABLE_PCID  0
 #else
 # define DISABLE_VME   0
 # define DISABLE_K6_MTRR   0
 # define DISABLE_CYRIX_ARR 0
 # define DISABLE_CENTAUR_MCR   0
+# define DISABLE_PCID  (1<<(X86_FEATURE_PCID & 31))
 #endif /* CONFIG_X86_64 */
 
 #ifdef CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS
@@ -49,7 +51,7 

[tip:x86/fpu] x86/fpu: Remove fpu__current_fpstate_write_begin/end()

2017-09-26 Thread tip-bot for Ingo Molnar
Commit-ID:  685c930d6e58e31e251ec354f9dca3958a4c5040
Gitweb: http://git.kernel.org/tip/685c930d6e58e31e251ec354f9dca3958a4c5040
Author: Ingo Molnar 
AuthorDate: Sat, 23 Sep 2017 15:00:11 +0200
Committer:  Ingo Molnar 
CommitDate: Tue, 26 Sep 2017 09:42:20 +0200

x86/fpu: Remove fpu__current_fpstate_write_begin/end()

These functions are not used anymore, so remove them.

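For context, the pair being deleted implemented a classic begin/end bracket
around edits to in-memory state: disable preemption, pull the live register
state into memory, let the caller modify it, then push it back and re-enable
preemption. A rough userspace analogy of that protocol follows; all names are
invented, and the kernel versions additionally invalidate the lazy-restore
shortcut, so treat this only as a sketch of the shape:

#include <stdio.h>

/* 'regs' stands in for live FPU registers, 'fpstate' for the in-memory
 * save area, and 'no_preempt' for preempt_disable()/preempt_enable(). */
static int regs = 42, fpstate, no_preempt;

static void demo_write_begin(void)
{
	no_preempt = 1;		/* block "context switches" while editing */
	fpstate = regs;		/* pull live state into the save area     */
}

static void demo_write_end(void)
{
	regs = fpstate;		/* push the (possibly modified) state back */
	no_preempt = 0;		/* editing done, switches may resume       */
}

int main(void)
{
	demo_write_begin();
	fpstate += 1;			/* caller edits the saved state */
	demo_write_end();
	printf("regs = %d\n", regs);	/* prints 43 */
	return 0;
}
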
Cc: Andrew Morton 
Cc: Andy Lutomirski 
Cc: Bobby Powers 
Cc: Borislav Petkov 
Cc: Dave Hansen 
Cc: Eric Biggers 
Cc: Fenghua Yu 
Cc: Linus Torvalds 
Cc: Oleg Nesterov 
Cc: Peter Zijlstra 
Cc: Rik van Riel 
Cc: Thomas Gleixner 
Cc: Yu-cheng Yu 
Link: http://lkml.kernel.org/r/20170923130016.21448-29-mi...@kernel.org
Signed-off-by: Ingo Molnar 
---
 arch/x86/include/asm/fpu/internal.h |  2 --
 arch/x86/kernel/fpu/core.c  | 63 -
 2 files changed, 65 deletions(-)

diff --git a/arch/x86/include/asm/fpu/internal.h b/arch/x86/include/asm/fpu/internal.h
index cf290d4..508e418 100644
--- a/arch/x86/include/asm/fpu/internal.h
+++ b/arch/x86/include/asm/fpu/internal.h
@@ -26,8 +26,6 @@
 extern void fpu__activate_curr(struct fpu *fpu);
 extern void fpu__activate_fpstate_read(struct fpu *fpu);
 extern void fpu__activate_fpstate_write(struct fpu *fpu);
-extern void fpu__current_fpstate_write_begin(void);
-extern void fpu__current_fpstate_write_end(void);
 extern void fpu__save(struct fpu *fpu);
 extern void fpu__restore(struct fpu *fpu);
 extern int  fpu__restore_sig(void __user *buf, int ia32_frame);
diff --git a/arch/x86/kernel/fpu/core.c b/arch/x86/kernel/fpu/core.c
index afd3f2a..b2cdeb3 100644
--- a/arch/x86/kernel/fpu/core.c
+++ b/arch/x86/kernel/fpu/core.c
@@ -317,69 +317,6 @@ void fpu__activate_fpstate_write(struct fpu *fpu)
 }
 
 /*
- * This function must be called before we write the current
- * task's fpstate.
- *
- * This call gets the current FPU register state and moves
- * it in to the 'fpstate'.  Preemption is disabled so that
- * no writes to the 'fpstate' can occur from context
- * swiches.
- *
- * Must be followed by a fpu__current_fpstate_write_end().
- */
-void fpu__current_fpstate_write_begin(void)
-{
-	struct fpu *fpu = &current->thread.fpu;
-
-   /*
-* Ensure that the context-switching code does not write
-* over the fpstate while we are doing our update.
-*/
-   preempt_disable();
-
-   /*
-* Move the fpregs in to the fpu's 'fpstate'.
-*/
-   fpu__activate_fpstate_read(fpu);
-
-   /*
-* The caller is about to write to 'fpu'.  Ensure that no
-* CPU thinks that its fpregs match the fpstate.  This
-* ensures we will not be lazy and skip a XRSTOR in the
-* future.
-*/
-   __fpu_invalidate_fpregs_state(fpu);
-}
-
-/*
- * This function must be paired with fpu__current_fpstate_write_begin()
- *
- * This will ensure that the modified fpstate gets placed back in
- * the fpregs if necessary.
- *
- * Note: This function may be called whether or not an _actual_
- * write to the fpstate occurred.
- */
-void fpu__current_fpstate_write_end(void)
-{
-	struct fpu *fpu = &current->thread.fpu;
-
-   /*
-* 'fpu' now has an updated copy of the state, but the
-* registers may still be out of date.  Update them with
-* an XRSTOR if they are active.
-*/
-   if (fpu->fpstate_active)
-		copy_kernel_to_fpregs(&fpu->state);
-
-   /*
-* Our update is done and the fpregs/fpstate are in sync
-* if necessary.  Context switches can happen again.
-*/
-   preempt_enable();
-}
-
-/*
  * 'fpu__restore()' is called to copy FPU registers from
  * the FPU fpstate to the live hw registers and to activate
  * access to the hardware registers, so that FPU instructions


[tip:x86/fpu] x86/fpu: Simplify and speed up fpu__copy()

2017-09-26 Thread tip-bot for Ingo Molnar
Commit-ID:  e10078eba69859359ce8644dd423b4132a6a8913
Gitweb: http://git.kernel.org/tip/e10078eba69859359ce8644dd423b4132a6a8913
Author: Ingo Molnar 
AuthorDate: Sat, 23 Sep 2017 15:00:14 +0200
Committer:  Ingo Molnar 
CommitDate: Tue, 26 Sep 2017 09:43:44 +0200

x86/fpu: Simplify and speed up fpu__copy()

fpu__copy() has a preempt_disable()/enable() pair, which it had to do to
be able to atomically unlazy the current task when doing an FNSAVE.

But we don't unlazy tasks anymore, we always do direct saves/restores of
FPU context.

So remove both the unnecessary critical section, and update the comments.

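The detail the new comment captures is that the save primitive reports
whether it destroyed the live registers, which is what makes the explicit
critical section unnecessary. A rough userspace analogy of the resulting
control flow follows; the FNSAVE-destroys-registers behaviour is only
emulated here, and nothing below is the kernel implementation:

#include <stdbool.h>
#include <stdio.h>

static int live_regs = 7;	/* stands in for the CPU's FPU registers */

/* Analogy for copy_fpregs_to_fpstate(): returns true if the registers
 * survived the save, false in the FNSAVE-like case that clears them. */
static bool demo_save(int *dst, bool fnsave_style)
{
	*dst = live_regs;
	if (fnsave_style) {
		live_regs = 0;	/* FNSAVE destroys register contents */
		return false;
	}
	return true;
}

int main(void)
{
	int child_state;

	if (!demo_save(&child_state, true)) {
		/* The save clobbered the registers: restore them from the
		 * freshly written state, as the fallback described above does. */
		live_regs = child_state;
	}
	printf("child=%d live=%d\n", child_state, live_regs);
	return 0;
}
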
Cc: Andrew Morton 
Cc: Andy Lutomirski 
Cc: Borislav Petkov 
Cc: Dave Hansen 
Cc: Eric Biggers 
Cc: Fenghua Yu 
Cc: Linus Torvalds 
Cc: Oleg Nesterov 
Cc: Peter Zijlstra 
Cc: Rik van Riel 
Cc: Thomas Gleixner 
Cc: Yu-cheng Yu 
Link: http://lkml.kernel.org/r/20170923130016.21448-32-mi...@kernel.org
Signed-off-by: Ingo Molnar 
---
 arch/x86/kernel/fpu/core.c | 15 +++
 1 file changed, 3 insertions(+), 12 deletions(-)

diff --git a/arch/x86/kernel/fpu/core.c b/arch/x86/kernel/fpu/core.c
index 77668d9..52122dd 100644
--- a/arch/x86/kernel/fpu/core.c
+++ b/arch/x86/kernel/fpu/core.c
@@ -206,22 +206,13 @@ int fpu__copy(struct fpu *dst_fpu, struct fpu *src_fpu)
 * Save current FPU registers directly into the child
 * FPU context, without any memory-to-memory copying.
 *
-* We have to do all this with preemption disabled,
-* mostly because of the FNSAVE case, because in that
-* case we must not allow preemption in the window
-* between the FNSAVE and us marking the context lazy.
-*
-* It shouldn't be an issue as even FNSAVE is plenty
-* fast in terms of critical section length.
+* ( The function 'fails' in the FNSAVE case, which destroys
+*   register contents so we have to copy them back. )
 */
-   preempt_disable();
if (!copy_fpregs_to_fpstate(dst_fpu)) {
-		memcpy(&dst_fpu->state, &src_fpu->state,
-		       fpu_kernel_xstate_size);
-
+		memcpy(&dst_fpu->state, &src_fpu->state, fpu_kernel_xstate_size);
 		copy_kernel_to_fpregs(&dst_fpu->state);
}
-   preempt_enable();
 
trace_x86_fpu_copy_src(src_fpu);
trace_x86_fpu_copy_dst(dst_fpu);


[tip:x86/fpu] x86/fpu: Rename fpu::fpstate_active to fpu::initialized

2017-09-26 Thread tip-bot for Ingo Molnar
Commit-ID:  e4a81bfcaae1ebbdc6efe74e8ea563144d90e9a9
Gitweb: http://git.kernel.org/tip/e4a81bfcaae1ebbdc6efe74e8ea563144d90e9a9
Author: Ingo Molnar 
AuthorDate: Tue, 26 Sep 2017 09:43:36 +0200
Committer:  Ingo Molnar 
CommitDate: Tue, 26 Sep 2017 09:43:36 +0200

x86/fpu: Rename fpu::fpstate_active to fpu::initialized

The x86 FPU code used to have a complex state machine where both the FPU
registers and the FPU state context could be 'active' (or inactive)
independently of each other - which enabled features like lazy FPU restore.

Much of this complexity is gone in the current code: now we basically can
have FPU-less tasks (kernel threads) that don't use (and save/restore) FPU
state at all, plus full FPU users that save/restore directly with no laziness
whatsoever.

But the fpu::fpstate_active still carries bits of the old complexity - meanwhile
this flag has become a simple flag that shows whether the FPU context saving
area in the thread struct is initialized and used, or not.

Rename it to fpu::initialized to express this simplicity in the name as well.

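A compact way to see the simplified semantics: the flag now only answers
"does this task have a valid FPU save area?". A hedged userspace sketch of
how such a flag gates context-switch save/restore follows; the types and
helpers are invented for illustration and are not the kernel's switch_fpu
code:

#include <stdio.h>

struct demo_fpu {
	unsigned char	initialized;	/* save area valid and in use? */
	int		state;		/* stands in for the fpstate   */
};

static int live_regs;

/* Save into the outgoing task's context only if it ever used the FPU: */
static void demo_switch_prepare(struct demo_fpu *old)
{
	if (old->initialized)
		old->state = live_regs;
}

/* Restore from the incoming task's context only if it has valid state: */
static void demo_switch_finish(struct demo_fpu *new)
{
	if (new->initialized)
		live_regs = new->state;
}

int main(void)
{
	struct demo_fpu a = { .initialized = 1, .state = 0 };
	struct demo_fpu b = { .initialized = 0, .state = 0 }; /* FPU-less, like a kernel thread */

	live_regs = 5;
	demo_switch_prepare(&a);	/* a.state becomes 5              */
	demo_switch_finish(&b);		/* no restore: b never used FPU   */
	printf("a.state=%d live=%d\n", a.state, live_regs);
	return 0;
}
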
Cc: Andrew Morton 
Cc: Andy Lutomirski 
Cc: Borislav Petkov 
Cc: Dave Hansen 
Cc: Eric Biggers 
Cc: Fenghua Yu 
Cc: Linus Torvalds 
Cc: Oleg Nesterov 
Cc: Peter Zijlstra 
Cc: Rik van Riel 
Cc: Thomas Gleixner 
Cc: Yu-cheng Yu 
Link: http://lkml.kernel.org/r/20170923130016.21448-30-mi...@kernel.org
Signed-off-by: Ingo Molnar 
---
 arch/x86/ia32/ia32_signal.c |  2 +-
 arch/x86/include/asm/fpu/internal.h |  4 ++--
 arch/x86/include/asm/fpu/types.h|  6 +++---
 arch/x86/include/asm/trace/fpu.h|  8 
 arch/x86/kernel/fpu/core.c  | 24 
 arch/x86/kernel/fpu/init.c  |  2 +-
 arch/x86/kernel/fpu/regset.c|  6 +++---
 arch/x86/kernel/fpu/signal.c|  8 
 arch/x86/kernel/fpu/xstate.c|  2 +-
 arch/x86/kernel/signal.c|  6 +++---
 arch/x86/mm/pkeys.c |  2 +-
 11 files changed, 35 insertions(+), 35 deletions(-)

diff --git a/arch/x86/ia32/ia32_signal.c b/arch/x86/ia32/ia32_signal.c
index e0bb46c..0e2a5ed 100644
--- a/arch/x86/ia32/ia32_signal.c
+++ b/arch/x86/ia32/ia32_signal.c
@@ -231,7 +231,7 @@ static void __user *get_sigframe(struct ksignal *ksig, struct pt_regs *regs,
 ksig->ka.sa.sa_restorer)
sp = (unsigned long) ksig->ka.sa.sa_restorer;
 
-   if (fpu->fpstate_active) {
+   if (fpu->initialized) {
unsigned long fx_aligned, math_size;
 
		sp = fpu__alloc_mathframe(sp, 1, &fx_aligned, &math_size);
diff --git a/arch/x86/include/asm/fpu/internal.h b/arch/x86/include/asm/fpu/internal.h
index 508e418..b26ae05 100644
--- a/arch/x86/include/asm/fpu/internal.h
+++ b/arch/x86/include/asm/fpu/internal.h
@@ -527,7 +527,7 @@ static inline void fpregs_activate(struct fpu *fpu)
 static inline void
 switch_fpu_prepare(struct fpu *old_fpu, int cpu)
 {
-   if (old_fpu->fpstate_active) {
+   if (old_fpu->initialized) {
if (!copy_fpregs_to_fpstate(old_fpu))
old_fpu->last_cpu = -1;
else
@@ -550,7 +550,7 @@ switch_fpu_prepare(struct fpu *old_fpu, int cpu)
 static inline void switch_fpu_finish(struct fpu *new_fpu, int cpu)
 {
bool preload = static_cpu_has(X86_FEATURE_FPU) &&
-  new_fpu->fpstate_active;
+  new_fpu->initialized;
 
if (preload) {
if (!fpregs_state_valid(new_fpu, cpu))
diff --git a/arch/x86/include/asm/fpu/types.h b/arch/x86/include/asm/fpu/types.h
index 71db45c..a152057 100644
--- a/arch/x86/include/asm/fpu/types.h
+++ b/arch/x86/include/asm/fpu/types.h
@@ -293,13 +293,13 @@ struct fpu {
unsigned intlast_cpu;
 
/*
-* @fpstate_active:
+* @initialized:
 *
-* This flag indicates whether this context is active: if the task
+* This flag indicates whether this context is initialized: if the task
 * is not running then we can restore from this context, if the task
 * is running then we should save into this context.
 */
-   unsigned char   fpstate_active;
+   unsigned char   initialized;
 
/*
 * @state:
diff --git a/arch/x86/include/asm/trace/fpu.h b/arch/x86/include/asm/trace/fpu.h
index da565aa..39f7a27 100644
--- a/arch/x86/include/asm/trace/fpu.h
+++ b/arch/x86/include/asm/trace/fpu.h
@@ -12,22 +12,22 @@ DECLARE_EVENT_CLASS(x86_fpu,
 
TP_STRUCT__entry(
__field(struct fpu *, fpu)
-   __field(bool, fpstate_active)
+   __field(bool, initialized)
__field(u64, xfeatures)
__field(u64, xcomp_bv)
),
 
TP_fast_assign(
__entry->fpu= fpu;
-   __entry->fpstate_active = fpu->fpstate_active;
+   __entry->initialized= fpu->initialized;
if 

[tip:x86/fpu] x86/fpu: Rename fpu__activate_curr() to fpu__initialize()

2017-09-26 Thread tip-bot for Ingo Molnar
Commit-ID:  2ce03d850b9a2f17d55596ecfa86e72b5687a627
Gitweb: http://git.kernel.org/tip/2ce03d850b9a2f17d55596ecfa86e72b5687a627
Author: Ingo Molnar 
AuthorDate: Sat, 23 Sep 2017 15:00:15 +0200
Committer:  Ingo Molnar 
CommitDate: Tue, 26 Sep 2017 09:43:44 +0200

x86/fpu: Rename fpu__activate_curr() to fpu__initialize()

Rename this function to better express that it's all about
initializing the FPU state of a task which goes hand in hand
with the fpu::initialized field.

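The rename also makes the call's idempotent shape easier to see: initialize
the save area once, on first use, guarded by the same fpu::initialized flag.
A hedged sketch of that first-use pattern with invented userspace stand-ins
(not the kernel code):

#include <stdio.h>

struct demo_fpu {
	unsigned char	initialized;
	int		state;
};

/* First-use initialization guarded by the flag, the pattern the renamed
 * fpu__initialize() is described as providing for the in-memory context: */
static void demo_initialize(struct demo_fpu *fpu)
{
	if (!fpu->initialized) {
		fpu->state = 0;		/* set up a clean save area */
		fpu->initialized = 1;
	}
}

int main(void)
{
	struct demo_fpu fpu = { 0, 0 };

	demo_initialize(&fpu);	/* initializes on first call */
	fpu.state = 9;
	demo_initialize(&fpu);	/* second call is a no-op    */
	printf("state=%d init=%d\n", fpu.state, fpu.initialized);
	return 0;
}
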
Cc: Andrew Morton 
Cc: Andy Lutomirski 
Cc: Borislav Petkov 
Cc: Dave Hansen 
Cc: Eric Biggers 
Cc: Fenghua Yu 
Cc: Linus Torvalds 
Cc: Oleg Nesterov 
Cc: Peter Zijlstra 
Cc: Rik van Riel 
Cc: Thomas Gleixner 
Cc: Yu-cheng Yu 
Link: http://lkml.kernel.org/r/20170923130016.21448-33-mi...@kernel.org
Signed-off-by: Ingo Molnar 
---
 arch/x86/include/asm/fpu/internal.h | 2 +-
 arch/x86/kernel/fpu/core.c  | 8 
 arch/x86/kernel/fpu/signal.c| 2 +-
 arch/x86/kvm/x86.c  | 2 +-
 arch/x86/math-emu/fpu_entry.c   | 2 +-
 5 files changed, 8 insertions(+), 8 deletions(-)

diff --git a/arch/x86/include/asm/fpu/internal.h b/arch/x86/include/asm/fpu/internal.h
index b26ae05..7c980aa 100644
--- a/arch/x86/include/asm/fpu/internal.h
+++ b/arch/x86/include/asm/fpu/internal.h
@@ -23,7 +23,7 @@
 /*
  * High level FPU state handling functions:
  */
-extern void fpu__activate_curr(struct fpu *fpu);
+extern void fpu__initialize(struct fpu *fpu);
 extern void fpu__activate_fpstate_read(struct fpu *fpu);
 extern void fpu__activate_fpstate_write(struct fpu *fpu);
 extern void fpu__save(struct fpu *fpu);
diff --git a/arch/x86/kernel/fpu/core.c b/arch/x86/kernel/fpu/core.c
index 52122dd..07db9d9 100644
--- a/arch/x86/kernel/fpu/core.c
+++ b/arch/x86/kernel/fpu/core.c
@@ -224,7 +224,7 @@ int fpu__copy(struct fpu *dst_fpu, struct fpu *src_fpu)
  * Activate the current task's in-memory FPU context,
  * if it has not been used before:
  */
-void fpu__activate_curr(struct fpu *fpu)
+void fpu__initialize(struct fpu *fpu)
 {
WARN_ON_FPU(fpu != &current->thread.fpu);
 
@@ -237,7 +237,7 @@ void fpu__activate_curr(struct fpu *fpu)
fpu->initialized = 1;
}
 }
-EXPORT_SYMBOL_GPL(fpu__activate_curr);
+EXPORT_SYMBOL_GPL(fpu__initialize);
 
 /*
  * This function must be called before we read a task's fpstate.
@@ -316,7 +316,7 @@ void fpu__activate_fpstate_write(struct fpu *fpu)
  */
 void fpu__restore(struct fpu *fpu)
 {
-   fpu__activate_curr(fpu);
+   fpu__initialize(fpu);
 
/* Avoid __kernel_fpu_begin() right after fpregs_activate() */
kernel_fpu_disable();
@@ -392,7 +392,7 @@ void fpu__clear(struct fpu *fpu)
 */
if (static_cpu_has(X86_FEATURE_FPU)) {
preempt_disable();
-   fpu__activate_curr(fpu);
+   fpu__initialize(fpu);
user_fpu_begin();
copy_init_fpstate_to_fpregs();
preempt_enable();
diff --git a/arch/x86/kernel/fpu/signal.c b/arch/x86/kernel/fpu/signal.c
index ab2dd24..7fa3bdb 100644
--- a/arch/x86/kernel/fpu/signal.c
+++ b/arch/x86/kernel/fpu/signal.c
@@ -280,7 +280,7 @@ static int __fpu__restore_sig(void __user *buf, void __user *buf_fx, int size)
if (!access_ok(VERIFY_READ, buf, size))
return -EACCES;
 
-   fpu__activate_curr(fpu);
+   fpu__initialize(fpu);
 
if (!static_cpu_has(X86_FEATURE_FPU))
return fpregs_soft_set(current, NULL,
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index cd17b7d..03869eb 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -7225,7 +7225,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
int r;
sigset_t sigsaved;
 
-   fpu__activate_curr(fpu);
+   fpu__initialize(fpu);
 
if (vcpu->sigset_active)
sigprocmask(SIG_SETMASK, &vcpu->sigset, &sigsaved);
diff --git a/arch/x86/math-emu/fpu_entry.c b/arch/x86/math-emu/fpu_entry.c
index d4a7df2..220638a 100644
--- a/arch/x86/math-emu/fpu_entry.c
+++ b/arch/x86/math-emu/fpu_entry.c
@@ -114,7 +114,7 @@ void math_emulate(struct math_emu_info *info)
struct desc_struct code_descriptor;
struct fpu *fpu = &current->thread.fpu;
 
-   fpu__activate_curr(fpu);
+   fpu__initialize(fpu);
 
 #ifdef RE_ENTRANT_CHECKING
if (emulating) {


[tip:x86/fpu] x86/fpu: Fix stale comments about lazy FPU logic

2017-09-26 Thread tip-bot for Ingo Molnar
Commit-ID:  7f1487c59b7c6dcb20155f4302985da2659a2997
Gitweb: http://git.kernel.org/tip/7f1487c59b7c6dcb20155f4302985da2659a2997
Author: Ingo Molnar 
AuthorDate: Sat, 23 Sep 2017 15:00:13 +0200
Committer:  Ingo Molnar 
CommitDate: Tue, 26 Sep 2017 09:43:43 +0200

x86/fpu: Fix stale comments about lazy FPU logic

We don't do any lazy restore anymore; what we have are two pieces of
optimization:

 - no-FPU tasks that don't save/restore the FPU context (kernel threads
   are such)

 - cached FPU registers maintained via the fpu->last_cpu field. This means that
   if an FPU-using task context switches to a non-FPU task, we can keep the
   FPU registers as an in-FPU copy (cache) and skip restoring them once we
   switch back to the original FPU-using task (see the sketch below).

Update all the comments that still referred to old 'lazy' and 'unlazy' concepts.
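
A minimal sketch of the cache-validity test behind the second optimization
(close to fpregs_state_valid() in fpu/internal.h; shown here for illustration,
it is not part of this patch):

	/* The fpregs cache is usable iff this CPU still owns this context: */
	static inline int fpregs_state_valid(struct fpu *fpu, unsigned int cpu)
	{
		return fpu == this_cpu_read_stable(fpu_fpregs_owner_ctx) &&
		       cpu == fpu->last_cpu;
	}

If this holds when the original FPU-using task is scheduled back in, the
register restore can be skipped entirely.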

Cc: Andrew Morton 
Cc: Andy Lutomirski 
Cc: Borislav Petkov 
Cc: Dave Hansen 
Cc: Eric Biggers 
Cc: Fenghua Yu 
Cc: Linus Torvalds 
Cc: Oleg Nesterov 
Cc: Peter Zijlstra 
Cc: Rik van Riel 
Cc: Thomas Gleixner 
Cc: Yu-cheng Yu 
Link: http://lkml.kernel.org/r/20170923130016.21448-31-mi...@kernel.org
Signed-off-by: Ingo Molnar 
---
 arch/x86/kernel/fpu/core.c | 9 +++--
 1 file changed, 3 insertions(+), 6 deletions(-)

diff --git a/arch/x86/kernel/fpu/core.c b/arch/x86/kernel/fpu/core.c
index c8d6032..77668d9 100644
--- a/arch/x86/kernel/fpu/core.c
+++ b/arch/x86/kernel/fpu/core.c
@@ -205,9 +205,6 @@ int fpu__copy(struct fpu *dst_fpu, struct fpu *src_fpu)
/*
 * Save current FPU registers directly into the child
 * FPU context, without any memory-to-memory copying.
-* In lazy mode, if the FPU context isn't loaded into
-* fpregs, CR0.TS will be set and do_device_not_available
-* will load the FPU context.
 *
 * We have to do all this with preemption disabled,
 * mostly because of the FNSAVE case, because in that
@@ -285,13 +282,13 @@ void fpu__activate_fpstate_read(struct fpu *fpu)
 /*
  * This function must be called before we write a task's fpstate.
  *
- * If the task has used the FPU before then unlazy it.
+ * If the task has used the FPU before then invalidate any cached FPU registers.
  * If the task has not used the FPU before then initialize its fpstate.
  *
  * After this function call, after registers in the fpstate are
  * modified and the child task has woken up, the child task will
  * restore the modified FPU state from the modified context. If we
- * didn't clear its lazy status here then the lazy in-registers
+ * didn't clear its cached status here then the cached in-registers
  * state pending on its former CPU could be restored, corrupting
  * the modifications.
  */
@@ -304,7 +301,7 @@ void fpu__activate_fpstate_write(struct fpu *fpu)
WARN_ON_FPU(fpu == &current->thread.fpu);
 
if (fpu->initialized) {
-   /* Invalidate any lazy state: */
+   /* Invalidate any cached state: */
__fpu_invalidate_fpregs_state(fpu);
} else {
fpstate_init(&fpu->state);


[tip:x86/fpu] x86/fpu: Fix fpu__activate_fpstate_read() and update comments

2017-09-26 Thread tip-bot for Ingo Molnar
Commit-ID:  4618e90965f272fe522f2af2523a60d0d4bc78f3
Gitweb: http://git.kernel.org/tip/4618e90965f272fe522f2af2523a60d0d4bc78f3
Author: Ingo Molnar 
AuthorDate: Sat, 23 Sep 2017 15:00:10 +0200
Committer:  Ingo Molnar 
CommitDate: Tue, 26 Sep 2017 09:41:09 +0200

x86/fpu: Fix fpu__activate_fpstate_read() and update comments

fpu__activate_fpstate_read() can be called for the current task
when coredumping - or for stopped tasks when ptrace-ing.

Implement this properly in the code and update the comments.

This also fixes an incorrect (but harmless) warning introduced by
one of the earlier patches.
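
Roughly, the resulting logic looks like this (a sketch of the function after
this patch, with the tracepoint calls omitted):

	void fpu__activate_fpstate_read(struct fpu *fpu)
	{
		if (fpu == &current->thread.fpu) {
			/* Coredump case: save the live registers into the fpstate: */
			fpu__save(fpu);
		} else {
			/* Stopped task (ptrace): regs were saved at switch-out: */
			if (!fpu->fpstate_active) {
				fpstate_init(&fpu->state);
				fpu->fpstate_active = 1;
			}
		}
	}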

Cc: Andrew Morton 
Cc: Andy Lutomirski 
Cc: Borislav Petkov 
Cc: Dave Hansen 
Cc: Eric Biggers 
Cc: Fenghua Yu 
Cc: Linus Torvalds 
Cc: Oleg Nesterov 
Cc: Peter Zijlstra 
Cc: Rik van Riel 
Cc: Thomas Gleixner 
Cc: Yu-cheng Yu 
Link: http://lkml.kernel.org/r/20170923130016.21448-28-mi...@kernel.org
Signed-off-by: Ingo Molnar 
---
 arch/x86/kernel/fpu/core.c | 17 ++---
 1 file changed, 10 insertions(+), 7 deletions(-)

diff --git a/arch/x86/kernel/fpu/core.c b/arch/x86/kernel/fpu/core.c
index 93103a9..afd3f2a 100644
--- a/arch/x86/kernel/fpu/core.c
+++ b/arch/x86/kernel/fpu/core.c
@@ -254,18 +254,21 @@ EXPORT_SYMBOL_GPL(fpu__activate_curr);
 /*
  * This function must be called before we read a task's fpstate.
  *
- * If the task has not used the FPU before then initialize its
- * fpstate.
+ * There's two cases where this gets called:
+ *
+ * - for the current task (when coredumping), in which case we have
+ *   to save the latest FPU registers into the fpstate,
+ *
+ * - or it's called for stopped tasks (ptrace), in which case the
+ *   registers were already saved by the context-switch code when
+ *   the task scheduled out - we only have to initialize the registers
+ *   if they've never been initialized.
  *
  * If the task has used the FPU before then save it.
  */
 void fpu__activate_fpstate_read(struct fpu *fpu)
 {
-   /*
-* If fpregs are active (in the current CPU), then
-* copy them to the fpstate:
-*/
-   if (fpu->fpstate_active) {
+   if (fpu == &current->thread.fpu) {
fpu__save(fpu);
} else {
if (!fpu->fpstate_active) {


[tip:x86/fpu] x86/fpu: Remove struct fpu::fpregs_active

2017-09-26 Thread tip-bot for Ingo Molnar
Commit-ID:  99dc26bda233ee722bbd370bddf20beece3ffb93
Gitweb: http://git.kernel.org/tip/99dc26bda233ee722bbd370bddf20beece3ffb93
Author: Ingo Molnar 
AuthorDate: Sat, 23 Sep 2017 15:00:03 +0200
Committer:  Ingo Molnar 
CommitDate: Sun, 24 Sep 2017 13:04:34 +0200

x86/fpu: Remove struct fpu::fpregs_active

The previous changes paved the way for the removal of the
fpu::fpregs_active state flag - we now only have the
fpu::fpstate_active and fpu::last_cpu fields left.
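
After this patch the per-task FPU bookkeeping reduces to roughly the following
field subset (a sketch only - the real struct fpu in fpu/types.h carries
further members):

	struct fpu {
		unsigned int		last_cpu;	/* CPU the fpregs were last loaded on */
		unsigned char		fpstate_active;	/* in-memory context is initialized */
		union fpregs_state	state;		/* saved register contents */
	};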

Cc: Andrew Morton 
Cc: Andy Lutomirski 
Cc: Andy Lutomirski 
Cc: Borislav Petkov 
Cc: Dave Hansen 
Cc: Eric Biggers 
Cc: Fenghua Yu 
Cc: Linus Torvalds 
Cc: Oleg Nesterov 
Cc: Peter Zijlstra 
Cc: Rik van Riel 
Cc: Thomas Gleixner 
Cc: Yu-cheng Yu 
Link: http://lkml.kernel.org/r/20170923130016.21448-21-mi...@kernel.org
Signed-off-by: Ingo Molnar 
---
 arch/x86/include/asm/fpu/internal.h |  5 -
 arch/x86/include/asm/fpu/types.h| 23 ---
 arch/x86/include/asm/trace/fpu.h|  5 +
 arch/x86/kernel/fpu/core.c  |  9 -
 arch/x86/kernel/fpu/signal.c|  2 --
 5 files changed, 1 insertion(+), 43 deletions(-)

diff --git a/arch/x86/include/asm/fpu/internal.h b/arch/x86/include/asm/fpu/internal.h
index 42a6016..629e7abc 100644
--- a/arch/x86/include/asm/fpu/internal.h
+++ b/arch/x86/include/asm/fpu/internal.h
@@ -526,14 +526,12 @@ static inline int fpregs_state_valid(struct fpu *fpu, unsigned int cpu)
  */
 static inline void fpregs_deactivate(struct fpu *fpu)
 {
-   fpu->fpregs_active = 0;
this_cpu_write(fpu_fpregs_owner_ctx, NULL);
trace_x86_fpu_regs_deactivated(fpu);
 }
 
 static inline void fpregs_activate(struct fpu *fpu)
 {
-   fpu->fpregs_active = 1;
this_cpu_write(fpu_fpregs_owner_ctx, fpu);
trace_x86_fpu_regs_activated(fpu);
 }
@@ -552,8 +550,6 @@ static inline void fpregs_activate(struct fpu *fpu)
 static inline void
 switch_fpu_prepare(struct fpu *old_fpu, int cpu)
 {
-   WARN_ON_FPU(old_fpu->fpregs_active != old_fpu->fpstate_active);
-
if (old_fpu->fpstate_active) {
if (!copy_fpregs_to_fpstate(old_fpu))
old_fpu->last_cpu = -1;
@@ -561,7 +557,6 @@ switch_fpu_prepare(struct fpu *old_fpu, int cpu)
old_fpu->last_cpu = cpu;
 
/* But leave fpu_fpregs_owner_ctx! */
-   old_fpu->fpregs_active = 0;
trace_x86_fpu_regs_deactivated(old_fpu);
} else
old_fpu->last_cpu = -1;
diff --git a/arch/x86/include/asm/fpu/types.h b/arch/x86/include/asm/fpu/types.h
index 3c80f5b..0c314a3 100644
--- a/arch/x86/include/asm/fpu/types.h
+++ b/arch/x86/include/asm/fpu/types.h
@@ -299,29 +299,6 @@ struct fpu {
unsigned char   fpstate_active;
 
/*
-* @fpregs_active:
-*
-* This flag determines whether a given context is actively
-* loaded into the FPU's registers and that those registers
-* represent the task's current FPU state.
-*
-* Note the interaction with fpstate_active:
-*
-*   # task does not use the FPU:
-*   fpstate_active == 0
-*
-*   # task uses the FPU and regs are active:
-*   fpstate_active == 1 && fpregs_active == 1
-*
-*   # the regs are inactive but still match fpstate:
-*   fpstate_active == 1 && fpregs_active == 0 && fpregs_owner == fpu
-*
-* The third state is what we use for the lazy restore optimization
-* on lazy-switching CPUs.
-*/
-   unsigned char   fpregs_active;
-
-   /*
 * @state:
 *
 * In-memory copy of all FPU registers that we save/restore
diff --git a/arch/x86/include/asm/trace/fpu.h b/arch/x86/include/asm/trace/fpu.h
index 342e597..da565aa 100644
--- a/arch/x86/include/asm/trace/fpu.h
+++ b/arch/x86/include/asm/trace/fpu.h
@@ -12,7 +12,6 @@ DECLARE_EVENT_CLASS(x86_fpu,
 
TP_STRUCT__entry(
__field(struct fpu *, fpu)
-   __field(bool, fpregs_active)
__field(bool, fpstate_active)
__field(u64, xfeatures)
__field(u64, xcomp_bv)
@@ -20,16 +19,14 @@ DECLARE_EVENT_CLASS(x86_fpu,
 
TP_fast_assign(
__entry->fpu = fpu;
-   __entry->fpregs_active  = fpu->fpregs_active;
__entry->fpstate_active = fpu->fpstate_active;
if (boot_cpu_has(X86_FEATURE_OSXSAVE)) {
__entry->xfeatures = fpu->state.xsave.header.xfeatures;
__entry->xcomp_bv  = fpu->state.xsave.header.xcomp_bv;
}
),
-   TP_printk("x86/fpu: %p fpregs_active: %d fpstate_active: %d xfeatures: %llx xcomp_bv: %llx",
+   TP_printk("x86/fpu: %p fpstate_active: %d xfeatures: %llx xcomp_bv: %llx",
__entry->fpu,
-   __entry->fpregs_active,
   

[tip:x86/fpu] x86/fpu: Decouple fpregs_activate()/fpregs_deactivate() from fpu->fpregs_active

2017-09-26 Thread tip-bot for Ingo Molnar
Commit-ID:  6cf4edbe0526db311a28734609da888fdfcb3604
Gitweb: http://git.kernel.org/tip/6cf4edbe0526db311a28734609da888fdfcb3604
Author: Ingo Molnar 
AuthorDate: Sat, 23 Sep 2017 15:00:02 +0200
Committer:  Ingo Molnar 
CommitDate: Sun, 24 Sep 2017 13:04:34 +0200

x86/fpu: Decouple fpregs_activate()/fpregs_deactivate() from fpu->fpregs_active

The fpregs_activate()/fpregs_deactivate() pair is currently called in the
following pattern:

if (!fpu->fpregs_active)
fpregs_activate(fpu);

...

if (fpu->fpregs_active)
fpregs_deactivate(fpu);

But note that it's actually safe to call them without checking the flag first:
both helpers only perform unconditional writes (the flag and the per-CPU
fpu_fpregs_owner_ctx pointer), so a redundant call is harmless.

This further decouples the fpu->fpregs_active flag from actual FPU logic.

Cc: Andrew Morton 
Cc: Andy Lutomirski 
Cc: Andy Lutomirski 
Cc: Borislav Petkov 
Cc: Dave Hansen 
Cc: Eric Biggers 
Cc: Fenghua Yu 
Cc: Linus Torvalds 
Cc: Oleg Nesterov 
Cc: Peter Zijlstra 
Cc: Rik van Riel 
Cc: Thomas Gleixner 
Cc: Yu-cheng Yu 
Link: http://lkml.kernel.org/r/20170923130016.21448-20-mi...@kernel.org
Signed-off-by: Ingo Molnar 
---
 arch/x86/include/asm/fpu/internal.h | 7 +--
 arch/x86/kernel/fpu/core.c  | 3 +--
 2 files changed, 2 insertions(+), 8 deletions(-)

diff --git a/arch/x86/include/asm/fpu/internal.h b/arch/x86/include/asm/fpu/internal.h
index 7fa676f..42a6016 100644
--- a/arch/x86/include/asm/fpu/internal.h
+++ b/arch/x86/include/asm/fpu/internal.h
@@ -526,8 +526,6 @@ static inline int fpregs_state_valid(struct fpu *fpu, unsigned int cpu)
  */
 static inline void fpregs_deactivate(struct fpu *fpu)
 {
-   WARN_ON_FPU(!fpu->fpregs_active);
-
fpu->fpregs_active = 0;
this_cpu_write(fpu_fpregs_owner_ctx, NULL);
trace_x86_fpu_regs_deactivated(fpu);
@@ -535,8 +533,6 @@ static inline void fpregs_deactivate(struct fpu *fpu)
 
 static inline void fpregs_activate(struct fpu *fpu)
 {
-   WARN_ON_FPU(fpu->fpregs_active);
-
fpu->fpregs_active = 1;
this_cpu_write(fpu_fpregs_owner_ctx, fpu);
trace_x86_fpu_regs_activated(fpu);
@@ -604,8 +600,7 @@ static inline void user_fpu_begin(void)
struct fpu *fpu = &current->thread.fpu;
 
preempt_disable();
-   if (!fpu->fpregs_active)
-   fpregs_activate(fpu);
+   fpregs_activate(fpu);
preempt_enable();
 }
 
diff --git a/arch/x86/kernel/fpu/core.c b/arch/x86/kernel/fpu/core.c
index eab2446..01a47e9 100644
--- a/arch/x86/kernel/fpu/core.c
+++ b/arch/x86/kernel/fpu/core.c
@@ -426,8 +426,7 @@ void fpu__drop(struct fpu *fpu)
asm volatile("1: fwait\n"
 "2:\n"
 _ASM_EXTABLE(1b, 2b));
-   if (fpu->fpregs_active)
-   fpregs_deactivate(fpu);
+   fpregs_deactivate(fpu);
}
} else {
WARN_ON_FPU(fpu->fpregs_active);


[tip:x86/fpu] x86/fpu: Split the state handling in fpu__drop()

2017-09-26 Thread tip-bot for Ingo Molnar
Commit-ID:  b6aa85558d7e7b18fc3470d2bc1731d2205dd275
Gitweb: http://git.kernel.org/tip/b6aa85558d7e7b18fc3470d2bc1731d2205dd275
Author: Ingo Molnar 
AuthorDate: Sat, 23 Sep 2017 15:00:00 +0200
Committer:  Ingo Molnar 
CommitDate: Sun, 24 Sep 2017 13:04:34 +0200

x86/fpu: Split the state handling in fpu__drop()

Prepare fpu__drop() to use fpu->fpregs_active.

There are two distinct usecases for fpu__drop() in this context:
exit_thread() when called for 'current' in exit(), and when called
for another task in fork().

This patch does not change behavior; it only adds a couple of
debug checks and structures the code to make the ->fpregs_active
change more obviously correct.

All the complications will be removed later on.
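
Schematically, the two call contexts are (a hypothetical call-site sketch
based on the description above, not the literal call sites):

	/* exit(): exit_thread() drops the FPU state of 'current': */
	fpu__drop(&current->thread.fpu);

	/* fork(): dropping the state of another (child) task: */
	fpu__drop(&child->thread.fpu);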

Cc: Andrew Morton 
Cc: Andy Lutomirski 
Cc: Andy Lutomirski 
Cc: Borislav Petkov 
Cc: Dave Hansen 
Cc: Eric Biggers 
Cc: Fenghua Yu 
Cc: Linus Torvalds 
Cc: Oleg Nesterov 
Cc: Peter Zijlstra 
Cc: Rik van Riel 
Cc: Thomas Gleixner 
Cc: Yu-cheng Yu 
Link: http://lkml.kernel.org/r/20170923130016.21448-18-mi...@kernel.org
Signed-off-by: Ingo Molnar 
---
 arch/x86/kernel/fpu/core.c | 19 +--
 1 file changed, 13 insertions(+), 6 deletions(-)

diff --git a/arch/x86/kernel/fpu/core.c b/arch/x86/kernel/fpu/core.c
index b7dc383..815dfba 100644
--- a/arch/x86/kernel/fpu/core.c
+++ b/arch/x86/kernel/fpu/core.c
@@ -414,12 +414,19 @@ void fpu__drop(struct fpu *fpu)
 {
preempt_disable();
 
-   if (fpu->fpregs_active) {
-   /* Ignore delayed exceptions from user space */
-   asm volatile("1: fwait\n"
-"2:\n"
-_ASM_EXTABLE(1b, 2b));
-   fpregs_deactivate(fpu);
+   if (fpu == &current->thread.fpu) {
+   WARN_ON_FPU(fpu->fpstate_active != fpu->fpregs_active);
+
+   if (fpu->fpregs_active) {
+   /* Ignore delayed exceptions from user space */
+   asm volatile("1: fwait\n"
+"2:\n"
+_ASM_EXTABLE(1b, 2b));
+   if (fpu->fpregs_active)
+   fpregs_deactivate(fpu);
+   }
+   } else {
+   WARN_ON_FPU(fpu->fpregs_active);
}
 
fpu->fpstate_active = 0;


[tip:x86/fpu] x86/fpu: Change fpu->fpregs_active users to fpu->fpstate_active

2017-09-26 Thread tip-bot for Ingo Molnar
Commit-ID:  f1c8cd0176078c7bcafdc89cac447cab672a0b5e
Gitweb: http://git.kernel.org/tip/f1c8cd0176078c7bcafdc89cac447cab672a0b5e
Author: Ingo Molnar 
AuthorDate: Sat, 23 Sep 2017 15:00:01 +0200
Committer:  Ingo Molnar 
CommitDate: Sun, 24 Sep 2017 13:04:34 +0200

x86/fpu: Change fpu->fpregs_active users to fpu->fpstate_active

We want to simplify the FPU state machine by eliminating fpu->fpregs_active,
and we can do that because the two state flags (::fpregs_active and
::fpstate_active) are set essentially together.

The old lazy FPU switching code used to make a distinction - but there's
no lazy switching code anymore; we always switch in an 'eager' fashion.

Do this by first changing all substantial uses of fpu->fpregs_active
to fpu->fpstate_active and adding a few debug checks to double check
our assumption is correct.
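
For reference, the two flags are exercised together across a context switch,
which brackets the FPU work like this (a sketch, not the literal scheduler
code; 'prev'/'next' are the outgoing and incoming tasks):

	switch_fpu_prepare(&prev->thread.fpu, cpu);	/* save prev's fpregs */
	/* ... low-level stack/register switch ... */
	switch_fpu_finish(&next->thread.fpu, cpu);	/* reload fpregs if the cached copy is stale */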

Cc: Andrew Morton 
Cc: Andy Lutomirski 
Cc: Andy Lutomirski 
Cc: Borislav Petkov 
Cc: Dave Hansen 
Cc: Eric Biggers 
Cc: Fenghua Yu 
Cc: Linus Torvalds 
Cc: Oleg Nesterov 
Cc: Peter Zijlstra 
Cc: Rik van Riel 
Cc: Thomas Gleixner 
Cc: Yu-cheng Yu 
Link: http://lkml.kernel.org/r/20170923130016.21448-19-mi...@kernel.org
Signed-off-by: Ingo Molnar 
---
 arch/x86/include/asm/fpu/internal.h |  4 +++-
 arch/x86/kernel/fpu/core.c  | 16 ++--
 arch/x86/kernel/fpu/signal.c|  4 +++-
 arch/x86/mm/pkeys.c |  3 +--
 4 files changed, 17 insertions(+), 10 deletions(-)

diff --git a/arch/x86/include/asm/fpu/internal.h b/arch/x86/include/asm/fpu/internal.h
index b223c57..7fa676f 100644
--- a/arch/x86/include/asm/fpu/internal.h
+++ b/arch/x86/include/asm/fpu/internal.h
@@ -556,7 +556,9 @@ static inline void fpregs_activate(struct fpu *fpu)
 static inline void
 switch_fpu_prepare(struct fpu *old_fpu, int cpu)
 {
-   if (old_fpu->fpregs_active) {
+   WARN_ON_FPU(old_fpu->fpregs_active != old_fpu->fpstate_active);
+
+   if (old_fpu->fpstate_active) {
if (!copy_fpregs_to_fpstate(old_fpu))
old_fpu->last_cpu = -1;
else
diff --git a/arch/x86/kernel/fpu/core.c b/arch/x86/kernel/fpu/core.c
index 815dfba..eab2446 100644
--- a/arch/x86/kernel/fpu/core.c
+++ b/arch/x86/kernel/fpu/core.c
@@ -100,7 +100,7 @@ void __kernel_fpu_begin(void)
 
kernel_fpu_disable();
 
-   if (fpu->fpregs_active) {
+   if (fpu->fpstate_active) {
/*
 * Ignore return value -- we don't care if reg state
 * is clobbered.
@@ -116,7 +116,7 @@ void __kernel_fpu_end(void)
 {
struct fpu *fpu = &current->thread.fpu;
 
-   if (fpu->fpregs_active)
+   if (fpu->fpstate_active)
copy_kernel_to_fpregs(&fpu->state);
 
kernel_fpu_enable();
@@ -147,8 +147,10 @@ void fpu__save(struct fpu *fpu)
WARN_ON_FPU(fpu != &current->thread.fpu);
 
preempt_disable();
+   WARN_ON_FPU(fpu->fpstate_active != fpu->fpregs_active);
+
trace_x86_fpu_before_save(fpu);
-   if (fpu->fpregs_active) {
+   if (fpu->fpstate_active) {
if (!copy_fpregs_to_fpstate(fpu)) {
copy_kernel_to_fpregs(&fpu->state);
}
@@ -262,11 +264,12 @@ EXPORT_SYMBOL_GPL(fpu__activate_curr);
  */
 void fpu__activate_fpstate_read(struct fpu *fpu)
 {
+   WARN_ON_FPU(fpu->fpstate_active != fpu->fpregs_active);
/*
 * If fpregs are active (in the current CPU), then
 * copy them to the fpstate:
 */
-   if (fpu->fpregs_active) {
+   if (fpu->fpstate_active) {
fpu__save(fpu);
} else {
if (!fpu->fpstate_active) {
@@ -362,12 +365,13 @@ void fpu__current_fpstate_write_end(void)
 {
struct fpu *fpu = &current->thread.fpu;
 
+   WARN_ON_FPU(fpu->fpstate_active != fpu->fpregs_active);
/*
 * 'fpu' now has an updated copy of the state, but the
 * registers may still be out of date.  Update them with
 * an XRSTOR if they are active.
 */
-   if (fpu->fpregs_active)
+   if (fpu->fpstate_active)
copy_kernel_to_fpregs(&fpu->state);
 
/*
@@ -417,7 +421,7 @@ void fpu__drop(struct fpu *fpu)
if (fpu == &current->thread.fpu) {
WARN_ON_FPU(fpu->fpstate_active != fpu->fpregs_active);
 
-   if (fpu->fpregs_active) {
+   if (fpu->fpstate_active) {
/* Ignore delayed exceptions from user space */
asm volatile("1: fwait\n"
 "2:\n"
diff --git a/arch/x86/kernel/fpu/signal.c b/arch/x86/kernel/fpu/signal.c
index 6840256..a88083b 100644
--- a/arch/x86/kernel/fpu/signal.c
+++ b/arch/x86/kernel/fpu/signal.c
@@ -171,7 +171,9 @@ int copy_fpstate_to_sigframe(void __user *buf, void __user *buf_fx, int size)
sizeof(struct user_i387_ia32_struct), NULL,
(struct _fpstate_32 __user *) buf) ? -1 : 1;
 
-   if (fpu->fpregs_active || using_compacted_format()) 

[tip:x86/fpu] x86/fpu: Make the fpu state change in fpu__clear() scheduler-atomic

2017-09-26 Thread tip-bot for Ingo Molnar
Commit-ID:  a10b6a16cdad88170f546d008c77453cddf918e6
Gitweb: http://git.kernel.org/tip/a10b6a16cdad88170f546d008c77453cddf918e6
Author: Ingo Molnar 
AuthorDate: Sat, 23 Sep 2017 14:59:59 +0200
Committer:  Ingo Molnar 
CommitDate: Sun, 24 Sep 2017 13:04:33 +0200

x86/fpu: Make the fpu state change in fpu__clear() scheduler-atomic

Do this temporarily only, to make it easier to change the FPU state machine;
in particular, this change couples the fpu->fpregs_active and fpu->fpstate_active
states: they are only set/cleared together (as far as the scheduler sees them).

This will be removed by later patches.

Cc: Andrew Morton 
Cc: Andy Lutomirski 
Cc: Andy Lutomirski 
Cc: Borislav Petkov 
Cc: Dave Hansen 
Cc: Eric Biggers 
Cc: Fenghua Yu 
Cc: Linus Torvalds 
Cc: Oleg Nesterov 
Cc: Peter Zijlstra 
Cc: Rik van Riel 
Cc: Thomas Gleixner 
Cc: Yu-cheng Yu 
Link: http://lkml.kernel.org/r/20170923130016.21448-17-mi...@kernel.org
Signed-off-by: Ingo Molnar 
---
 arch/x86/kernel/fpu/core.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/arch/x86/kernel/fpu/core.c b/arch/x86/kernel/fpu/core.c
index bad5724..b7dc383 100644
--- a/arch/x86/kernel/fpu/core.c
+++ b/arch/x86/kernel/fpu/core.c
@@ -462,9 +462,11 @@ void fpu__clear(struct fpu *fpu)
 * Make sure fpstate is cleared and initialized.
 */
if (static_cpu_has(X86_FEATURE_FPU)) {
+   preempt_disable();
fpu__activate_curr(fpu);
user_fpu_begin();
copy_init_fpstate_to_fpregs();
+   preempt_enable();
}
 }
 


[tip:x86/fpu] x86/fpu: Simplify fpu->fpregs_active use

2017-09-26 Thread tip-bot for Ingo Molnar
Commit-ID:  b3a163081c28d1a4d1ad76259a9d93b34a82f1da
Gitweb: http://git.kernel.org/tip/b3a163081c28d1a4d1ad76259a9d93b34a82f1da
Author: Ingo Molnar 
AuthorDate: Sat, 23 Sep 2017 14:59:58 +0200
Committer:  Ingo Molnar 
CommitDate: Sun, 24 Sep 2017 13:04:33 +0200

x86/fpu: Simplify fpu->fpregs_active use

The fpregs_active() inline function is pretty pointless - in almost
all the callsites it can be replaced with a direct fpu->fpregs_active
access.

Do so and eliminate the extra layer of obfuscation.

Cc: Andrew Morton 
Cc: Andy Lutomirski 
Cc: Andy Lutomirski 
Cc: Borislav Petkov 
Cc: Dave Hansen 
Cc: Eric Biggers 
Cc: Fenghua Yu 
Cc: Linus Torvalds 
Cc: Oleg Nesterov 
Cc: Peter Zijlstra 
Cc: Rik van Riel 
Cc: Thomas Gleixner 
Cc: Yu-cheng Yu 
Link: http://lkml.kernel.org/r/20170923130016.21448-16-mi...@kernel.org
Signed-off-by: Ingo Molnar 
---
 arch/x86/include/asm/fpu/internal.h | 17 +
 arch/x86/kernel/fpu/core.c  |  2 +-
 arch/x86/kernel/fpu/signal.c|  9 +
 arch/x86/mm/pkeys.c |  2 +-
 4 files changed, 8 insertions(+), 22 deletions(-)

diff --git a/arch/x86/include/asm/fpu/internal.h b/arch/x86/include/asm/fpu/internal.h
index 554cdb2..b223c57 100644
--- a/arch/x86/include/asm/fpu/internal.h
+++ b/arch/x86/include/asm/fpu/internal.h
@@ -543,21 +543,6 @@ static inline void fpregs_activate(struct fpu *fpu)
 }
 
 /*
- * The question "does this thread have fpu access?"
- * is slightly racy, since preemption could come in
- * and revoke it immediately after the test.
- *
- * However, even in that very unlikely scenario,
- * we can just assume we have FPU access - typically
- * to save the FP state - we'll just take a #NM
- * fault and get the FPU access back.
- */
-static inline int fpregs_active(void)
-{
-   return current->thread.fpu.fpregs_active;
-}
-
-/*
  * FPU state switching for scheduling.
  *
  * This is a two-stage process:
@@ -617,7 +602,7 @@ static inline void user_fpu_begin(void)
struct fpu *fpu = &current->thread.fpu;
 
preempt_disable();
-   if (!fpregs_active())
+   if (!fpu->fpregs_active)
fpregs_activate(fpu);
preempt_enable();
 }
diff --git a/arch/x86/kernel/fpu/core.c b/arch/x86/kernel/fpu/core.c
index e1114f0..bad5724 100644
--- a/arch/x86/kernel/fpu/core.c
+++ b/arch/x86/kernel/fpu/core.c
@@ -367,7 +367,7 @@ void fpu__current_fpstate_write_end(void)
 * registers may still be out of date.  Update them with
 * an XRSTOR if they are active.
 */
-   if (fpregs_active())
+   if (fpu->fpregs_active)
copy_kernel_to_fpregs(&fpu->state);
 
/*
diff --git a/arch/x86/kernel/fpu/signal.c b/arch/x86/kernel/fpu/signal.c
index 2d682da..6840256 100644
--- a/arch/x86/kernel/fpu/signal.c
+++ b/arch/x86/kernel/fpu/signal.c
@@ -155,7 +155,8 @@ static inline int copy_fpregs_to_sigframe(struct xregs_state __user *buf)
  */
 int copy_fpstate_to_sigframe(void __user *buf, void __user *buf_fx, int size)
 {
-   struct xregs_state *xsave = &current->thread.fpu.state.xsave;
+   struct fpu *fpu = &current->thread.fpu;
+   struct xregs_state *xsave = &fpu->state.xsave;
struct task_struct *tsk = current;
int ia32_fxstate = (buf != buf_fx);
 
@@ -170,13 +171,13 @@ int copy_fpstate_to_sigframe(void __user *buf, void __user *buf_fx, int size)
sizeof(struct user_i387_ia32_struct), NULL,
(struct _fpstate_32 __user *) buf) ? -1 : 1;
 
-   if (fpregs_active() || using_compacted_format()) {
+   if (fpu->fpregs_active || using_compacted_format()) {
/* Save the live register state to the user directly. */
if (copy_fpregs_to_sigframe(buf_fx))
return -1;
/* Update the thread's fxstate to save the fsave header. */
if (ia32_fxstate)
-   copy_fxregs_to_kernel(&tsk->thread.fpu);
+   copy_fxregs_to_kernel(fpu);
} else {
/*
 * It is a *bug* if kernel uses compacted-format for xsave
@@ -189,7 +190,7 @@ int copy_fpstate_to_sigframe(void __user *buf, void __user *buf_fx, int size)
return -1;
}
 
-   fpstate_sanitize_xstate(&tsk->thread.fpu);
+   fpstate_sanitize_xstate(fpu);
if (__copy_to_user(buf_fx, xsave, fpu_user_xstate_size))
return -1;
}
diff --git a/arch/x86/mm/pkeys.c b/arch/x86/mm/pkeys.c
index 2dab69a..e2c2347 100644
--- a/arch/x86/mm/pkeys.c
+++ b/arch/x86/mm/pkeys.c
@@ -45,7 +45,7 @@ int __execute_only_pkey(struct mm_struct *mm)
 */
preempt_disable();
if (!need_to_set_mm_pkey &&
-   fpregs_active() &&
+   current->thread.fpu.fpregs_active &&
!__pkru_allows_read(read_pkru(), execute_only_pkey)) {
preempt_enable();
return execute_only_pkey;


[tip:x86/fpu] x86/fpu: Split copy_user_to_xstate() into copy_kernel_to_xstate() & copy_user_to_xstate()

2017-09-26 Thread tip-bot for Ingo Molnar
Commit-ID:  79fecc2b7506f29fb91becc65e8788e5ae7eba9f
Gitweb: http://git.kernel.org/tip/79fecc2b7506f29fb91becc65e8788e5ae7eba9f
Author: Ingo Molnar 
AuthorDate: Sat, 23 Sep 2017 14:59:54 +0200
Committer:  Ingo Molnar 
CommitDate: Sun, 24 Sep 2017 13:04:32 +0200

x86/fpu: Split copy_user_to_xstate() into copy_kernel_to_xstate() & copy_user_to_xstate()

Similar to:

  x86/fpu: Split copy_xstate_to_user() into copy_xstate_to_kernel() & copy_xstate_to_user()

No change in functionality.

Cc: Andrew Morton 
Cc: Andy Lutomirski 
Cc: Andy Lutomirski 
Cc: Borislav Petkov 
Cc: Dave Hansen 
Cc: Eric Biggers 
Cc: Fenghua Yu 
Cc: Linus Torvalds 
Cc: Oleg Nesterov 
Cc: Peter Zijlstra 
Cc: Rik van Riel 
Cc: Thomas Gleixner 
Cc: Yu-cheng Yu 
Link: http://lkml.kernel.org/r/20170923130016.21448-12-mi...@kernel.org
Signed-off-by: Ingo Molnar 
---
 arch/x86/include/asm/fpu/xstate.h |  4 +--
 arch/x86/kernel/fpu/regset.c  | 10 --
 arch/x86/kernel/fpu/xstate.c  | 66 ++-
 3 files changed, 74 insertions(+), 6 deletions(-)

diff --git a/arch/x86/include/asm/fpu/xstate.h b/arch/x86/include/asm/fpu/xstate.h
index fed6617..79af79d 100644
--- a/arch/x86/include/asm/fpu/xstate.h
+++ b/arch/x86/include/asm/fpu/xstate.h
@@ -50,6 +50,6 @@ const void *get_xsave_field_ptr(int xstate_field);
 int using_compacted_format(void);
 int copy_xstate_to_kernel(void *kbuf, struct xregs_state *xsave, unsigned int offset, unsigned int size);
 int copy_xstate_to_user(void __user *ubuf, struct xregs_state *xsave, unsigned int offset, unsigned int size);
-int copy_user_to_xstate(const void *kbuf, const void __user *ubuf,
-struct xregs_state *xsave);
+int copy_kernel_to_xstate(const void *kbuf, const void __user *ubuf, struct xregs_state *xsave);
+int copy_user_to_xstate(const void *kbuf, const void __user *ubuf, struct xregs_state *xsave);
 #endif
diff --git a/arch/x86/kernel/fpu/regset.c b/arch/x86/kernel/fpu/regset.c
index ec14041..cb45dd8 100644
--- a/arch/x86/kernel/fpu/regset.c
+++ b/arch/x86/kernel/fpu/regset.c
@@ -134,10 +134,14 @@ int xstateregs_set(struct task_struct *target, const struct user_regset *regset,
 
fpu__activate_fpstate_write(fpu);
 
-   if (boot_cpu_has(X86_FEATURE_XSAVES))
-   ret = copy_user_to_xstate(kbuf, ubuf, xsave);
-   else
+   if (boot_cpu_has(X86_FEATURE_XSAVES)) {
+   if (kbuf)
+   ret = copy_kernel_to_xstate(kbuf, ubuf, xsave);
+   else
+   ret = copy_user_to_xstate(kbuf, ubuf, xsave);
+   } else {
ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, xsave, 0, -1);
+   }
 
/*
 * In case of failure, mark all states as init:
diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c
index 00c3b41..1ad25d1 100644
--- a/arch/x86/kernel/fpu/xstate.c
+++ b/arch/x86/kernel/fpu/xstate.c
@@ -1084,7 +1084,71 @@ int copy_xstate_to_user(void __user *ubuf, struct xregs_state *xsave, unsigned i
 }
 
 /*
- * Convert from a ptrace standard-format buffer to kernel XSAVES format
+ * Convert from a ptrace standard-format kernel buffer to kernel XSAVES format
+ * and copy to the target thread. This is called from xstateregs_set() and
+ * there we check the CPU has XSAVES and a whole standard-sized buffer
+ * exists.
+ */
+int copy_kernel_to_xstate(const void *kbuf, const void __user *ubuf,
+struct xregs_state *xsave)
+{
+   unsigned int offset, size;
+   int i;
+   u64 xfeatures;
+   u64 allowed_features;
+
+   offset = offsetof(struct xregs_state, header);
+   size = sizeof(xfeatures);
+
+   if (kbuf) {
+   memcpy(&xfeatures, kbuf + offset, size);
+   } else {
+   if (__copy_from_user(&xfeatures, ubuf + offset, size))
+   return -EFAULT;
+   }
+
+   /*
+* Reject if the user sets any disabled or supervisor features:
+*/
+   allowed_features = xfeatures_mask & ~XFEATURE_MASK_SUPERVISOR;
+
+   if (xfeatures & ~allowed_features)
+   return -EINVAL;
+
+   for (i = 0; i < XFEATURE_MAX; i++) {
+   u64 mask = ((u64)1 << i);
+
+   if (xfeatures & mask) {
+   void *dst = __raw_xsave_addr(xsave, 1 << i);
+
+   offset = xstate_offsets[i];
+   size = xstate_sizes[i];
+
+   if (kbuf) {
+   memcpy(dst, kbuf + offset, size);
+   } else {
+   if (__copy_from_user(dst, ubuf + offset, size))
+   return -EFAULT;
+   }
+   }
+
+   /*
+* The state that came in from userspace was user-state only.
+* Mask all the user states out of 'xfeatures':
+*/
+   xsave->header.xfeatures &= XFEATURE_MASK_SUPERVISOR;
+
+   /*
+* Add back in the features that came in from userspace:
+*/
+   xsave->header.xfeatures |= xfeatures;
+
+   return 0;
+}
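
For context on what the conversion above bridges: in the ptrace standard (non-compacted) format every component sits at a fixed architectural offset, xstate_offsets[i], while the kernel's XSAVES (compacted) format packs only the enabled components back to back. A hedged stand-alone sketch of how a compacted offset comes about; the per-component 64-byte alignment bits of xcomp_bv are deliberately ignored, and sizes[] merely stands in for the kernel's xstate_sizes[]:

#include <stdint.h>

/*
 * Sketch only, not kernel code: components 0 and 1 (x87, SSE) live in
 * the fixed 512-byte legacy area, followed by the 64-byte xstate
 * header; components >= 2 are packed in feature order, skipping any
 * component not enabled in xcomp_bv.
 */
static unsigned int compacted_offset(uint64_t xcomp_bv, int feature,
				     const unsigned int *sizes)
{
	unsigned int offset = 512 + 64;	/* legacy area + xstate header */
	int i;

	for (i = 2; i < feature; i++)
		if (xcomp_bv & ((uint64_t)1 << i))
			offset += sizes[i];

	return offset;
}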

[tip:x86/fpu] x86/fpu: Remove 'kbuf' parameter from the copy_user_to_xstate() API

2017-09-26 Thread tip-bot for Ingo Molnar
Commit-ID:  7b9094c688f807c110a2dab6f6edc5876bfa7b0b
Gitweb: http://git.kernel.org/tip/7b9094c688f807c110a2dab6f6edc5876bfa7b0b
Author: Ingo Molnar 
AuthorDate: Sat, 23 Sep 2017 14:59:56 +0200
Committer:  Ingo Molnar 
CommitDate: Sun, 24 Sep 2017 13:04:33 +0200

x86/fpu: Remove 'kbuf' parameter from the copy_user_to_xstate() API

No change in functionality.

Cc: Andrew Morton 
Cc: Andy Lutomirski 
Cc: Andy Lutomirski 
Cc: Borislav Petkov 
Cc: Dave Hansen 
Cc: Eric Biggers 
Cc: Fenghua Yu 
Cc: Linus Torvalds 
Cc: Oleg Nesterov 
Cc: Peter Zijlstra 
Cc: Rik van Riel 
Cc: Thomas Gleixner 
Cc: Yu-cheng Yu 
Link: http://lkml.kernel.org/r/20170923130016.21448-14-mi...@kernel.org
Signed-off-by: Ingo Molnar 
---
 arch/x86/include/asm/fpu/xstate.h |  2 +-
 arch/x86/kernel/fpu/regset.c  |  2 +-
 arch/x86/kernel/fpu/signal.c  | 11 ---
 arch/x86/kernel/fpu/xstate.c  | 19 +--
 4 files changed, 11 insertions(+), 23 deletions(-)

diff --git a/arch/x86/include/asm/fpu/xstate.h b/arch/x86/include/asm/fpu/xstate.h
index f10889b..4ceb907 100644
--- a/arch/x86/include/asm/fpu/xstate.h
+++ b/arch/x86/include/asm/fpu/xstate.h
@@ -51,5 +51,5 @@ int using_compacted_format(void);
 int copy_xstate_to_kernel(void *kbuf, struct xregs_state *xsave, unsigned int offset, unsigned int size);
 int copy_xstate_to_user(void __user *ubuf, struct xregs_state *xsave, unsigned int offset, unsigned int size);
 int copy_kernel_to_xstate(const void *kbuf, struct xregs_state *xsave);
-int copy_user_to_xstate(const void *kbuf, const void __user *ubuf, struct xregs_state *xsave);
+int copy_user_to_xstate(const void __user *ubuf, struct xregs_state *xsave);
 #endif
diff --git a/arch/x86/kernel/fpu/regset.c b/arch/x86/kernel/fpu/regset.c
index 785302c..caf723f 100644
--- a/arch/x86/kernel/fpu/regset.c
+++ b/arch/x86/kernel/fpu/regset.c
@@ -138,7 +138,7 @@ int xstateregs_set(struct task_struct *target, const struct user_regset *regset,
if (kbuf)
ret = copy_kernel_to_xstate(kbuf, xsave);
else
-   ret = copy_user_to_xstate(kbuf, ubuf, xsave);
+   ret = copy_user_to_xstate(ubuf, xsave);
} else {
ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, xsave, 0, -1);
}
diff --git a/arch/x86/kernel/fpu/signal.c b/arch/x86/kernel/fpu/signal.c
index b1fe9a1..2c685b4 100644
--- a/arch/x86/kernel/fpu/signal.c
+++ b/arch/x86/kernel/fpu/signal.c
@@ -323,13 +323,10 @@ static int __fpu__restore_sig(void __user *buf, void __user *buf_fx, int size)
 */
fpu__drop(fpu);
 
-   if (using_compacted_format()) {
-   err = copy_user_to_xstate(NULL, buf_fx,
-  &fpu->state.xsave);
-   } else {
-   err = __copy_from_user(&fpu->state.xsave,
-  buf_fx, state_size);
-   }
+   if (using_compacted_format())
+   err = copy_user_to_xstate(buf_fx, &fpu->state.xsave);
+   else
+   err = __copy_from_user(&fpu->state.xsave, buf_fx, state_size);
 
if (err || __copy_from_user(&env, buf, sizeof(env))) {
fpstate_init(&fpu->state);
diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c
index 71cc8d3..b1f3e4d 100644
--- a/arch/x86/kernel/fpu/xstate.c
+++ b/arch/x86/kernel/fpu/xstate.c
@@ -1142,8 +1142,7 @@ int copy_kernel_to_xstate(const void *kbuf, struct xregs_state *xsave)
  * there we check the CPU has XSAVES and a whole standard-sized buffer
  * exists.
  */
-int copy_user_to_xstate(const void *kbuf, const void __user *ubuf,
-struct xregs_state *xsave)
+int copy_user_to_xstate(const void __user *ubuf, struct xregs_state *xsave)
 {
unsigned int offset, size;
int i;
@@ -1153,12 +1152,8 @@ int copy_user_to_xstate(const void *kbuf, const void __user *ubuf,
offset = offsetof(struct xregs_state, header);
size = sizeof(xfeatures);
 
-   if (kbuf) {
-   memcpy(&xfeatures, kbuf + offset, size);
-   } else {
-   if (__copy_from_user(&xfeatures, ubuf + offset, size))
-   return -EFAULT;
-   }
+   if (__copy_from_user(&xfeatures, ubuf + offset, size))
+   return -EFAULT;
 
/*
 * Reject if the user sets any disabled or supervisor features:
@@ -1177,12 +1172,8 @@ int copy_user_to_xstate(const void *kbuf, const void __user *ubuf,
offset = xstate_offsets[i];
size = xstate_sizes[i];
 
-   if (kbuf) {
-   memcpy(dst, kbuf + offset, size);
-   } else {
-   if (__copy_from_user(dst, ubuf + offset, size))
-   return -EFAULT;
-   }
+   if (__copy_from_user(dst, ubuf + offset, size))
+   return -EFAULT;
}
}
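
With the kbuf leg gone, the choice between kernel and user buffers is made exactly once, at the regset call site, and each copier stays specialized for its buffer type. A self-contained toy model of that final shape (the _toy names are hypothetical stand-ins, and plain memcpy() stands in for __copy_from_user(), which can additionally fault):

#include <stdint.h>
#include <string.h>

struct xregs_state_toy { uint64_t header; unsigned char data[64]; };

static int copy_kernel_to_xstate_toy(const void *kbuf, struct xregs_state_toy *xsave)
{
	memcpy(xsave, kbuf, sizeof(*xsave));	/* kernel buffer: plain memcpy */
	return 0;
}

static int copy_user_to_xstate_toy(const void *ubuf, struct xregs_state_toy *xsave)
{
	memcpy(xsave, ubuf, sizeof(*xsave));	/* stand-in for __copy_from_user() */
	return 0;
}

static int dispatch_toy(const void *kbuf, const void *ubuf, struct xregs_state_toy *xsave)
{
	/* One test at the call site, no per-field re-testing inside: */
	return kbuf ? copy_kernel_to_xstate_toy(kbuf, xsave)
		    : copy_user_to_xstate_toy(ubuf, xsave);
}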
