[PATCH v2 5/6] ir-rx51: use hrtimer instead of dmtimer

2016-05-16 Thread Ivaylo Dimitrov
Drop dmtimer usage for pulse timer in favor of hrtimer. That allows
removing PWM dmtimer platform data usage.

Signed-off-by: Ivaylo Dimitrov 
---
 arch/arm/mach-omap2/board-rx51-peripherals.c |   4 -
 arch/arm/mach-omap2/pdata-quirks.c   |   3 -
 drivers/media/rc/ir-rx51.c   | 166 ++-
 include/linux/platform_data/media/ir-rx51.h  |   1 -
 4 files changed, 37 insertions(+), 137 deletions(-)

diff --git a/arch/arm/mach-omap2/board-rx51-peripherals.c 
b/arch/arm/mach-omap2/board-rx51-peripherals.c
index e487575..a5ab712 100644
--- a/arch/arm/mach-omap2/board-rx51-peripherals.c
+++ b/arch/arm/mach-omap2/board-rx51-peripherals.c
@@ -1242,10 +1242,6 @@ static struct pwm_omap_dmtimer_pdata __maybe_unused 
pwm_dmtimer_pdata = {
 #if defined(CONFIG_IR_RX51) || defined(CONFIG_IR_RX51_MODULE)
 static struct lirc_rx51_platform_data rx51_lirc_data = {
.set_max_mpu_wakeup_lat = omap_pm_set_max_mpu_wakeup_lat,
-#if IS_ENABLED(CONFIG_OMAP_DM_TIMER)
-   .dmtimer = &pwm_dmtimer_pdata,
-#endif
-
 };
 
 static struct platform_device rx51_lirc_device = {
diff --git a/arch/arm/mach-omap2/pdata-quirks.c 
b/arch/arm/mach-omap2/pdata-quirks.c
index 436c6e6..8739d5c 100644
--- a/arch/arm/mach-omap2/pdata-quirks.c
+++ b/arch/arm/mach-omap2/pdata-quirks.c
@@ -486,9 +486,6 @@ static struct pwm_omap_dmtimer_pdata pwm_dmtimer_pdata = {
 
 static struct lirc_rx51_platform_data __maybe_unused rx51_lirc_data = {
.set_max_mpu_wakeup_lat = omap_pm_set_max_mpu_wakeup_lat,
-#if IS_ENABLED(CONFIG_OMAP_DM_TIMER)
-   .dmtimer = &pwm_dmtimer_pdata,
-#endif
 };
 
 static struct platform_device __maybe_unused rx51_lirc_device = {
diff --git a/drivers/media/rc/ir-rx51.c b/drivers/media/rc/ir-rx51.c
index 1cbb43d..82fb6f2 100644
--- a/drivers/media/rc/ir-rx51.c
+++ b/drivers/media/rc/ir-rx51.c
@@ -22,10 +22,10 @@
 #include 
 #include 
 #include 
+#include 
 
 #include 
 #include 
-#include 
 #include 
 
 #define LIRC_RX51_DRIVER_FEATURES (LIRC_CAN_SET_SEND_DUTY_CYCLE |  \
@@ -36,32 +36,26 @@
 
 #define WBUF_LEN 256
 
-#define TIMER_MAX_VALUE 0xffffffff
-
 struct lirc_rx51 {
struct pwm_device *pwm;
-   pwm_omap_dmtimer *pulse_timer;
-   struct pwm_omap_dmtimer_pdata *dmtimer;
+   struct hrtimer timer;
struct device*dev;
struct lirc_rx51_platform_data *pdata;
wait_queue_head_t wqueue;
 
-   unsigned long   fclk_khz;
unsigned intfreq;   /* carrier frequency */
unsigned intduty_cycle; /* carrier duty cycle */
-   unsigned intirq_num;
-   unsigned intmatch;
int wbuf[WBUF_LEN];
int wbuf_index;
unsigned long   device_is_open;
 };
 
-static void lirc_rx51_on(struct lirc_rx51 *lirc_rx51)
+static inline void lirc_rx51_on(struct lirc_rx51 *lirc_rx51)
 {
pwm_enable(lirc_rx51->pwm);
 }
 
-static void lirc_rx51_off(struct lirc_rx51 *lirc_rx51)
+static inline void lirc_rx51_off(struct lirc_rx51 *lirc_rx51)
 {
pwm_disable(lirc_rx51->pwm);
 }
@@ -72,61 +66,21 @@ static int init_timing_params(struct lirc_rx51 *lirc_rx51)
int duty, period = DIV_ROUND_CLOSEST(NSEC_PER_SEC, lirc_rx51->freq);
 
duty = DIV_ROUND_CLOSEST(lirc_rx51->duty_cycle * period, 100);
-   lirc_rx51->dmtimer->set_int_enable(lirc_rx51->pulse_timer, 0);
 
pwm_config(pwm, duty, period);
 
-   lirc_rx51->dmtimer->start(lirc_rx51->pulse_timer);
-
-   lirc_rx51->match = 0;
-
return 0;
 }
 
-#define tics_after(a, b) ((long)(b) - (long)(a) < 0)
-
-static int pulse_timer_set_timeout(struct lirc_rx51 *lirc_rx51, int usec)
+static enum hrtimer_restart lirc_rx51_timer_cb(struct hrtimer *timer)
 {
-   int counter;
-
-   BUG_ON(usec < 0);
-
-   if (lirc_rx51->match == 0)
-   counter = 
lirc_rx51->dmtimer->read_counter(lirc_rx51->pulse_timer);
-   else
-   counter = lirc_rx51->match;
-
-   counter += (u32)(lirc_rx51->fclk_khz * usec / (1000));
-   lirc_rx51->dmtimer->set_match(lirc_rx51->pulse_timer, 1, counter);
-   lirc_rx51->dmtimer->set_int_enable(lirc_rx51->pulse_timer,
-  PWM_OMAP_DMTIMER_INT_MATCH);
-   if (tics_after(lirc_rx51->dmtimer->read_counter(lirc_rx51->pulse_timer),
-  counter)) {
-   return 1;
-   }
-   return 0;
-}
+   struct lirc_rx51 *lirc_rx51 =
+   container_of(timer, struct lirc_rx51, timer);
+   ktime_t now;
 
-static irqreturn_t lirc_rx51_interrupt_handler(int irq, void *ptr)
-{
-   unsigned int retval;
-   struct lirc_rx51 *lirc_rx51 = ptr;
-
-   retval = lirc_rx51->dmtimer->read_status(lirc_rx51->pulse_timer);
-   if (!retval)
-   return IRQ_NONE;
-
-   if (retval & ~PWM_OMAP_DMTIMER_INT_MATCH)
-   dev_err_ratelimited(lirc_rx51->dev,
-   ": Unexpected interrupt source: 

[PATCH v2 2/6] pwm: omap-dmtimer: Allow for setting dmtimer clock source

2016-05-16 Thread Ivaylo Dimitrov
OMAP GP timers can have different input clocks that allow different PWM
frequencies. However, there is no other way of setting the clock source but
through clocks or clock-names properties of the timer itself. This limits
PWM functionality to only the frequencies allowed by the particular clock
source. Allowing setting the clock source by PWM rather than by timer
allows different PWMs to have different ranges by not hard-wiring the clock
source to the timer.

Signed-off-by: Ivaylo Dimitrov 
Acked-by: Rob Herring 
---
 Documentation/devicetree/bindings/pwm/pwm-omap-dmtimer.txt |  4 
 drivers/pwm/pwm-omap-dmtimer.c | 12 +++-
 2 files changed, 11 insertions(+), 5 deletions(-)

diff --git a/Documentation/devicetree/bindings/pwm/pwm-omap-dmtimer.txt 
b/Documentation/devicetree/bindings/pwm/pwm-omap-dmtimer.txt
index 5befb53..2e53324 100644
--- a/Documentation/devicetree/bindings/pwm/pwm-omap-dmtimer.txt
+++ b/Documentation/devicetree/bindings/pwm/pwm-omap-dmtimer.txt
@@ -9,6 +9,10 @@ Required properties:
 
 Optional properties:
 - ti,prescaler: Should be a value between 0 and 7, see the timers datasheet
+- ti,clock-source: Set dmtimer parent clock, values between 0 and 2:
+  - 0x00 - high-frequency system clock (timer_sys_ck)
+  - 0x01 - 32-kHz always-on clock (timer_32k_ck)
+  - 0x02 - external clock (timer_ext_ck, OMAP2 only)
 
 Example:
pwm9: dmtimer-pwm@9 {
diff --git a/drivers/pwm/pwm-omap-dmtimer.c b/drivers/pwm/pwm-omap-dmtimer.c
index b7e6ecb..95964c6 100644
--- a/drivers/pwm/pwm-omap-dmtimer.c
+++ b/drivers/pwm/pwm-omap-dmtimer.c
@@ -245,7 +245,7 @@ static int pwm_omap_dmtimer_probe(struct platform_device 
*pdev)
struct pwm_omap_dmtimer_chip *omap;
struct pwm_omap_dmtimer_pdata *pdata;
pwm_omap_dmtimer *dm_timer;
-   u32 prescaler;
+   u32 v;
int status;
 
pdata = dev_get_platdata(&pdev->dev);
@@ -306,10 +306,12 @@ static int pwm_omap_dmtimer_probe(struct platform_device 
*pdev)
if (pm_runtime_active(&omap->dm_timer_pdev->dev))
omap->pdata->stop(omap->dm_timer);
 
-   /* setup dmtimer prescaler */
-   if (!of_property_read_u32(pdev->dev.of_node, "ti,prescaler",
-   &prescaler))
-   omap->pdata->set_prescaler(omap->dm_timer, prescaler);
+   if (!of_property_read_u32(pdev->dev.of_node, "ti,prescaler", &v))
+   omap->pdata->set_prescaler(omap->dm_timer, v);
+
+   /* setup dmtimer clock source */
+   if (!of_property_read_u32(pdev->dev.of_node, "ti,clock-source", &v))
+   omap->pdata->set_source(omap->dm_timer, v);
 
omap->chip.dev = &pdev->dev;
omap->chip.ops = &pwm_omap_dmtimer_ops;
-- 
1.9.1



Re: [PATCH v2 4/5] of: overlay: Pick up label symbols from overlays.

2016-05-16 Thread Pantelis Antoniou
Hi Geert,

> On May 16, 2016, at 22:06 , Geert Uytterhoeven  wrote:
> 
> On Mon, May 16, 2016 at 6:52 PM, Pantelis Antoniou
>  wrote:
>> Insert overlay symbols to the base tree when applied.
>> This makes it possible to apply an overlay that references a label
>> that a previously inserted overlay had.
>> 
>> Signed-off-by: Pantelis Antoniou 
> 
> This patch hasn't changed, so I think you can keep my

It’s been tweaked slightly that’s why I dropped your tested-by
signoff. Could you test this version again please to verify it
works for you?

> Tested-by: Geert Uytterhoeven 
> 
> Gr{oetje,eeting}s,
> 
>Geert
> 

Regards

— Pantelis

> --
> Geert Uytterhoeven -- There's lots of Linux beyond ia32 -- 
> ge...@linux-m68k.org
> 
> In personal conversations with technical people, I call myself a hacker. But
> when I'm talking to journalists I just say "programmer" or something like 
> that.
>-- Linus Torvalds



Re: [PATCH v2 4/5] of: overlay: Pick up label symbols from overlays.

2016-05-16 Thread Pantelis Antoniou
Hi Geert,

> On May 16, 2016, at 22:06 , Geert Uytterhoeven  wrote:
> 
> On Mon, May 16, 2016 at 6:52 PM, Pantelis Antoniou
>  wrote:
>> Insert overlay symbols to the base tree when applied.
>> This makes it possible to apply an overlay that references a label
>> that a previously inserted overlay had.
>> 
>> Signed-off-by: Pantelis Antoniou 
> 
> This patch hasn't changed, so I think you can keep my

It’s been tweaked slightly that’s why I dropped your tested-by
signoff. Could you test this version again please to verify it
works for you?

> Tested-by: Geert Uytterhoeven 
> 
> Gr{oetje,eeting}s,
> 
>Geert
> 

Regards

— Pantelis

> --
> Geert Uytterhoeven -- There's lots of Linux beyond ia32 -- 
> ge...@linux-m68k.org
> 
> In personal conversations with technical people, I call myself a hacker. But
> when I'm talking to journalists I just say "programmer" or something like 
> that.
>-- Linus Torvalds



Re: [PATCH] ftrace/x86: Fix function graph tracer reset path

2016-05-16 Thread Steven Rostedt
On Mon, 16 May 2016 21:19:18 +0200
Borislav Petkov  wrote:

> Btw, arch_static_branch_jump() spells that 5-byte JMP too and not until
> too long ago we had it in static_cpu_has()...

Those are "special" too.

If we can get the compiler to do the Right Thing (TM) then we should
let it.

> 
> I guess after spending some time with the kernel, one can't really
> differentiate hacks from proper design anymore. :-P
> 

That's because a quality of a kernel is determined by the
maintainability of all its hacks, not lack of them.

-- Steve


Re: [PATCH] ftrace/x86: Fix function graph tracer reset path

2016-05-16 Thread Steven Rostedt
On Mon, 16 May 2016 21:19:18 +0200
Borislav Petkov  wrote:

> Btw, arch_static_branch_jump() spells that 5-byte JMP too and not until
> too long ago we had it in static_cpu_has()...

Those are "special" too.

If we can get the compiler to do the Right Thing (TM) then we should
let it.

> 
> I guess after spending some time with the kernel, one can't really
> differentiate hacks from proper design anymore. :-P
> 

That's because a quality of a kernel is determined by the
maintainability of all its hacks, not lack of them.

-- Steve


Re: [patch V4 09/31] bitops: Add x86-specific parity functions

2016-05-16 Thread H. Peter Anvin
On May 16, 2016 10:06:08 AM PDT, Peter Zijlstra  wrote:
>On Mon, May 16, 2016 at 11:49:05PM +0800, Zhaoxiu Zeng wrote:
>> On 2016/5/11 17:31, Peter Zijlstra wrote:
>> > Please use the GEN_*_RMWcc() stuff to avoid the setpo where
>possible.
>> 
>> Setpo is better.
>> In most cases, we need to store the parity, or compare it with other
>variables.
>> 
>> For example, in drivers/net/ethernet/broadcom/tg3.c,
>> 
>> static int tg3_test_nvram(struct tg3 *tp)
>> {
>>  ..
>>  if (parity8(data[i]) == !!parity[i])
>>  goto out;
>>  ..
>> }
>> 
>> If use GEN_BINARY_RMWcc stuff,
>> 
>> static inline unsigned int __arch_parity8(unsigned int w)
>> {
>>  GEN_BINARY_RMWcc("testb", w, "er", 0xff, "%0", "po");
>> }
>
>blergh; GCC does indeed make a mess of that. It looks like we'll need the
>cc-output stuff for this in order for GCC to generate sane code for
>that :/

For what it's worth, I have a patchset for cc out just about ready to post.  It 
is in gcc 6.1.
-- 
Sent from my Android device with K-9 Mail. Please excuse brevity and formatting.


Re: [patch V4 09/31] bitops: Add x86-specific parity functions

2016-05-16 Thread H. Peter Anvin
On May 16, 2016 10:06:08 AM PDT, Peter Zijlstra  wrote:
>On Mon, May 16, 2016 at 11:49:05PM +0800, Zhaoxiu Zeng wrote:
>> On 2016/5/11 17:31, Peter Zijlstra wrote:
>> > Please use the GEN_*_RMWcc() stuff to avoid the setpo where
>possible.
>> 
>> Setpo is better.
>> In most cases, we need to store the parity, or compare it with other
>variables.
>> 
>> For example, in drivers/net/ethernet/broadcom/tg3.c,
>> 
>> static int tg3_test_nvram(struct tg3 *tp)
>> {
>>  ..
>>  if (parity8(data[i]) == !!parity[i])
>>  goto out;
>>  ..
>> }
>> 
>> If use GEN_BINARY_RMWcc stuff,
>> 
>> static inline unsigned int __arch_parity8(unsigned int w)
>> {
>>  GEN_BINARY_RMWcc("testb", w, "er", 0xff, "%0", "po");
>> }
>
>blergh; GCC does indeed make a mess of that. It looks like we'll need the
>cc-output stuff for this in order for GCC to generate sane code for
>that :/

For what it's worth, I have a patchset for cc out just about ready to post.  It 
is in gcc 6.1.
-- 
Sent from my Android device with K-9 Mail. Please excuse brevity and formatting.


Re: [patch V4 09/31] bitops: Add x86-specific parity functions

2016-05-16 Thread H. Peter Anvin
On May 11, 2016 2:31:39 AM PDT, Peter Zijlstra  wrote:
>On Wed, May 11, 2016 at 05:16:38PM +0800, zengzhao...@163.com wrote:
>
>> +static inline unsigned int __arch_parity4(unsigned int w)
>> +{
>> +unsigned int res = 0;
>> +
>> +asm("test $0xf, %1; setpo %b0"
>> +: "+q" (res)
>> +: "r" (w)
>> +: "cc");
>> +
>> +return res;
>> +}
>> +
>> +static inline unsigned int __arch_parity8(unsigned int w)
>> +{
>> +unsigned int res = 0;
>> +
>> +asm("test %1, %1; setpo %b0"
>> +: "+q" (res)
>> +: "r" (w)
>> +: "cc");
>> +
>> +return res;
>> +}
>> +
>> +static inline unsigned int __arch_parity16(unsigned int w)
>> +{
>> +unsigned int res = 0;
>> +
>> +asm("xor %h1, %b1; setpo %b0"
>> +: "+q" (res), "+q" (w)
>> +: : "cc");
>> +
>> +return res;
>> +}
>
>Please use the GEN_*_RMWcc() stuff to avoid the setpo where possible.
>
>> +
>> +#ifdef CONFIG_64BIT
>> +/* popcnt %eax, %eax -- redundant REX prefix for alignment */
>> +#define POPCNT32 ".byte 0xf3,0x40,0x0f,0xb8,0xc0"
>> +/* popcnt %rax, %rax */
>> +#define POPCNT64 ".byte 0xf3,0x48,0x0f,0xb8,0xc0"
>> +#else
>> +/* popcnt %eax, %eax */
>> +#define POPCNT32 ".byte 0xf3,0x0f,0xb8,0xc0"
>> +#endif
>
>Yuck, please don't duplicate stuff like this.
>
>> +
>> +static __always_inline unsigned int __arch_parity32(unsigned int w)
>> +{
>> +unsigned int res;
>> +unsigned int tmp;
>> +
>> +asm(ALTERNATIVE(
>> +"   mov %%eax, %1   \n"
>> +"   shr $16, %%eax  \n"
>> +"   xor %1, %%eax   \n"
>> +"   xor %%ah, %%al  \n"
>> +"   mov $0, %%eax   \n"
>> +"   setpo   %%al\n",
>> +POPCNT32 "  \n"
>> +"   and $1, %%eax   \n",
>> +X86_FEATURE_POPCNT)
>> +: "=a" (res), "=" (tmp)
>> +: "a" (w)
>> +: "cc");
>> +
>> +return res;
>> +}
>
>How many bytes does that end up being? Should we make it a call?

Also, setxx is used with a "bool" or "u8", not "unsigned int"
-- 
Sent from my Android device with K-9 Mail. Please excuse brevity and formatting.


Re: [patch V4 09/31] bitops: Add x86-specific parity functions

2016-05-16 Thread H. Peter Anvin
On May 11, 2016 2:31:39 AM PDT, Peter Zijlstra  wrote:
>On Wed, May 11, 2016 at 05:16:38PM +0800, zengzhao...@163.com wrote:
>
>> +static inline unsigned int __arch_parity4(unsigned int w)
>> +{
>> +unsigned int res = 0;
>> +
>> +asm("test $0xf, %1; setpo %b0"
>> +: "+q" (res)
>> +: "r" (w)
>> +: "cc");
>> +
>> +return res;
>> +}
>> +
>> +static inline unsigned int __arch_parity8(unsigned int w)
>> +{
>> +unsigned int res = 0;
>> +
>> +asm("test %1, %1; setpo %b0"
>> +: "+q" (res)
>> +: "r" (w)
>> +: "cc");
>> +
>> +return res;
>> +}
>> +
>> +static inline unsigned int __arch_parity16(unsigned int w)
>> +{
>> +unsigned int res = 0;
>> +
>> +asm("xor %h1, %b1; setpo %b0"
>> +: "+q" (res), "+q" (w)
>> +: : "cc");
>> +
>> +return res;
>> +}
>
>Please use the GEN_*_RMWcc() stuff to avoid the setpo where possible.
>
>> +
>> +#ifdef CONFIG_64BIT
>> +/* popcnt %eax, %eax -- redundant REX prefix for alignment */
>> +#define POPCNT32 ".byte 0xf3,0x40,0x0f,0xb8,0xc0"
>> +/* popcnt %rax, %rax */
>> +#define POPCNT64 ".byte 0xf3,0x48,0x0f,0xb8,0xc0"
>> +#else
>> +/* popcnt %eax, %eax */
>> +#define POPCNT32 ".byte 0xf3,0x0f,0xb8,0xc0"
>> +#endif
>
>Yuck, please don't duplicate stuff like this.
>
>> +
>> +static __always_inline unsigned int __arch_parity32(unsigned int w)
>> +{
>> +unsigned int res;
>> +unsigned int tmp;
>> +
>> +asm(ALTERNATIVE(
>> +"   mov %%eax, %1   \n"
>> +"   shr $16, %%eax  \n"
>> +"   xor %1, %%eax   \n"
>> +"   xor %%ah, %%al  \n"
>> +"   mov $0, %%eax   \n"
>> +"   setpo   %%al\n",
>> +POPCNT32 "  \n"
>> +"   and $1, %%eax   \n",
>> +X86_FEATURE_POPCNT)
>> +: "=a" (res), "=" (tmp)
>> +: "a" (w)
>> +: "cc");
>> +
>> +return res;
>> +}
>
>How many bytes does that end up being? Should we make it a call?

Also, setxx is used with a "bool" or "u8", not "unsigned int"
-- 
Sent from my Android device with K-9 Mail. Please excuse brevity and formatting.


Re: [PATCH] rcu: tree: correctly handle sparse possible CPUs

2016-05-16 Thread Paul E. McKenney
On Mon, May 16, 2016 at 05:48:26PM +0100, Mark Rutland wrote:
> In many cases in the RCU tree code, we iterate over the set of CPUS for
> a leaf node described by rcu_node::grplo and rcu_node::grphi, checking
> per-cpu data for each CPU in this range. However, if the set of possible
> CPUs is sparse, some CPUs described in this range are not possible, and
> thus no per-cpu region will have been allocated (or initialised) for
> them by the generic percpu code.
> 
> Erroneous accesses to a per-cpu area for these !possible CPUs may fault
> or may hit other data depending on the address generated when the
> erroneous per cpu offset is applied. In practice, both cases have been
> observed on arm64 hardware (the former being silent, but detectable with
> additional patches).
> 
> To avoid issues resulting from this, we must iterate over the set of
> *possible* cpus for a given leaf node. This patch adds new helpers to
> enable this (also unifying and simplifying some related bitmask
> manipulation logic), and moves the RCU tree code over to them.
> 
> Without this patch, running reboot at a shell can result in an oops
> like:
> 
> [ 3369.075979] Unable to handle kernel paging request at virtual address 
> ff8008b21b4c
> [ 3369.083881] pgd = ffc3ecdda000
> [ 3369.087270] [ff8008b21b4c] *pgd=0083eca48003, 
> *pud=0083eca48003, *pmd=
> [ 3369.096222] Internal error: Oops: 9607 [#1] PREEMPT SMP
> [ 3369.101781] Modules linked in:
> [ 3369.104825] CPU: 2 PID: 1817 Comm: NetworkManager Tainted: GW  
>  4.6.0+ #3
> [ 3369.121239] task: ffc0fa13e000 ti: ffc3eb94 task.ti: 
> ffc3eb94
> [ 3369.128708] PC is at sync_rcu_exp_select_cpus+0x188/0x510
> [ 3369.134094] LR is at sync_rcu_exp_select_cpus+0x104/0x510
> [ 3369.139479] pc : [] lr : [] pstate: 
> 21c5
> [ 3369.146860] sp : ffc3eb9435a0
> [ 3369.150162] x29: ffc3eb9435a0 x28: ff8008be4f88
> [ 3369.155465] x27: ff8008b66c80 x26: ffc3eceb2600
> [ 3369.160767] x25: 0001 x24: ff8008be4f88
> [ 3369.166070] x23: ff8008b51c3c x22: ff8008b66c80
> [ 3369.171371] x21: 0001 x20: ff8008b21b40
> [ 3369.176673] x19: ff8008b66c80 x18: 
> [ 3369.181975] x17: 007fa951a010 x16: ff80086a30f0
> [ 3369.187278] x15: 007fa9505590 x14: 
> [ 3369.192580] x13: ff8008b51000 x12: ffc3eb94
> [ 3369.197882] x11: 0006 x10: ff8008b51b78
> [ 3369.203184] x9 : 0001 x8 : ff8008be4000
> [ 3369.208486] x7 : ff8008b21b40 x6 : 1003
> [ 3369.213788] x5 :  x4 : ff8008b27280
> [ 3369.219090] x3 : ff8008b21b4c x2 : 0001
> [ 3369.224406] x1 : 0001 x0 : 0140
> ...
> [ 3369.972257] [] sync_rcu_exp_select_cpus+0x188/0x510
> [ 3369.978685] [] synchronize_rcu_expedited+0x64/0xa8
> [ 3369.985026] [] synchronize_net+0x24/0x30
> [ 3369.990499] [] dev_deactivate_many+0x28c/0x298
> [ 3369.996493] [] __dev_close_many+0x60/0xd0
> [ 3370.002052] [] __dev_close+0x28/0x40
> [ 3370.007178] [] __dev_change_flags+0x8c/0x158
> [ 3370.012999] [] dev_change_flags+0x20/0x60
> [ 3370.018558] [] do_setlink+0x288/0x918
> [ 3370.023771] [] rtnl_newlink+0x398/0x6a8
> [ 3370.029158] [] rtnetlink_rcv_msg+0xe4/0x220
> [ 3370.034891] [] netlink_rcv_skb+0xc4/0xf8
> [ 3370.040364] [] rtnetlink_rcv+0x2c/0x40
> [ 3370.045663] [] netlink_unicast+0x160/0x238
> [ 3370.051309] [] netlink_sendmsg+0x2f0/0x358
> [ 3370.056956] [] sock_sendmsg+0x18/0x30
> [ 3370.062168] [] ___sys_sendmsg+0x26c/0x280
> [ 3370.067728] [] __sys_sendmsg+0x44/0x88
> [ 3370.073027] [] SyS_sendmsg+0x10/0x20
> [ 3370.078153] [] el0_svc_naked+0x24/0x28

Nice fix and simplification!

Could you please forward-port this to branch rcu/dev of -rcu?

git://git.kernel.org/pub/scm/linux/kernel/git/paulmck/linux-rcu.git

Thanx, Paul


> Signed-off-by: Mark Rutland 
> Reported-by: Dennis Chen 
> Cc: Paul E. McKenney 
> Cc: Catalin Marinas 
> Cc: Josh Triplett 
> Cc: Lai Jiangshan 
> Cc: Mathieu Desnoyers 
> Cc: Steve Capper 
> Cc: Steven Rostedt 
> Cc: Will Deacon 
> Cc: linux-kernel@vger.kernel.org
> ---
>  kernel/rcu/tree.c | 30 +-
>  kernel/rcu/tree.h | 18 ++
>  2 files changed, 31 insertions(+), 17 deletions(-)
> 
> diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
> index 9a535a8..2923df3 100644
> --- a/kernel/rcu/tree.c
> +++ b/kernel/rcu/tree.c
> @@ -1235,15 +1235,16 @@ static void rcu_check_gp_kthread_starvation(struct 
> rcu_state *rsp)
>  static void rcu_dump_cpu_stacks(struct rcu_state *rsp)
>  {
>   int cpu;
> + unsigned long 

Re: [PATCH] rcu: tree: correctly handle sparse possible CPUs

2016-05-16 Thread Paul E. McKenney
On Mon, May 16, 2016 at 05:48:26PM +0100, Mark Rutland wrote:
> In many cases in the RCU tree code, we iterate over the set of CPUS for
> a leaf node described by rcu_node::grplo and rcu_node::grphi, checking
> per-cpu data for each CPU in this range. However, if the set of possible
> CPUs is sparse, some CPUs described in this range are not possible, and
> thus no per-cpu region will have been allocated (or initialised) for
> them by the generic percpu code.
> 
> Erroneous accesses to a per-cpu area for these !possible CPUs may fault
> or may hit other data depending on the address generated when the
> erroneous per cpu offset is applied. In practice, both cases have been
> observed on arm64 hardware (the former being silent, but detectable with
> additional patches).
> 
> To avoid issues resulting from this, we must iterate over the set of
> *possible* cpus for a given leaf node. This patch adds new helpers to
> enable this (also unifying and simplifying some related bitmask
> manipulation logic), and moves the RCU tree code over to them.
> 
> Without this patch, running reboot at a shell can result in an oops
> like:
> 
> [ 3369.075979] Unable to handle kernel paging request at virtual address 
> ff8008b21b4c
> [ 3369.083881] pgd = ffc3ecdda000
> [ 3369.087270] [ff8008b21b4c] *pgd=0083eca48003, 
> *pud=0083eca48003, *pmd=
> [ 3369.096222] Internal error: Oops: 9607 [#1] PREEMPT SMP
> [ 3369.101781] Modules linked in:
> [ 3369.104825] CPU: 2 PID: 1817 Comm: NetworkManager Tainted: GW  
>  4.6.0+ #3
> [ 3369.121239] task: ffc0fa13e000 ti: ffc3eb94 task.ti: 
> ffc3eb94
> [ 3369.128708] PC is at sync_rcu_exp_select_cpus+0x188/0x510
> [ 3369.134094] LR is at sync_rcu_exp_select_cpus+0x104/0x510
> [ 3369.139479] pc : [] lr : [] pstate: 
> 21c5
> [ 3369.146860] sp : ffc3eb9435a0
> [ 3369.150162] x29: ffc3eb9435a0 x28: ff8008be4f88
> [ 3369.155465] x27: ff8008b66c80 x26: ffc3eceb2600
> [ 3369.160767] x25: 0001 x24: ff8008be4f88
> [ 3369.166070] x23: ff8008b51c3c x22: ff8008b66c80
> [ 3369.171371] x21: 0001 x20: ff8008b21b40
> [ 3369.176673] x19: ff8008b66c80 x18: 
> [ 3369.181975] x17: 007fa951a010 x16: ff80086a30f0
> [ 3369.187278] x15: 007fa9505590 x14: 
> [ 3369.192580] x13: ff8008b51000 x12: ffc3eb94
> [ 3369.197882] x11: 0006 x10: ff8008b51b78
> [ 3369.203184] x9 : 0001 x8 : ff8008be4000
> [ 3369.208486] x7 : ff8008b21b40 x6 : 1003
> [ 3369.213788] x5 :  x4 : ff8008b27280
> [ 3369.219090] x3 : ff8008b21b4c x2 : 0001
> [ 3369.224406] x1 : 0001 x0 : 0140
> ...
> [ 3369.972257] [] sync_rcu_exp_select_cpus+0x188/0x510
> [ 3369.978685] [] synchronize_rcu_expedited+0x64/0xa8
> [ 3369.985026] [] synchronize_net+0x24/0x30
> [ 3369.990499] [] dev_deactivate_many+0x28c/0x298
> [ 3369.996493] [] __dev_close_many+0x60/0xd0
> [ 3370.002052] [] __dev_close+0x28/0x40
> [ 3370.007178] [] __dev_change_flags+0x8c/0x158
> [ 3370.012999] [] dev_change_flags+0x20/0x60
> [ 3370.018558] [] do_setlink+0x288/0x918
> [ 3370.023771] [] rtnl_newlink+0x398/0x6a8
> [ 3370.029158] [] rtnetlink_rcv_msg+0xe4/0x220
> [ 3370.034891] [] netlink_rcv_skb+0xc4/0xf8
> [ 3370.040364] [] rtnetlink_rcv+0x2c/0x40
> [ 3370.045663] [] netlink_unicast+0x160/0x238
> [ 3370.051309] [] netlink_sendmsg+0x2f0/0x358
> [ 3370.056956] [] sock_sendmsg+0x18/0x30
> [ 3370.062168] [] ___sys_sendmsg+0x26c/0x280
> [ 3370.067728] [] __sys_sendmsg+0x44/0x88
> [ 3370.073027] [] SyS_sendmsg+0x10/0x20
> [ 3370.078153] [] el0_svc_naked+0x24/0x28

Nice fix and simplification!

Could you please forward-port this to branch rcu/dev of -rcu?

git://git.kernel.org/pub/scm/linux/kernel/git/paulmck/linux-rcu.git

Thanx, Paul


> Signed-off-by: Mark Rutland 
> Reported-by: Dennis Chen 
> Cc: Paul E. McKenney 
> Cc: Catalin Marinas 
> Cc: Josh Triplett 
> Cc: Lai Jiangshan 
> Cc: Mathieu Desnoyers 
> Cc: Steve Capper 
> Cc: Steven Rostedt 
> Cc: Will Deacon 
> Cc: linux-kernel@vger.kernel.org
> ---
>  kernel/rcu/tree.c | 30 +-
>  kernel/rcu/tree.h | 18 ++
>  2 files changed, 31 insertions(+), 17 deletions(-)
> 
> diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
> index 9a535a8..2923df3 100644
> --- a/kernel/rcu/tree.c
> +++ b/kernel/rcu/tree.c
> @@ -1235,15 +1235,16 @@ static void rcu_check_gp_kthread_starvation(struct 
> rcu_state *rsp)
>  static void rcu_dump_cpu_stacks(struct rcu_state *rsp)
>  {
>   int cpu;
> + unsigned long bit;
>   unsigned long flags;
>   struct rcu_node *rnp;
> 
>   rcu_for_each_leaf_node(rsp, rnp) {
>   raw_spin_lock_irqsave_rcu_node(rnp, flags);
>   if (rnp->qsmask != 0) {
> - for 

Re: [PATCH] ftrace/x86: Fix function graph tracer reset path

2016-05-16 Thread Borislav Petkov
On Mon, May 16, 2016 at 03:13:57PM -0400, Steven Rostedt wrote:
> I actually thought about this first, but I thought it rather a hack
> (although one could argue all of function tracing is a hack ;-)

... I was about to say...

> But as the "weak" call was used to fix one location, why not use
> it here too. Being consistent, and also making sure all calls to
> ftrace_stub do the same.

Btw, arch_static_branch_jump() spells that 5-byte JMP too and not until
too long ago we had it in static_cpu_has()...

I guess after spending some time with the kernel, one can't really
differentiate hacks from proper design anymore. :-P

-- 
Regards/Gruss,
Boris.

SUSE Linux GmbH, GF: Felix Imendörffer, Jane Smithard, Graham Norton, HRB 21284 
(AG Nürnberg)
-- 


Re: [PATCH] ftrace/x86: Fix function graph tracer reset path

2016-05-16 Thread Borislav Petkov
On Mon, May 16, 2016 at 03:13:57PM -0400, Steven Rostedt wrote:
> I actually thought about this first, but I thought it rather a hack
> (although one could argue all of function tracing is a hack ;-)

... I was about to say...

> But as the "weak" call was used to fix one location, why not use
> it here too. Being consistent, and also making sure all calls to
> ftrace_stub do the same.

Btw, arch_static_branch_jump() spells that 5-byte JMP too and not until
too long ago we had it in static_cpu_has()...

I guess after spending some time with the kernel, one can't really
differentiate hacks from proper design anymore. :-P

-- 
Regards/Gruss,
Boris.

SUSE Linux GmbH, GF: Felix Imendörffer, Jane Smithard, Graham Norton, HRB 21284 
(AG Nürnberg)
-- 


Re: [PATCH v5 03/12] mm: balloon: use general non-lru movable page feature

2016-05-16 Thread kbuild test robot
Hi,

[auto build test ERROR on next-20160506]
[cannot apply to v4.6-rc7 v4.6-rc6 v4.6-rc5 v4.6]
[if your patch is applied to the wrong git tree, please drop us a note to help 
improve the system]

url:
https://github.com/0day-ci/linux/commits/Minchan-Kim/mm-use-put_page-to-free-page-instead-of-putback_lru_page/20160509-102355
config: i386-randconfig-s0-05161922 (attached as .config)
compiler: gcc-6 (Debian 6.1.1-1) 6.1.1 20160430
reproduce:
# save the attached .config to linux build tree
make ARCH=i386 

All errors (new ones prefixed by >>):

>> ERROR: "__ClearPageMovable" [drivers/virtio/virtio_balloon.ko] undefined!
>> ERROR: "__SetPageMovable" [drivers/virtio/virtio_balloon.ko] undefined!

---
0-DAY kernel test infrastructureOpen Source Technology Center
https://lists.01.org/pipermail/kbuild-all   Intel Corporation


.config.gz
Description: Binary data


Re: [PATCH v5 03/12] mm: balloon: use general non-lru movable page feature

2016-05-16 Thread kbuild test robot
Hi,

[auto build test ERROR on next-20160506]
[cannot apply to v4.6-rc7 v4.6-rc6 v4.6-rc5 v4.6]
[if your patch is applied to the wrong git tree, please drop us a note to help 
improve the system]

url:
https://github.com/0day-ci/linux/commits/Minchan-Kim/mm-use-put_page-to-free-page-instead-of-putback_lru_page/20160509-102355
config: i386-randconfig-s0-05161922 (attached as .config)
compiler: gcc-6 (Debian 6.1.1-1) 6.1.1 20160430
reproduce:
# save the attached .config to linux build tree
make ARCH=i386 

All errors (new ones prefixed by >>):

>> ERROR: "__ClearPageMovable" [drivers/virtio/virtio_balloon.ko] undefined!
>> ERROR: "__SetPageMovable" [drivers/virtio/virtio_balloon.ko] undefined!

---
0-DAY kernel test infrastructureOpen Source Technology Center
https://lists.01.org/pipermail/kbuild-all   Intel Corporation


.config.gz
Description: Binary data


Re: [PATCH] ftrace/x86: Fix function graph tracer reset path

2016-05-16 Thread Steven Rostedt
On Mon, 16 May 2016 21:03:59 +0200
Borislav Petkov  wrote:

> On Mon, May 16, 2016 at 11:24:53PM +0900, Namhyung Kim wrote:
> > > -GLOBAL(ftrace_stub)
> > > +/* This is weak to keep gas from relaxing the jumps */
> > > +WEAK(ftrace_stub)
> > >   retq
> > >  END(ftrace_caller)  
> 
> You could also force the 5-byte jump. I guess you could also write
> simply ".long 0" in there but this way it is more robust if someone
> decides to add other stuff between the JMP and the ftrace_stub label.
> 
> ---
> diff --git a/arch/x86/kernel/mcount_64.S b/arch/x86/kernel/mcount_64.S
> index ed48a9f465f8..b1db8a584c06 100644
> --- a/arch/x86/kernel/mcount_64.S
> +++ b/arch/x86/kernel/mcount_64.S
> @@ -179,7 +179,9 @@ GLOBAL(ftrace_epilogue)
>  
>  #ifdef CONFIG_FUNCTION_GRAPH_TRACER
>  GLOBAL(ftrace_graph_call)
> - jmp ftrace_stub
> + .byte 0xe9
> + .long ftrace_stub - 1f
> +1:
>  #endif
>  

I actually thought about this first, but I thought it rather a hack
(although one could argue all of function tracing is a hack ;-) But as
the "weak" call was used to fix one location, why not use it here too.
Being consistent, and also making sure all calls to ftrace_stub do the
same.

-- Steve


Re: [PATCH] ftrace/x86: Fix function graph tracer reset path

2016-05-16 Thread Steven Rostedt
On Mon, 16 May 2016 21:03:59 +0200
Borislav Petkov  wrote:

> On Mon, May 16, 2016 at 11:24:53PM +0900, Namhyung Kim wrote:
> > > -GLOBAL(ftrace_stub)
> > > +/* This is weak to keep gas from relaxing the jumps */
> > > +WEAK(ftrace_stub)
> > >   retq
> > >  END(ftrace_caller)  
> 
> You could also force the 5-byte jump. I guess you could also write
> simply ".long 0" in there but this way it is more robust if someone
> decides to add other stuff between the JMP and the ftrace_stub label.
> 
> ---
> diff --git a/arch/x86/kernel/mcount_64.S b/arch/x86/kernel/mcount_64.S
> index ed48a9f465f8..b1db8a584c06 100644
> --- a/arch/x86/kernel/mcount_64.S
> +++ b/arch/x86/kernel/mcount_64.S
> @@ -179,7 +179,9 @@ GLOBAL(ftrace_epilogue)
>  
>  #ifdef CONFIG_FUNCTION_GRAPH_TRACER
>  GLOBAL(ftrace_graph_call)
> - jmp ftrace_stub
> + .byte 0xe9
> + .long ftrace_stub - 1f
> +1:
>  #endif
>  

I actually thought about this first, but I thought it rather a hack
(although one could argue all of function tracing is a hack ;-) But as
the "weak" call was used to fix one location, why not use it here too.
Being consistent, and also making sure all calls to ftrace_stub do the
same.

-- Steve


Re: [RFC v2 PATCH 0/8] VFS:userns: support portable root filesystems

2016-05-16 Thread James Bottomley
On Sat, 2016-05-14 at 21:21 -0500, Eric W. Biederman wrote:
> James Bottomley  writes:
> 
> > On Sat, 2016-05-14 at 10:53 +0100, Djalal Harouni wrote:
> 
> Just a couple of quick comments from a very high level design point.
> 
> - I think a shiftfs is valuable in the same way that overlayfs is
>   valuable.
> 
>   Esepcially in the Docker case where a lot of containers want a shared
>   base image (for efficiency), but it is desirable to run those
>   containers in different user namespaces for safety.
> 
> - It is also the plan to make it possible to mount a filesystem where
>   the uids and gids of that filesystem on disk do not have a one to one
>   mapping to kernel uids and gids.  99% of the work has already be done,
>   for all filesystem except XFS.

Can you elaborate a bit more on why we want to do this?  I think only
having a single shift of uid_t to kuid_t across the kernel to user
boundary is a nice feature of user namespaces.  Architecturally, it's
not such a big thing to do it as the data goes on to the disk as well,
but what's the use case for it?

>   That said there are some significant issues to work through, before
>   something like that can be enabled.
> 
>   * Handling of uids/gids on disk that don't map into a kuid/kgid.

So I think this is nicely handled in the capability checks in
generic_permission() (capable_wrt_inode_uidgid()).  Is there a need to
make it more complex (and thus more error prone)?

>   * Safety from poisoned filesystem images.

By poisoned FS image, you mean an image over whose internal data the
user has control?  The basic problem of how do we give users write
access to data devices they can then cause to be mounted as
filesystems?

>   I have slowly been working with Seth Forshee on these issues as
>   the last thing I want is to introduce more security bugs right now.
>   Seth being a braver man than I am has already merged his changes into
>   the Ubuntu kernel.
> 
>   Right now we are targeting fuse, because fuse is already designed to
>   handle poisoned filesystem images.  So to safely enable this kind of
>   mapping for fuse is not a giant step.
> 
>   The big thing from my point of view is to get the VFS interfaces
>   correct so that the VFS handles all of the weird cases that come up
>   with uids and gids that don't map, and any other weird cases.  Keeping
>   the weird bits out of the filesystems.

If by VFS interfaces, you mean where we've already got the mapping 
confined, absolutely.

> James I think you are missing the fact that all filesystems already 
> have the make_kuid and make_kgid calls right where the data comes off
> disk,

I beg to differ: they certainly don't.  The underlying filesystem
populates the inode in ->lookup with the data off the disk which goes
into the inode as a kuid_t/kgid_t.  It remains forever in the inode as
that.  We convert it as it goes out of the kernel in the stat calls
(actually stat.c:cp_old/new_stat())

>  and the from_kuid and from_kgid calls right where the on-disk data
> is being created just before it goes on disk.  Which means that the
> actual impact on filesystems of the translation is trivial.

Are you looking at a different tree from me?  I'm actually just looking
at Linus git head.

James




Re: [RFC v2 PATCH 0/8] VFS:userns: support portable root filesystems

2016-05-16 Thread James Bottomley
On Sat, 2016-05-14 at 21:21 -0500, Eric W. Biederman wrote:
> James Bottomley  writes:
> 
> > On Sat, 2016-05-14 at 10:53 +0100, Djalal Harouni wrote:
> 
> Just a couple of quick comments from a very high level design point.
> 
> - I think a shiftfs is valuable in the same way that overlayfs is
>   valuable.
> 
>   Esepcially in the Docker case where a lot of containers want a shared
>   base image (for efficiency), but it is desirable to run those
>   containers in different user namespaces for safety.
> 
> - It is also the plan to make it possible to mount a filesystem where
>   the uids and gids of that filesystem on disk do not have a one to one
>   mapping to kernel uids and gids.  99% of the work has already be done,
>   for all filesystem except XFS.

Can you elaborate a bit more on why we want to do this?  I think only
having a single shift of uid_t to kuid_t across the kernel to user
boundary is a nice feature of user namespaces.  Architecturally, it's
not such a big thing to do it as the data goes on to the disk as well,
but what's the use case for it?

>   That said there are some significant issues to work through, before
>   something like that can be enabled.
> 
>   * Handling of uids/gids on disk that don't map into a kuid/kgid.

So I think this is nicely handled in the capability checks in
generic_permission() (capable_wrt_inode_uidgid()).  Is there a need to
make it more complex (and thus more error prone)?

>   * Safety from poisoned filesystem images.

By poisoned FS image, you mean an image over whose internal data the
user has control?  The basic problem of how do we give users write
access to data devices they can then cause to be mounted as
filesystems?

>   I have slowly been working with Seth Forshee on these issues as
>   the last thing I want is to introduce more security bugs right now.
>   Seth being a braver man than I am has already merged his changes into
>   the Ubuntu kernel.
> 
>   Right now we are targeting fuse, because fuse is already designed to
>   handle poisoned filesystem images.  So to safely enable this kind of
>   mapping for fuse is not a giant step.
> 
>   The big thing from my point of view is to get the VFS interfaces
>   correct so that the VFS handles all of the weird cases that come up
>   with uids and gids that don't map, and any other weird cases.  Keeping
>   the weird bits out of the filesystems.

If by VFS interfaces, you mean where we've already got the mapping 
confined, absolutely.

> James I think you are missing the fact that all filesystems already 
> have the make_kuid and make_kgid calls right where the data comes off
> disk,

I beg to differ: they certainly don't.  The underlying filesystem
populates the inode in ->lookup with the data off the disk which goes
into the inode as a kuid_t/kgid_t.  It remains forever in the inode as
that.  We convert it as it goes out of the kernel in the stat calls
(actually stat.c:cp_old/new_stat())

>  and the from_kuid and from_kgid calls right where the on-disk data
> is being created just before it goes on disk.  Which means that the
> actual impact on filesystems of the translation is trivial.

Are you looking at a different tree from me?  I'm actually just looking
at Linus git head.

James




Re: [PATCH] usb: gadget: f_fs: report error if excess data received

2016-05-16 Thread Michal Nazarewicz
On Mon, May 16 2016, Felipe Balbi wrote:
> Michal Nazarewicz  writes:
>
>>> Alan Stern  writes:
>>>> The point is that you don't know whether the host sent more data than
>>>> expected.  All you know is that the host sent more data than the user
>>>> asked the kernel for -- but maybe the user didn't ask for all the
>>>> data that he expected.  Maybe the user wanted to retrieve the full
>>>> set of data using two read() system calls.
>>
>> On Mon, May 16 2016, Felipe Balbi wrote:
>>> right, but that just means we need to buffer the data instead of bailing
>>> out of the first read() completely.
>>
>> Correct.
>>
>> I have a ~4h bus ride ahead of me so I’ll try to implement it.  If you
>> don’t hear from me by the end of the day, there probably wasn’t enough
>> space/comfort in the bus to use a laptop.
>
> Cool, Michal. Thanks
>
> seems like a kfifo would do well here(?)

There appears to be no kfifo support for iov_iter though, so I just went
with a simple buffer.

I haven’t looked at the patch too carefully so this is an RFC rather
than an actual patch at this point.  It does compile at least.

Regardless, the more I think about it, the more I’m under the impression
that the whole rounding up in f_fs was a mistake.  And the more I’m
leaning towards ignoring the excess data sent by the host.

-- >8 --
Subject: usb: gadget: f_fs: buffer data from ‘oversized’ OUT requests

f_fs rounds up read(2) requests to a multiple of a max packet size
which means that host may provide more data than user has space for.
So far, the excess data has been silently ignored.

This introduces a buffer for a tail of such requests so that they are
returned on next read instead of being ignored.

Signed-off-by: Michal Nazarewicz 
---
 drivers/usb/gadget/function/f_fs.c | 63 +-
 1 file changed, 56 insertions(+), 7 deletions(-)

diff --git a/drivers/usb/gadget/function/f_fs.c 
b/drivers/usb/gadget/function/f_fs.c
index 2c314c1..7d3c51a 100644
--- a/drivers/usb/gadget/function/f_fs.c
+++ b/drivers/usb/gadget/function/f_fs.c
@@ -130,6 +130,12 @@ struct ffs_epfile {
 
struct dentry   *dentry;
 
+   /*
+* Buffer for holding data from partial reads which may happen since
+* we’re rounding user read requests to a multiple of a max packet size.
+*/
+   struct ffs_buffer   *read_buffer;
+
charname[5];
 
unsigned char   in; /* P: ffs->eps_lock */
@@ -138,6 +144,12 @@ struct ffs_epfile {
unsigned char   _pad;
 };
 
+struct ffs_buffer {
+   size_t length;
+   char *data;
+   char storage[];
+};
+
 /*  ffs_io_data structure ***/
 
 struct ffs_io_data {
@@ -681,6 +693,24 @@ static void ffs_epfile_async_io_complete(struct usb_ep 
*_ep,
schedule_work(_data->work);
 }
 
+static ssize_t ffs_epfile_read_buffered(struct ffs_epfile *epfile,
+   struct iov_iter *iter)
+{
+   struct ffs_buffer *buf = epfile->read_buffer;
+   ssize_t ret = 0;
+   if (buf) {
+   ret = copy_to_iter(buf->data, buf->length, iter);
+   buf->length -= ret;
+   if (buf->length) {
+   buf->data += ret;
+   } else {
+   kfree(buf);
+   epfile->read_buffer = NULL;
+   }
+   }
+   return ret;
+}
+
 static ssize_t ffs_epfile_io(struct file *file, struct ffs_io_data *io_data)
 {
struct ffs_epfile *epfile = file->private_data;
@@ -710,6 +740,18 @@ static ssize_t ffs_epfile_io(struct file *file, struct 
ffs_io_data *io_data)
if (halt && epfile->isoc)
return -EINVAL;
 
+   /*
+* Do we have buffered data from previous partial read?  Check that for
+* synchronous case only because we do not have facility to ‘wake up’
+* a pending asynchronous read and push buffered data to it which we
+* would need to make things behave consistently.
+*/
+   if (!halt && !io_data->aio && io_data->read) {
+   ret = ffs_epfile_read_buffered(epfile, &io_data->data);
+   if (ret)
+   return ret;
+   }
+
/* Allocate & copy */
if (!halt) {
/*
@@ -804,17 +846,24 @@ static ssize_t ffs_epfile_io(struct file *file, struct 
ffs_io_data *io_data)
interrupted = ep->status < 0;
}
 
-   /*
-* XXX We may end up silently droping data here.  Since data_len
-* (i.e. req->length) may be bigger than len (after being
-* rounded up to maxpacketsize), we may end up with more data
-* then user space has space 

Re: [PATCH] usb: gadget: f_fs: report error if excess data received

2016-05-16 Thread Michal Nazarewicz
On Mon, May 16 2016, Felipe Balbi wrote:
> Michal Nazarewicz  writes:
>
>>> Alan Stern  writes:
>>>> The point is that you don't know whether the host sent more data than
>>>> expected.  All you know is that the host sent more data than the user
>>>> asked the kernel for -- but maybe the user didn't ask for all the
>>>> data that he expected.  Maybe the user wanted to retrieve the full
>>>> set of data using two read() system calls.
>>
>> On Mon, May 16 2016, Felipe Balbi wrote:
>>> right, but that just means we need to buffer the data instead of bailing
>>> out of the first read() completely.
>>
>> Correct.
>>
>> I have a ~4h bus ride ahead of me so I’ll try to implement it.  If you
>> don’t hear from me by the end of the day, there probably wasn’t enough
>> space/comfort in the bus to use a laptop.
>
> Cool, Michal. Thanks
>
> seems like a kfifo would do well here(?)

There appears to be no kfifo support for iov_iter though, so I just went
with a simple buffer.

I haven’t looked at the patch too carefully so this is an RFC rather
than an actual patch at this point.  It does compile at least.

Regardless, the more I think about it, the more I’m under the impression
that the whole rounding up in f_fs was a mistake.  And the more I’m
leaning towards ignoring the excess data sent by the host.

-- >8 --
Subject: usb: gadget: f_fs: buffer data from ‘oversized’ OUT requests

f_fs rounds up read(2) requests to a multiple of a max packet size
which means that host may provide more data than user has space for.
So far, the excess data has been silently ignored.

This introduces a buffer for a tail of such requests so that they are
returned on next read instead of being ignored.

Signed-off-by: Michal Nazarewicz 
---
 drivers/usb/gadget/function/f_fs.c | 63 +-
 1 file changed, 56 insertions(+), 7 deletions(-)

diff --git a/drivers/usb/gadget/function/f_fs.c 
b/drivers/usb/gadget/function/f_fs.c
index 2c314c1..7d3c51a 100644
--- a/drivers/usb/gadget/function/f_fs.c
+++ b/drivers/usb/gadget/function/f_fs.c
@@ -130,6 +130,12 @@ struct ffs_epfile {
 
struct dentry   *dentry;
 
+   /*
+* Buffer for holding data from partial reads which may happen since
+* we’re rounding user read requests to a multiple of a max packet size.
+*/
+   struct ffs_buffer   *read_buffer;
+
charname[5];
 
unsigned char   in; /* P: ffs->eps_lock */
@@ -138,6 +144,12 @@ struct ffs_epfile {
unsigned char   _pad;
 };
 
+struct ffs_buffer {
+   size_t length;
+   char *data;
+   char storage[];
+};
+
 /*  ffs_io_data structure ***/
 
 struct ffs_io_data {
@@ -681,6 +693,24 @@ static void ffs_epfile_async_io_complete(struct usb_ep 
*_ep,
schedule_work(_data->work);
 }
 
+static ssize_t ffs_epfile_read_buffered(struct ffs_epfile *epfile,
+   struct iov_iter *iter)
+{
+   struct ffs_buffer *buf = epfile->read_buffer;
+   ssize_t ret = 0;
+   if (buf) {
+   ret = copy_to_iter(buf->data, buf->length, iter);
+   buf->length -= ret;
+   if (buf->length) {
+   buf->data += ret;
+   } else {
+   kfree(buf);
+   epfile->read_buffer = NULL;
+   }
+   }
+   return ret;
+}
+
 static ssize_t ffs_epfile_io(struct file *file, struct ffs_io_data *io_data)
 {
struct ffs_epfile *epfile = file->private_data;
@@ -710,6 +740,18 @@ static ssize_t ffs_epfile_io(struct file *file, struct 
ffs_io_data *io_data)
if (halt && epfile->isoc)
return -EINVAL;
 
+   /*
+* Do we have buffered data from previous partial read?  Check that for
+* synchronous case only because we do not have facility to ‘wake up’
+* a pending asynchronous read and push buffered data to it which we
+* would need to make things behave consistently.
+*/
+   if (!halt && !io_data->aio && io_data->read) {
+   ret = ffs_epfile_read_buffered(epfile, &io_data->data);
+   if (ret)
+   return ret;
+   }
+
/* Allocate & copy */
if (!halt) {
/*
@@ -804,17 +846,24 @@ static ssize_t ffs_epfile_io(struct file *file, struct 
ffs_io_data *io_data)
interrupted = ep->status < 0;
}
 
-   /*
-* XXX We may end up silently droping data here.  Since data_len
-* (i.e. req->length) may be bigger than len (after being
-* rounded up to maxpacketsize), we may end up with more data
-* then user space has space for.
-*/
ret = interrupted ? 

[ANNOUNCE] MDB Linux Kernel Debugger Linux v4.6

2016-05-16 Thread Jeffrey Merkey
The following changes since commit 2dcd0af568b0cf583645c8a317dd12e344b1c72a:

  Linux 4.6 (2016-05-15 15:43:13 -0700)

are available in the git repository at:

  https://github.com/jeffmerkey/linux.git tags/mdb-v4.6-tag

for you to fetch changes up to 8c50856a5d798108aa4d5bfadb0c2172fce2448e:

  Add MDB Linux Kernel Debugger to Linux v4.6 (2016-05-15 18:16:50 -0600)


Signed-Off-By:  Jeff Merkey 

The MDB Linux Kernel Debugger for Linux Kernel v4.6 is now available via git.   


Checkpatch Compliance Results:

./scripts/checkpatch.pl --strict --file arch/x86/kernel/debug/mdb/*.[ch] 
arch/x86/kernel/debug/mdb/Make*


arch/x86/kernel/debug/mdb/mdb-base.c

total: 0 errors, 0 warnings, 0 checks, 3297 lines checked

arch/x86/kernel/debug/mdb/mdb-base.c has no obvious style problems and is ready 
for submission.

arch/x86/kernel/debug/mdb/mdb-base.h

total: 0 errors, 0 warnings, 0 checks, 447 lines checked

arch/x86/kernel/debug/mdb/mdb-base.h has no obvious style problems and is ready 
for submission.
---
arch/x86/kernel/debug/mdb/mdb.h
---
total: 0 errors, 0 warnings, 0 checks, 40 lines checked

arch/x86/kernel/debug/mdb/mdb.h has no obvious style problems and is ready for 
submission.
---
arch/x86/kernel/debug/mdb/mdb-ia-apic.c
---
total: 0 errors, 0 warnings, 0 checks, 243 lines checked

arch/x86/kernel/debug/mdb/mdb-ia-apic.c has no obvious style problems and is 
ready for submission.
--
arch/x86/kernel/debug/mdb/mdb-ia.c
--
total: 0 errors, 0 warnings, 0 checks, 6887 lines checked

arch/x86/kernel/debug/mdb/mdb-ia.c has no obvious style problems and is ready 
for submission.
--
arch/x86/kernel/debug/mdb/mdb-ia.h
--
total: 0 errors, 0 warnings, 0 checks, 209 lines checked

arch/x86/kernel/debug/mdb/mdb-ia.h has no obvious style problems and is ready 
for submission.
---
arch/x86/kernel/debug/mdb/mdb-ia-proc.h
---
total: 0 errors, 0 warnings, 0 checks, 819 lines checked

arch/x86/kernel/debug/mdb/mdb-ia-proc.h has no obvious style problems and is 
ready for submission.
--
arch/x86/kernel/debug/mdb/mdb-ia-support.c
--
total: 0 errors, 0 warnings, 0 checks, 5342 lines checked

arch/x86/kernel/debug/mdb/mdb-ia-support.c has no obvious style problems and is 
ready for submission.
--
arch/x86/kernel/debug/mdb/mdb-ia-support.h
--
total: 0 errors, 0 warnings, 0 checks, 76 lines checked

arch/x86/kernel/debug/mdb/mdb-ia-support.h has no obvious style problems and is 
ready for submission.

arch/x86/kernel/debug/mdb/mdb-keyboard.h

total: 0 errors, 0 warnings, 0 checks, 127 lines checked

arch/x86/kernel/debug/mdb/mdb-keyboard.h has no obvious style problems and is 
ready for submission.

arch/x86/kernel/debug/mdb/mdb-list.c

total: 0 errors, 0 warnings, 0 checks, 534 lines checked

arch/x86/kernel/debug/mdb/mdb-list.c has no obvious style problems and is ready 
for submission.

arch/x86/kernel/debug/mdb/mdb-list.h

total: 0 errors, 0 warnings, 0 checks, 96 lines checked

arch/x86/kernel/debug/mdb/mdb-list.h has no obvious style problems and is ready 
for submission.
-
arch/x86/kernel/debug/mdb/mdb-logic.c
-
total: 0 errors, 0 warnings, 0 checks, 2118 lines checked

arch/x86/kernel/debug/mdb/mdb-logic.c has no obvious style problems and is 
ready for submission.

arch/x86/kernel/debug/mdb/mdb-main.c

total: 0 errors, 0 warnings, 0 checks, 786 lines checked

arch/x86/kernel/debug/mdb/mdb-main.c has no obvious style problems and is ready 
for submission.
--
arch/x86/kernel/debug/mdb/mdb-os.c
--
total: 0 errors, 0 warnings, 0 checks, 1474 lines checked

arch/x86/kernel/debug/mdb/mdb-os.c has no obvious style problems and is ready 
for submission.
--
arch/x86/kernel/debug/mdb/mdb-os.h
--
total: 0 errors, 0 warnings, 0 checks, 141 lines checked

arch/x86/kernel/debug/mdb/mdb-os.h has no obvious style problems and is ready 
for 

[ANNOUNCE] MDB Linux Kernel Debugger Linux v4.6

2016-05-16 Thread Jeffrey Merkey
The following changes since commit 2dcd0af568b0cf583645c8a317dd12e344b1c72a:

  Linux 4.6 (2016-05-15 15:43:13 -0700)

are available in the git repository at:

  https://github.com/jeffmerkey/linux.git tags/mdb-v4.6-tag

for you to fetch changes up to 8c50856a5d798108aa4d5bfadb0c2172fce2448e:

  Add MDB Linux Kernel Debugger to Linux v4.6 (2016-05-15 18:16:50 -0600)


Signed-Off-By:  Jeff Merkey 

The MDB Linux Kernel Debugger for Linux Kernel v4.6 is now available via git.   


Checkpatch Compliance Results:

./scripts/checkpatch.pl --strict --file arch/x86/kernel/debug/mdb/*.[ch] 
arch/x86/kernel/debug/mdb/Make*


arch/x86/kernel/debug/mdb/mdb-base.c

total: 0 errors, 0 warnings, 0 checks, 3297 lines checked

arch/x86/kernel/debug/mdb/mdb-base.c has no obvious style problems and is ready 
for submission.

arch/x86/kernel/debug/mdb/mdb-base.h

total: 0 errors, 0 warnings, 0 checks, 447 lines checked

arch/x86/kernel/debug/mdb/mdb-base.h has no obvious style problems and is ready 
for submission.
---
arch/x86/kernel/debug/mdb/mdb.h
---
total: 0 errors, 0 warnings, 0 checks, 40 lines checked

arch/x86/kernel/debug/mdb/mdb.h has no obvious style problems and is ready for 
submission.
---
arch/x86/kernel/debug/mdb/mdb-ia-apic.c
---
total: 0 errors, 0 warnings, 0 checks, 243 lines checked

arch/x86/kernel/debug/mdb/mdb-ia-apic.c has no obvious style problems and is 
ready for submission.
--
arch/x86/kernel/debug/mdb/mdb-ia.c
--
total: 0 errors, 0 warnings, 0 checks, 6887 lines checked

arch/x86/kernel/debug/mdb/mdb-ia.c has no obvious style problems and is ready 
for submission.
--
arch/x86/kernel/debug/mdb/mdb-ia.h
--
total: 0 errors, 0 warnings, 0 checks, 209 lines checked

arch/x86/kernel/debug/mdb/mdb-ia.h has no obvious style problems and is ready 
for submission.
---
arch/x86/kernel/debug/mdb/mdb-ia-proc.h
---
total: 0 errors, 0 warnings, 0 checks, 819 lines checked

arch/x86/kernel/debug/mdb/mdb-ia-proc.h has no obvious style problems and is 
ready for submission.
--
arch/x86/kernel/debug/mdb/mdb-ia-support.c
--
total: 0 errors, 0 warnings, 0 checks, 5342 lines checked

arch/x86/kernel/debug/mdb/mdb-ia-support.c has no obvious style problems and is 
ready for submission.
--
arch/x86/kernel/debug/mdb/mdb-ia-support.h
--
total: 0 errors, 0 warnings, 0 checks, 76 lines checked

arch/x86/kernel/debug/mdb/mdb-ia-support.h has no obvious style problems and is 
ready for submission.

arch/x86/kernel/debug/mdb/mdb-keyboard.h

total: 0 errors, 0 warnings, 0 checks, 127 lines checked

arch/x86/kernel/debug/mdb/mdb-keyboard.h has no obvious style problems and is 
ready for submission.

arch/x86/kernel/debug/mdb/mdb-list.c

total: 0 errors, 0 warnings, 0 checks, 534 lines checked

arch/x86/kernel/debug/mdb/mdb-list.c has no obvious style problems and is ready 
for submission.

arch/x86/kernel/debug/mdb/mdb-list.h

total: 0 errors, 0 warnings, 0 checks, 96 lines checked

arch/x86/kernel/debug/mdb/mdb-list.h has no obvious style problems and is ready 
for submission.
-
arch/x86/kernel/debug/mdb/mdb-logic.c
-
total: 0 errors, 0 warnings, 0 checks, 2118 lines checked

arch/x86/kernel/debug/mdb/mdb-logic.c has no obvious style problems and is 
ready for submission.

arch/x86/kernel/debug/mdb/mdb-main.c

total: 0 errors, 0 warnings, 0 checks, 786 lines checked

arch/x86/kernel/debug/mdb/mdb-main.c has no obvious style problems and is ready 
for submission.
--
arch/x86/kernel/debug/mdb/mdb-os.c
--
total: 0 errors, 0 warnings, 0 checks, 1474 lines checked

arch/x86/kernel/debug/mdb/mdb-os.c has no obvious style problems and is ready 
for submission.
--
arch/x86/kernel/debug/mdb/mdb-os.h
--
total: 0 errors, 0 warnings, 0 checks, 141 lines checked

arch/x86/kernel/debug/mdb/mdb-os.h has no obvious style problems and is ready 
for submission.

Re: [PATCH v2 4/5] of: overlay: Pick up label symbols from overlays.

2016-05-16 Thread Geert Uytterhoeven
On Mon, May 16, 2016 at 6:52 PM, Pantelis Antoniou
 wrote:
> Insert overlay symbols to the base tree when applied.
> This makes it possible to apply an overlay that references a label
> that a previously inserted overlay had.
>
> Signed-off-by: Pantelis Antoniou 

This patch hasn't changed, so I think you can keep my
Tested-by: Geert Uytterhoeven 

Gr{oetje,eeting}s,

Geert

--
Geert Uytterhoeven -- There's lots of Linux beyond ia32 -- ge...@linux-m68k.org

In personal conversations with technical people, I call myself a hacker. But
when I'm talking to journalists I just say "programmer" or something like that.
-- Linus Torvalds


Re: [PATCH v2 4/5] of: overlay: Pick up label symbols from overlays.

2016-05-16 Thread Geert Uytterhoeven
On Mon, May 16, 2016 at 6:52 PM, Pantelis Antoniou
 wrote:
> Insert overlay symbols to the base tree when applied.
> This makes it possible to apply an overlay that references a label
> that a previously inserted overlay had.
>
> Signed-off-by: Pantelis Antoniou 

This patch hasn't changed, so I think you can keep my
Tested-by: Geert Uytterhoeven 

Gr{oetje,eeting}s,

Geert

--
Geert Uytterhoeven -- There's lots of Linux beyond ia32 -- ge...@linux-m68k.org

In personal conversations with technical people, I call myself a hacker. But
when I'm talking to journalists I just say "programmer" or something like that.
-- Linus Torvalds


Re: [PATCH v2 3/5] of: unittest: hashed phandles unitest

2016-05-16 Thread Geert Uytterhoeven
On Mon, May 16, 2016 at 6:52 PM, Pantelis Antoniou
 wrote:
> Add a benchmarking hashed phandles unittest which report what kind
> of speed up we get switching to hashed phandle lookups.
>
>  ### dt-test ### the hash method is 8.2 times faster than the original
>
> On the beaglebone we perform about 1877 phandle lookups until that
> point in the unittest. Each non-hashed lookup takes about 23us when
> the cash is hot, while the hash lookup takes about 3us.

cache

> For those 1877 lookup we get a speedup in the boot sequence of
> 1877 * (23 - 3) = 37.5ms, which is not spectacular but there's no
> point in wasting cycles and energy.
>
> Signed-off-by: Pantelis Antoniou 
> ---
>  drivers/of/unittest.c | 68 
> +++
>  1 file changed, 68 insertions(+)
>
> diff --git a/drivers/of/unittest.c b/drivers/of/unittest.c
> index 7ea3689..59cad84 100644
> --- a/drivers/of/unittest.c
> +++ b/drivers/of/unittest.c
> @@ -25,6 +25,9 @@
>
>  #include 
>
> +#include 
> +#include 
> +
>  #include "of_private.h"
>
>  static struct unittest_results {
> @@ -2266,6 +2269,70 @@ out:
>  static inline void __init of_unittest_overlay(void) { }
>  #endif
>
> +#define PHANDLE_LOOKUPS1000
> +
> +static void __init of_unittest_phandle_hash(void)
> +{
> +   struct device_node *node;
> +   phandle max_phandle;
> +   u32 ph;
> +   unsigned long flags;
> +   int i, j, total;

unsigned int

> +   ktime_t start, end;
> +   s64 dur[2];

No idea why ktime_to_us() returns s64 i.s.o. u64...

> +   int dec, frac;

unsigned int?

> +   /* test only available when hashing is available */
> +   if (!of_phandle_ht_available()) {
> +   pr_warn("phandle hash test requires hash to be 
> initialized\n");
> +   return;
> +   }
> +
> +   /* find the maximum phandle of the tree */
> +   raw_spin_lock_irqsave(_lock, flags);
> +   max_phandle = 0;
> +   total = 0;
> +   for_each_of_allnodes(node) {
> +   if (node->phandle != (phandle)-1U &&

Drop the "U" suffix?

> +   node->phandle > max_phandle)
> +   max_phandle = node->phandle;
> +   total++;
> +   }
> +   raw_spin_unlock_irqrestore(_lock, flags);
> +   max_phandle++;
> +
> +   pr_debug("phandle: max-phandle #%u, #%d total nodes\n",
> +   (u32)max_phandle, total);

phandle is already u32, so no need for the cast.

> +
> +   /* perform random lookups using the hash */
> +   for (j = 0; j < 2; j++) {
> +
> +   /* disabled for pass #0, enabled for pass #1 */
> +   of_phandle_ht_is_disabled = j == 0;
> +
> +   start = ktime_get_raw();
> +   for (i = 0; i < PHANDLE_LOOKUPS; i++) {
> +   ph = prandom_u32() % max_phandle;
> +   node = of_find_node_by_phandle(ph);
> +   of_node_put(node);
> +   }
> +   end = ktime_get_raw();
> +
> +   dur[j] = ktime_to_us(end) - ktime_to_us(start);
> +   pr_debug("#%d lookups in %lld us (%s)\n",

%u

> +   PHANDLE_LOOKUPS, dur[j],
> +   j == 0 ? "original" : "hashed");
> +   }
> +
> +   unittest(dur[0] > dur[1], "Non hashing phandles are faster!?");
> +
> +   dec = (int)div64_s64(dur[0] * 10 + 5, dur[1]);

I'd expect div64_u64(), if not for ktime_to_us() returning s64...

> +   frac = dec % 10;
> +   dec /= 10;
> +   pr_info("the hash method is %d.%d times faster than the original\n",

%u.%u once dec and frac are unsigned.

> +   dec, frac);
> +}

Gr{oetje,eeting}s,

Geert

--
Geert Uytterhoeven -- There's lots of Linux beyond ia32 -- ge...@linux-m68k.org

In personal conversations with technical people, I call myself a hacker. But
when I'm talking to journalists I just say "programmer" or something like that.
-- Linus Torvalds


Re: [PATCH v2 3/5] of: unittest: hashed phandles unitest

2016-05-16 Thread Geert Uytterhoeven
On Mon, May 16, 2016 at 6:52 PM, Pantelis Antoniou
 wrote:
> Add a benchmarking hashed phandles unittest which report what kind
> of speed up we get switching to hashed phandle lookups.
>
>  ### dt-test ### the hash method is 8.2 times faster than the original
>
> On the beaglebone we perform about 1877 phandle lookups until that
> point in the unittest. Each non-hashed lookup takes about 23us when
> the cash is hot, while the hash lookup takes about 3us.

cache

> For those 1877 lookup we get a speedup in the boot sequence of
> 1877 * (23 - 3) = 37.5ms, which is not spectacular but there's no
> point in wasting cycles and energy.
>
> Signed-off-by: Pantelis Antoniou 
> ---
>  drivers/of/unittest.c | 68 
> +++
>  1 file changed, 68 insertions(+)
>
> diff --git a/drivers/of/unittest.c b/drivers/of/unittest.c
> index 7ea3689..59cad84 100644
> --- a/drivers/of/unittest.c
> +++ b/drivers/of/unittest.c
> @@ -25,6 +25,9 @@
>
>  #include 
>
> +#include 
> +#include 
> +
>  #include "of_private.h"
>
>  static struct unittest_results {
> @@ -2266,6 +2269,70 @@ out:
>  static inline void __init of_unittest_overlay(void) { }
>  #endif
>
> +#define PHANDLE_LOOKUPS1000
> +
> +static void __init of_unittest_phandle_hash(void)
> +{
> +   struct device_node *node;
> +   phandle max_phandle;
> +   u32 ph;
> +   unsigned long flags;
> +   int i, j, total;

unsigned int

> +   ktime_t start, end;
> +   s64 dur[2];

No idea why ktime_to_us() returns s64 i.s.o. u64...

> +   int dec, frac;

unsigned int?

> +   /* test only available when hashing is available */
> +   if (!of_phandle_ht_available()) {
> +   pr_warn("phandle hash test requires hash to be 
> initialized\n");
> +   return;
> +   }
> +
> +   /* find the maximum phandle of the tree */
> +   raw_spin_lock_irqsave(_lock, flags);
> +   max_phandle = 0;
> +   total = 0;
> +   for_each_of_allnodes(node) {
> +   if (node->phandle != (phandle)-1U &&

Drop the "U" suffix?

> +   node->phandle > max_phandle)
> +   max_phandle = node->phandle;
> +   total++;
> +   }
> +   raw_spin_unlock_irqrestore(_lock, flags);
> +   max_phandle++;
> +
> +   pr_debug("phandle: max-phandle #%u, #%d total nodes\n",
> +   (u32)max_phandle, total);

phandle is already u32, so no need for the cast.

> +
> +   /* perform random lookups using the hash */
> +   for (j = 0; j < 2; j++) {
> +
> +   /* disabled for pass #0, enabled for pass #1 */
> +   of_phandle_ht_is_disabled = j == 0;
> +
> +   start = ktime_get_raw();
> +   for (i = 0; i < PHANDLE_LOOKUPS; i++) {
> +   ph = prandom_u32() % max_phandle;
> +   node = of_find_node_by_phandle(ph);
> +   of_node_put(node);
> +   }
> +   end = ktime_get_raw();
> +
> +   dur[j] = ktime_to_us(end) - ktime_to_us(start);
> +   pr_debug("#%d lookups in %lld us (%s)\n",

%u

> +   PHANDLE_LOOKUPS, dur[j],
> +   j == 0 ? "original" : "hashed");
> +   }
> +
> +   unittest(dur[0] > dur[1], "Non hashing phandles are faster!?");
> +
> +   dec = (int)div64_s64(dur[0] * 10 + 5, dur[1]);

I'd expect div64_u64(), if not for ktime_to_us() returning s64...

> +   frac = dec % 10;
> +   dec /= 10;
> +   pr_info("the hash method is %d.%d times faster than the original\n",

%u.%u once dec and frac are unsigned.

> +   dec, frac);
> +}

Gr{oetje,eeting}s,

Geert

--
Geert Uytterhoeven -- There's lots of Linux beyond ia32 -- ge...@linux-m68k.org

In personal conversations with technical people, I call myself a hacker. But
when I'm talking to journalists I just say "programmer" or something like that.
-- Linus Torvalds


Re: [PATCH] ftrace/x86: Fix function graph tracer reset path

2016-05-16 Thread Borislav Petkov
On Mon, May 16, 2016 at 11:24:53PM +0900, Namhyung Kim wrote:
> > -GLOBAL(ftrace_stub)
> > +/* This is weak to keep gas from relaxing the jumps */
> > +WEAK(ftrace_stub)
> > retq
> >  END(ftrace_caller)

You could also force the 5-byte jump. I guess you could also write
simply ".long 0" in there but this way it is more robust if someone
decides to add other stuff between the JMP and the ftrace_stub label.

---
diff --git a/arch/x86/kernel/mcount_64.S b/arch/x86/kernel/mcount_64.S
index ed48a9f465f8..b1db8a584c06 100644
--- a/arch/x86/kernel/mcount_64.S
+++ b/arch/x86/kernel/mcount_64.S
@@ -179,7 +179,9 @@ GLOBAL(ftrace_epilogue)
 
 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
 GLOBAL(ftrace_graph_call)
-   jmp ftrace_stub
+   .byte 0xe9
+   .long ftrace_stub - 1f
+1:
 #endif
 
 GLOBAL(ftrace_stub)

-- 
Regards/Gruss,
Boris.

SUSE Linux GmbH, GF: Felix Imendörffer, Jane Smithard, Graham Norton, HRB 21284 
(AG Nürnberg)
-- 


Re: [PATCH v2 2/2] phy dp83867: Make rgmii parameters optional

2016-05-16 Thread Florian Fainelli
On 05/16/2016 11:52 AM, Alexander Graf wrote:
> If you compile without OF_MDIO support in an RGMII configuration, we fail
> to configure the dp83867 phy today by writing garbage into its configuration
> registers.
> 
> On the other hand if you do compile with OF_MDIO and the phy gets loaded via
> device tree, you have to have the properties set in the device tree, otherwise
> we fail to load the driver and don't even attach the generic phy driver to
> the interface anymore.
> 
> To make things slightly more consistent, make the rgmii configuration 
> properties
> optional and allow a user to omit them in their device tree.
> 
> Signed-off-by: Alexander Graf 
> ---
>  drivers/net/phy/dp83867.c | 31 ---
>  1 file changed, 28 insertions(+), 3 deletions(-)
> 
> diff --git a/drivers/net/phy/dp83867.c b/drivers/net/phy/dp83867.c
> index 94cc278..1b01680 100644
> --- a/drivers/net/phy/dp83867.c
> +++ b/drivers/net/phy/dp83867.c
> @@ -65,6 +65,7 @@ struct dp83867_private {
>   int rx_id_delay;
>   int tx_id_delay;
>   int fifo_depth;
> + int values_are_sane;

This could be a boolean type.

>  };
>  
>  static int dp83867_ack_interrupt(struct phy_device *phydev)
> @@ -113,15 +114,30 @@ static int dp83867_of_init(struct phy_device *phydev)
>   ret = of_property_read_u32(of_node, "ti,rx-internal-delay",
>  >rx_id_delay);
>   if (ret)
> - return ret;
> + goto invalid_dt;
>  
>   ret = of_property_read_u32(of_node, "ti,tx-internal-delay",
>  >tx_id_delay);
>   if (ret)
> - return ret;
> + goto invalid_dt;
>  
> - return of_property_read_u32(of_node, "ti,fifo-depth",
> + ret = of_property_read_u32(of_node, "ti,fifo-depth",
>  >fifo_depth);
> + if (ret)
> + goto invalid_dt;
> +
> + dp83867->values_are_sane = 1;
> +
> + return 0;
> +
> +invalid_dt:
> + phydev_err(phydev, "missing properties in device tree");

phydev_warn() maybe?

Other than that, this looks okay to me.
-- 
Florian


Re: [PATCH] ftrace/x86: Fix function graph tracer reset path

2016-05-16 Thread Borislav Petkov
On Mon, May 16, 2016 at 11:24:53PM +0900, Namhyung Kim wrote:
> > -GLOBAL(ftrace_stub)
> > +/* This is weak to keep gas from relaxing the jumps */
> > +WEAK(ftrace_stub)
> > retq
> >  END(ftrace_caller)

You could also force the 5-byte jump. I guess you could also write
simply ".long 0" in there but this way it is more robust if someone
decides to add other stuff between the JMP and the ftrace_stub label.

---
diff --git a/arch/x86/kernel/mcount_64.S b/arch/x86/kernel/mcount_64.S
index ed48a9f465f8..b1db8a584c06 100644
--- a/arch/x86/kernel/mcount_64.S
+++ b/arch/x86/kernel/mcount_64.S
@@ -179,7 +179,9 @@ GLOBAL(ftrace_epilogue)
 
 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
 GLOBAL(ftrace_graph_call)
-   jmp ftrace_stub
+   .byte 0xe9
+   .long ftrace_stub - 1f
+1:
 #endif
 
 GLOBAL(ftrace_stub)

-- 
Regards/Gruss,
Boris.

SUSE Linux GmbH, GF: Felix Imendörffer, Jane Smithard, Graham Norton, HRB 21284 
(AG Nürnberg)
-- 


Re: [PATCH v2 2/2] phy dp83867: Make rgmii parameters optional

2016-05-16 Thread Florian Fainelli
On 05/16/2016 11:52 AM, Alexander Graf wrote:
> If you compile without OF_MDIO support in an RGMII configuration, we fail
> to configure the dp83867 phy today by writing garbage into its configuration
> registers.
> 
> On the other hand if you do compile with OF_MDIO and the phy gets loaded via
> device tree, you have to have the properties set in the device tree, otherwise
> we fail to load the driver and don't even attach the generic phy driver to
> the interface anymore.
> 
> To make things slightly more consistent, make the rgmii configuration 
> properties
> optional and allow a user to omit them in their device tree.
> 
> Signed-off-by: Alexander Graf 
> ---
>  drivers/net/phy/dp83867.c | 31 ---
>  1 file changed, 28 insertions(+), 3 deletions(-)
> 
> diff --git a/drivers/net/phy/dp83867.c b/drivers/net/phy/dp83867.c
> index 94cc278..1b01680 100644
> --- a/drivers/net/phy/dp83867.c
> +++ b/drivers/net/phy/dp83867.c
> @@ -65,6 +65,7 @@ struct dp83867_private {
>   int rx_id_delay;
>   int tx_id_delay;
>   int fifo_depth;
> + int values_are_sane;

This could be a boolean type.

>  };
>  
>  static int dp83867_ack_interrupt(struct phy_device *phydev)
> @@ -113,15 +114,30 @@ static int dp83867_of_init(struct phy_device *phydev)
>   ret = of_property_read_u32(of_node, "ti,rx-internal-delay",
>  >rx_id_delay);
>   if (ret)
> - return ret;
> + goto invalid_dt;
>  
>   ret = of_property_read_u32(of_node, "ti,tx-internal-delay",
>  >tx_id_delay);
>   if (ret)
> - return ret;
> + goto invalid_dt;
>  
> - return of_property_read_u32(of_node, "ti,fifo-depth",
> + ret = of_property_read_u32(of_node, "ti,fifo-depth",
>  >fifo_depth);
> + if (ret)
> + goto invalid_dt;
> +
> + dp83867->values_are_sane = 1;
> +
> + return 0;
> +
> +invalid_dt:
> + phydev_err(phydev, "missing properties in device tree");

phydev_warn() maybe?

Other than that, this looks okay to me.
-- 
Florian


[PATCH v2 1/2] phy dp83867: Fix compilation with CONFIG_OF_MDIO=m

2016-05-16 Thread Alexander Graf
When CONFIG_OF_MDIO is configured as module, the #define for it really
is CONFIG_OF_MDIO_MODULE, not CONFIG_OF_MDIO. So if we are compiling it
as module, the dp83867 doesn't see that OF_MDIO was selected and doesn't
read the dt rgmii parameters.

The fix is simple: Use IS_ENABLED(). It checks for both - module as well
as compiled in code.

Signed-off-by: Alexander Graf 
---
 drivers/net/phy/dp83867.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/phy/dp83867.c b/drivers/net/phy/dp83867.c
index 2afa61b..94cc278 100644
--- a/drivers/net/phy/dp83867.c
+++ b/drivers/net/phy/dp83867.c
@@ -99,7 +99,7 @@ static int dp83867_config_intr(struct phy_device *phydev)
return phy_write(phydev, MII_DP83867_MICR, micr_status);
 }
 
-#ifdef CONFIG_OF_MDIO
+#if IS_ENABLED(CONFIG_OF_MDIO)
 static int dp83867_of_init(struct phy_device *phydev)
 {
struct dp83867_private *dp83867 = phydev->priv;
-- 
1.8.5.6



[PATCH v2 1/2] phy dp83867: Fix compilation with CONFIG_OF_MDIO=m

2016-05-16 Thread Alexander Graf
When CONFIG_OF_MDIO is configured as module, the #define for it really
is CONFIG_OF_MDIO_MODULE, not CONFIG_OF_MDIO. So if we are compiling it
as module, the dp83867 doesn't see that OF_MDIO was selected and doesn't
read the dt rgmii parameters.

The fix is simple: Use IS_ENABLED(). It checks for both - module as well
as compiled in code.

Signed-off-by: Alexander Graf 
---
 drivers/net/phy/dp83867.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/phy/dp83867.c b/drivers/net/phy/dp83867.c
index 2afa61b..94cc278 100644
--- a/drivers/net/phy/dp83867.c
+++ b/drivers/net/phy/dp83867.c
@@ -99,7 +99,7 @@ static int dp83867_config_intr(struct phy_device *phydev)
return phy_write(phydev, MII_DP83867_MICR, micr_status);
 }
 
-#ifdef CONFIG_OF_MDIO
+#if IS_ENABLED(CONFIG_OF_MDIO)
 static int dp83867_of_init(struct phy_device *phydev)
 {
struct dp83867_private *dp83867 = phydev->priv;
-- 
1.8.5.6



Re: Please review arch/x86/kernel/pvclock.c to fix Docker/Mono crashes in new Kernels

2016-05-16 Thread Linus Torvalds
On Mon, May 16, 2016 at 11:37 AM, Andy Lutomirski  wrote:
>
> All of those fixes were intended to fix incorrect times being
> reported, not segfaults.  Weird.

I'm assuming it's "time going backwards". I can easily see that
causing segfaults.

I've seen lots of code that timestamps events, and can easily imagine
confusion if the end result is not ordered (ie walking off the
beginning/end of a list or array or similar because the algorithm
"knows" that the events are ordered).

I agree that the original bisection result is a bit questionable, and
it might just be about exposing a timing issue.

   Linus


Re: Please review arch/x86/kernel/pvclock.c to fix Docker/Mono crashes in new Kernels

2016-05-16 Thread Linus Torvalds
On Mon, May 16, 2016 at 11:37 AM, Andy Lutomirski  wrote:
>
> All of those fixes were intended to fix incorrect times being
> reported, not segfaults.  Weird.

I'm assuming it's "time going backwards". I can easily see that
causing segfaults.

I've seen lots of code that timestamps events, and can easily imagine
confusion if the end result is not ordered (ie walking off the
beginning/end of a list or array or similar because the algorithm
"knows" that the events are ordered).

I agree that the original bisection result is a bit questionable, and
it might just be about exposing a timing issue.

   Linus


UBSAN: Undefined behaviour in arch/x86/events/intel/p6.c:115:29

2016-05-16 Thread Meelis Roos
Not sure if this is a genuine warning or a false positive but since some 
UBSAN warnings have been real and google does not find a report about this 
specific warning, I'll send it in anyway.

I have seen similar amd pmu warnings from UBSAN but I do not have any 
amd machines from that time frame online for now, so p6 only.

[0.15] Performance Events: p6 PMU driver.
[0.15] 

[0.15] UBSAN: Undefined behaviour in arch/x86/events/intel/p6.c:115:29
[0.15] index 8 is out of range for type 'u64 [8]'
[0.15] CPU: 0 PID: 1 Comm: swapper/0 Not tainted 4.6.0 #21
[0.15] Hardware name: Dell Computer Corporation PowerEdge 1550/933  
, BIOS A09 12/10/2004
[0.15]   c13a4bcc 0046 f605de88 0008 c13d5188 c17ddfd4 
c13d5725
[0.15]  c176ae8c f605de8c c17ddfec 0202 0038 00752101 0002 

[0.15]   0297 00c2 c17d2b60  c102b14f 0008 

[0.15] Call Trace:
[0.15]  [] ? dump_stack+0x45/0x69
[0.15]  [] ? ubsan_epilogue+0x8/0x30
[0.15]  [] ? __ubsan_handle_out_of_bounds+0x55/0x60
[0.15]  [] ? __register_nmi_handler+0xbf/0x300
[0.15]  [] ? p4_pmu_schedule_events+0x740/0x740
[0.15]  [] ? p6_pmu_event_map+0x3d/0x50
[0.15]  [] ? p4_pmu_schedule_events+0x740/0x740
[0.15]  [] ? init_hw_perf_events+0x493/0x688
[0.15]  [] ? merge_attr+0x1d5/0x1d5
[0.15]  [] ? do_one_initcall+0x82/0x230
[0.15]  [] ? vprintk_default+0xf/0x20
[0.15]  [] ? printk+0x11/0x12
[0.15]  [] ? print_cpu_info+0x86/0x130
[0.15]  [] ? native_smp_prepare_cpus+0x40e/0x453
[0.15]  [] ? kernel_init_freeable+0x117/0x2fd
[0.15]  [] ? kernel_init+0x6/0x100
[0.15]  [] ? ret_from_kernel_thread+0x21/0x38
[0.15]  [] ? rest_init+0x60/0x60
[0.15] 

[0.15] 

[0.15] UBSAN: Undefined behaviour in arch/x86/events/intel/p6.c:115:9
[0.15] load of address c16adf20 with insufficient space
[0.15] for an object of type 'const u64'
[0.15] CPU: 0 PID: 1 Comm: swapper/0 Not tainted 4.6.0 #21
[0.15] Hardware name: Dell Computer Corporation PowerEdge 1550/933  
, BIOS A09 12/10/2004
[0.15]   c13a4bcc 0046 f605deb4 c16adf20 c13d5188 c17ddfac 
c13d5229
[0.15]  c176a901 c17ddfc8 c176aca4 c176a95e c16adf20 0202 0008 

[0.15]  c101841d 0008 c10183d0 0008 c1af0c78 c17d2c00 f605df08 
0001
[0.15] Call Trace:
[0.15]  [] ? dump_stack+0x45/0x69
[0.15]  [] ? ubsan_epilogue+0x8/0x30
[0.15]  [] ? __ubsan_handle_type_mismatch+0x79/0x150
[0.15]  [] ? p6_pmu_event_map+0x4d/0x50
[0.15]  [] ? p4_pmu_schedule_events+0x740/0x740
[0.15]  [] ? init_hw_perf_events+0x493/0x688
[0.15]  [] ? merge_attr+0x1d5/0x1d5
[0.15]  [] ? do_one_initcall+0x82/0x230
[0.15]  [] ? vprintk_default+0xf/0x20
[0.15]  [] ? printk+0x11/0x12
[0.15]  [] ? print_cpu_info+0x86/0x130
[0.15]  [] ? native_smp_prepare_cpus+0x40e/0x453
[0.15]  [] ? kernel_init_freeable+0x117/0x2fd
[0.15]  [] ? kernel_init+0x6/0x100
[0.15]  [] ? ret_from_kernel_thread+0x21/0x38
[0.15]  [] ? rest_init+0x60/0x60
[0.15] 

[0.15] ... version:0
[0.15] ... bit width:  32
[0.15] ... generic registers:  2
[0.15] ... value mask: 
[0.15] ... max period: 7fff
[0.15] ... fixed-purpose events:   0
[0.15] ... event mask: 0003



-- 
Meelis Roos (mr...@linux.ee)


UBSAN: Undefined behaviour in arch/x86/events/intel/p6.c:115:29

2016-05-16 Thread Meelis Roos
Not sure if this is a genuine warning or a false positive but since some 
UBSAN warnings have been real and google does not find a report about this 
specific warning, I'll send it in anyway.

I have seen similar amd pmu warnings from UBSAN but I do not have any 
amd machines from that time frame online for now, so p6 only.

[0.15] Performance Events: p6 PMU driver.
[0.15] 

[0.15] UBSAN: Undefined behaviour in arch/x86/events/intel/p6.c:115:29
[0.15] index 8 is out of range for type 'u64 [8]'
[0.15] CPU: 0 PID: 1 Comm: swapper/0 Not tainted 4.6.0 #21
[0.15] Hardware name: Dell Computer Corporation PowerEdge 1550/933  
, BIOS A09 12/10/2004
[0.15]   c13a4bcc 0046 f605de88 0008 c13d5188 c17ddfd4 
c13d5725
[0.15]  c176ae8c f605de8c c17ddfec 0202 0038 00752101 0002 

[0.15]   0297 00c2 c17d2b60  c102b14f 0008 

[0.15] Call Trace:
[0.15]  [] ? dump_stack+0x45/0x69
[0.15]  [] ? ubsan_epilogue+0x8/0x30
[0.15]  [] ? __ubsan_handle_out_of_bounds+0x55/0x60
[0.15]  [] ? __register_nmi_handler+0xbf/0x300
[0.15]  [] ? p4_pmu_schedule_events+0x740/0x740
[0.15]  [] ? p6_pmu_event_map+0x3d/0x50
[0.15]  [] ? p4_pmu_schedule_events+0x740/0x740
[0.15]  [] ? init_hw_perf_events+0x493/0x688
[0.15]  [] ? merge_attr+0x1d5/0x1d5
[0.15]  [] ? do_one_initcall+0x82/0x230
[0.15]  [] ? vprintk_default+0xf/0x20
[0.15]  [] ? printk+0x11/0x12
[0.15]  [] ? print_cpu_info+0x86/0x130
[0.15]  [] ? native_smp_prepare_cpus+0x40e/0x453
[0.15]  [] ? kernel_init_freeable+0x117/0x2fd
[0.15]  [] ? kernel_init+0x6/0x100
[0.15]  [] ? ret_from_kernel_thread+0x21/0x38
[0.15]  [] ? rest_init+0x60/0x60
[0.15] 

[0.15] 

[0.15] UBSAN: Undefined behaviour in arch/x86/events/intel/p6.c:115:9
[0.15] load of address c16adf20 with insufficient space
[0.15] for an object of type 'const u64'
[0.15] CPU: 0 PID: 1 Comm: swapper/0 Not tainted 4.6.0 #21
[0.15] Hardware name: Dell Computer Corporation PowerEdge 1550/933  
, BIOS A09 12/10/2004
[0.15]   c13a4bcc 0046 f605deb4 c16adf20 c13d5188 c17ddfac 
c13d5229
[0.15]  c176a901 c17ddfc8 c176aca4 c176a95e c16adf20 0202 0008 

[0.15]  c101841d 0008 c10183d0 0008 c1af0c78 c17d2c00 f605df08 
0001
[0.15] Call Trace:
[0.15]  [] ? dump_stack+0x45/0x69
[0.15]  [] ? ubsan_epilogue+0x8/0x30
[0.15]  [] ? __ubsan_handle_type_mismatch+0x79/0x150
[0.15]  [] ? p6_pmu_event_map+0x4d/0x50
[0.15]  [] ? p4_pmu_schedule_events+0x740/0x740
[0.15]  [] ? init_hw_perf_events+0x493/0x688
[0.15]  [] ? merge_attr+0x1d5/0x1d5
[0.15]  [] ? do_one_initcall+0x82/0x230
[0.15]  [] ? vprintk_default+0xf/0x20
[0.15]  [] ? printk+0x11/0x12
[0.15]  [] ? print_cpu_info+0x86/0x130
[0.15]  [] ? native_smp_prepare_cpus+0x40e/0x453
[0.15]  [] ? kernel_init_freeable+0x117/0x2fd
[0.15]  [] ? kernel_init+0x6/0x100
[0.15]  [] ? ret_from_kernel_thread+0x21/0x38
[0.15]  [] ? rest_init+0x60/0x60
[0.15] 

[0.15] ... version:0
[0.15] ... bit width:  32
[0.15] ... generic registers:  2
[0.15] ... value mask: 
[0.15] ... max period: 7fff
[0.15] ... fixed-purpose events:   0
[0.15] ... event mask: 0003



-- 
Meelis Roos (mr...@linux.ee)


[PATCH v2 2/2] phy dp83867: Make rgmii parameters optional

2016-05-16 Thread Alexander Graf
If you compile without OF_MDIO support in an RGMII configuration, we fail
to configure the dp83867 phy today by writing garbage into its configuration
registers.

On the other hand if you do compile with OF_MDIO and the phy gets loaded via
device tree, you have to have the properties set in the device tree, otherwise
we fail to load the driver and don't even attach the generic phy driver to
the interface anymore.

To make things slightly more consistent, make the rgmii configuration properties
optional and allow a user to omit them in their device tree.

Signed-off-by: Alexander Graf 
---
 drivers/net/phy/dp83867.c | 31 ---
 1 file changed, 28 insertions(+), 3 deletions(-)

diff --git a/drivers/net/phy/dp83867.c b/drivers/net/phy/dp83867.c
index 94cc278..1b01680 100644
--- a/drivers/net/phy/dp83867.c
+++ b/drivers/net/phy/dp83867.c
@@ -65,6 +65,7 @@ struct dp83867_private {
int rx_id_delay;
int tx_id_delay;
int fifo_depth;
+   int values_are_sane;
 };
 
 static int dp83867_ack_interrupt(struct phy_device *phydev)
@@ -113,15 +114,30 @@ static int dp83867_of_init(struct phy_device *phydev)
ret = of_property_read_u32(of_node, "ti,rx-internal-delay",
   >rx_id_delay);
if (ret)
-   return ret;
+   goto invalid_dt;
 
ret = of_property_read_u32(of_node, "ti,tx-internal-delay",
   >tx_id_delay);
if (ret)
-   return ret;
+   goto invalid_dt;
 
-   return of_property_read_u32(of_node, "ti,fifo-depth",
+   ret = of_property_read_u32(of_node, "ti,fifo-depth",
   >fifo_depth);
+   if (ret)
+   goto invalid_dt;
+
+   dp83867->values_are_sane = 1;
+
+   return 0;
+
+invalid_dt:
+   phydev_err(phydev, "missing properties in device tree");
+
+   /*
+* We can still run with a broken dt by not using any of the optional
+* parameters, so just don't set dp83867->values_are_sane.
+*/
+   return 0;
 }
 #else
 static int dp83867_of_init(struct phy_device *phydev)
@@ -150,6 +166,15 @@ static int dp83867_config_init(struct phy_device *phydev)
dp83867 = (struct dp83867_private *)phydev->priv;
}
 
+   /*
+* With no or broken device tree, we don't have the values that we would
+* want to configure the phy with. In that case, cross our fingers and
+* assume that firmware did everything correctly for us or that we don't
+* need them.
+*/
+   if (!dp83867->values_are_sane)
+   return 0;
+
if (phy_interface_is_rgmii(phydev)) {
ret = phy_write(phydev, MII_DP83867_PHYCTRL,
(dp83867->fifo_depth << 
DP83867_PHYCR_FIFO_DEPTH_SHIFT));
-- 
1.8.5.6



[PATCH v2 2/2] phy dp83867: Make rgmii parameters optional

2016-05-16 Thread Alexander Graf
If you compile without OF_MDIO support in an RGMII configuration, we fail
to configure the dp83867 phy today by writing garbage into its configuration
registers.

On the other hand if you do compile with OF_MDIO and the phy gets loaded via
device tree, you have to have the properties set in the device tree, otherwise
we fail to load the driver and don't even attach the generic phy driver to
the interface anymore.

To make things slightly more consistent, make the rgmii configuration properties
optional and allow a user to omit them in their device tree.

Signed-off-by: Alexander Graf 
---
 drivers/net/phy/dp83867.c | 31 ---
 1 file changed, 28 insertions(+), 3 deletions(-)

diff --git a/drivers/net/phy/dp83867.c b/drivers/net/phy/dp83867.c
index 94cc278..1b01680 100644
--- a/drivers/net/phy/dp83867.c
+++ b/drivers/net/phy/dp83867.c
@@ -65,6 +65,7 @@ struct dp83867_private {
int rx_id_delay;
int tx_id_delay;
int fifo_depth;
+   int values_are_sane;
 };
 
 static int dp83867_ack_interrupt(struct phy_device *phydev)
@@ -113,15 +114,30 @@ static int dp83867_of_init(struct phy_device *phydev)
ret = of_property_read_u32(of_node, "ti,rx-internal-delay",
   >rx_id_delay);
if (ret)
-   return ret;
+   goto invalid_dt;
 
ret = of_property_read_u32(of_node, "ti,tx-internal-delay",
   >tx_id_delay);
if (ret)
-   return ret;
+   goto invalid_dt;
 
-   return of_property_read_u32(of_node, "ti,fifo-depth",
+   ret = of_property_read_u32(of_node, "ti,fifo-depth",
   >fifo_depth);
+   if (ret)
+   goto invalid_dt;
+
+   dp83867->values_are_sane = 1;
+
+   return 0;
+
+invalid_dt:
+   phydev_err(phydev, "missing properties in device tree");
+
+   /*
+* We can still run with a broken dt by not using any of the optional
+* parameters, so just don't set dp83867->values_are_sane.
+*/
+   return 0;
 }
 #else
 static int dp83867_of_init(struct phy_device *phydev)
@@ -150,6 +166,15 @@ static int dp83867_config_init(struct phy_device *phydev)
dp83867 = (struct dp83867_private *)phydev->priv;
}
 
+   /*
+* With no or broken device tree, we don't have the values that we would
+* want to configure the phy with. In that case, cross our fingers and
+* assume that firmware did everything correctly for us or that we don't
+* need them.
+*/
+   if (!dp83867->values_are_sane)
+   return 0;
+
if (phy_interface_is_rgmii(phydev)) {
ret = phy_write(phydev, MII_DP83867_PHYCTRL,
(dp83867->fifo_depth << 
DP83867_PHYCR_FIFO_DEPTH_SHIFT));
-- 
1.8.5.6



[PATCH v2 omap 5/6] arm: Add _rcuidle suffix to allow rpm_resume() to be called from idle

2016-05-16 Thread Paul E. McKenney
This commit applies another _rcuidle suffix to fix an RCU use from
idle.

> ===
> [ INFO: suspicious RCU usage. ]
> 4.6.0-rc5-next-20160426+ #1122 Not tainted
> ---
> include/trace/events/rpm.h:69 suspicious rcu_dereference_check() usage!
>
> other info that might help us debug this:
>
>
> RCU used illegally from idle CPU!
> rcu_scheduler_active = 1, debug_locks = 0
> RCU used illegally from extended quiescent state!
> 1 lock held by swapper/0/0:
>  #0:  (&(>power.lock)->rlock){-.-...}, at: [] 
> __pm_runtime_resume+0x3c/0x64
>
> stack backtrace:
> CPU: 0 PID: 0 Comm: swapper/0 Not tainted 4.6.0-rc5-next-20160426+ #1122
> Hardware name: Generic OMAP36xx (Flattened Device Tree)
> [] (unwind_backtrace) from [] (show_stack+0x10/0x14)
> [] (show_stack) from [] (dump_stack+0xb0/0xe4)
> [] (dump_stack) from [] (rpm_resume+0x5cc/0x7f4)
> [] (rpm_resume) from [] (__pm_runtime_resume+0x4c/0x64)
> [] (__pm_runtime_resume) from [] 
> (omap2_gpio_resume_after_idle+0x54/0x68)
> [] (omap2_gpio_resume_after_idle) from [] 
> (omap3_enter_idle_bm+0xfc/0x1ec)
> [] (omap3_enter_idle_bm) from [] 
> (cpuidle_enter_state+0x80/0x3d4)
> [] (cpuidle_enter_state) from [] 
> (cpu_startup_entry+0x198/0x3a0)
> [] (cpu_startup_entry) from [] (start_kernel+0x354/0x3c8)
> [] (start_kernel) from [<8000807c>] (0x8000807c)

Reported-by: Tony Lindgren 
Signed-off-by: Paul E. McKenney 
Tested-by: Tony Lindgren 
Cc: Russell King 
Cc: Steven Rostedt 
Cc: "Rafael J. Wysocki" 
Cc: 
Cc: 
Cc: 
---
 drivers/base/power/runtime.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/base/power/runtime.c b/drivers/base/power/runtime.c
index e4c2b8fdeff3..15b6d5b199d4 100644
--- a/drivers/base/power/runtime.c
+++ b/drivers/base/power/runtime.c
@@ -601,7 +601,7 @@ static int rpm_resume(struct device *dev, int rpmflags)
struct device *parent = NULL;
int retval = 0;
 
-   trace_rpm_resume(dev, rpmflags);
+   trace_rpm_resume_rcuidle(dev, rpmflags);
 
  repeat:
if (dev->power.runtime_error)
@@ -764,7 +764,7 @@ static int rpm_resume(struct device *dev, int rpmflags)
spin_lock_irq(>power.lock);
}
 
-   trace_rpm_return_int(dev, _THIS_IP_, retval);
+   trace_rpm_return_int_rcuidle(dev, _THIS_IP_, retval);
 
return retval;
 }
-- 
2.5.2



Re: [RFC PATCH 4/2] namei: Improve hash mixing if CONFIG_DCACHE_WORD_ACCESS

2016-05-16 Thread Linus Torvalds
On Mon, May 2, 2016 at 3:31 AM, George Spelvin  wrote:
> The hash mixing between adding the next 64 bits of name
> was just a bit weak.
>
> Replaced with a still very fast but slightly more effective
> mixing function.

I've applied this patch independently of all your other hash rework to my tree.

I verified that the code generation for the inner loop is still fine,
and it does look like a much better mixing function, as well as just
clean up the code.

I hope to get new versions of the actual  fixes during
this merge window from you.

Thanks,

  Linus


[PATCH v2 omap 5/6] arm: Add _rcuidle suffix to allow rpm_resume() to be called from idle

2016-05-16 Thread Paul E. McKenney
This commit applies another _rcuidle suffix to fix an RCU use from
idle.

> ===
> [ INFO: suspicious RCU usage. ]
> 4.6.0-rc5-next-20160426+ #1122 Not tainted
> ---
> include/trace/events/rpm.h:69 suspicious rcu_dereference_check() usage!
>
> other info that might help us debug this:
>
>
> RCU used illegally from idle CPU!
> rcu_scheduler_active = 1, debug_locks = 0
> RCU used illegally from extended quiescent state!
> 1 lock held by swapper/0/0:
>  #0:  (&(>power.lock)->rlock){-.-...}, at: [] 
> __pm_runtime_resume+0x3c/0x64
>
> stack backtrace:
> CPU: 0 PID: 0 Comm: swapper/0 Not tainted 4.6.0-rc5-next-20160426+ #1122
> Hardware name: Generic OMAP36xx (Flattened Device Tree)
> [] (unwind_backtrace) from [] (show_stack+0x10/0x14)
> [] (show_stack) from [] (dump_stack+0xb0/0xe4)
> [] (dump_stack) from [] (rpm_resume+0x5cc/0x7f4)
> [] (rpm_resume) from [] (__pm_runtime_resume+0x4c/0x64)
> [] (__pm_runtime_resume) from [] 
> (omap2_gpio_resume_after_idle+0x54/0x68)
> [] (omap2_gpio_resume_after_idle) from [] 
> (omap3_enter_idle_bm+0xfc/0x1ec)
> [] (omap3_enter_idle_bm) from [] 
> (cpuidle_enter_state+0x80/0x3d4)
> [] (cpuidle_enter_state) from [] 
> (cpu_startup_entry+0x198/0x3a0)
> [] (cpu_startup_entry) from [] (start_kernel+0x354/0x3c8)
> [] (start_kernel) from [<8000807c>] (0x8000807c)

Reported-by: Tony Lindgren 
Signed-off-by: Paul E. McKenney 
Tested-by: Tony Lindgren 
Cc: Russell King 
Cc: Steven Rostedt 
Cc: "Rafael J. Wysocki" 
Cc: 
Cc: 
Cc: 
---
 drivers/base/power/runtime.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/base/power/runtime.c b/drivers/base/power/runtime.c
index e4c2b8fdeff3..15b6d5b199d4 100644
--- a/drivers/base/power/runtime.c
+++ b/drivers/base/power/runtime.c
@@ -601,7 +601,7 @@ static int rpm_resume(struct device *dev, int rpmflags)
struct device *parent = NULL;
int retval = 0;
 
-   trace_rpm_resume(dev, rpmflags);
+   trace_rpm_resume_rcuidle(dev, rpmflags);
 
  repeat:
if (dev->power.runtime_error)
@@ -764,7 +764,7 @@ static int rpm_resume(struct device *dev, int rpmflags)
spin_lock_irq(>power.lock);
}
 
-   trace_rpm_return_int(dev, _THIS_IP_, retval);
+   trace_rpm_return_int_rcuidle(dev, _THIS_IP_, retval);
 
return retval;
 }
-- 
2.5.2



Re: [RFC PATCH 4/2] namei: Improve hash mixing if CONFIG_DCACHE_WORD_ACCESS

2016-05-16 Thread Linus Torvalds
On Mon, May 2, 2016 at 3:31 AM, George Spelvin  wrote:
> The hash mixing between adding the next 64 bits of name
> was just a bit weak.
>
> Replaced with a still very fast but slightly more effective
> mixing function.

I've applied this patch independently of all your other hash rework to my tree.

I verified that the code generation for the inner loop is still fine,
and it does look like a much better mixing function, as well as just
clean up the code.

I hope to get new versions of the actual  fixes during
this merge window from you.

Thanks,

  Linus


[PATCH v2 omap 6/6] arm: Use _rcuidle suffix to allow clk_core_enable() to be used from idle

2016-05-16 Thread Paul E. McKenney
This commit fixes the RCU use-from-idle bug corresponding to the following
splat:

> [ INFO: suspicious RCU usage. ]
> 4.6.0-rc5-next-20160426+ #1127 Not tainted
> ---
> include/trace/events/clk.h:45 suspicious rcu_dereference_check() usage!
>
> other info that might help us debug this:
>
>
> RCU used illegally from idle CPU!
> rcu_scheduler_active = 1, debug_locks = 0
> RCU used illegally from extended quiescent state!
> 2 locks held by swapper/0/0:
>  #0:  (>hwmod_key#30){..}, at: [] 
> omap_hwmod_enable+0x18/0x44
>  #1:  (enable_lock){..}, at: [] clk_enable_lock+0x18/0x124
>
> stack backtrace:
> CPU: 0 PID: 0 Comm: swapper/0 Not tainted 4.6.0-rc5-next-20160426+ #1127
> Hardware name: Generic OMAP36xx (Flattened Device Tree)
> [] (unwind_backtrace) from [] (show_stack+0x10/0x14)
> [] (show_stack) from [] (dump_stack+0xb0/0xe4)
> [] (dump_stack) from [] (clk_core_enable+0x1e0/0x36c)
> [] (clk_core_enable) from [] (clk_enable+0x1c/0x38)
> [] (clk_enable) from [] (_enable_clocks+0x18/0x7c)
> [] (_enable_clocks) from [] (_enable+0x114/0x2ec)
> [] (_enable) from [] (omap_hwmod_enable+0x24/0x44)
> [] (omap_hwmod_enable) from [] 
> (omap_device_enable+0x3c/0x90)
> [] (omap_device_enable) from [] 
> (_od_runtime_resume+0x10/0x38)
> [] (_od_runtime_resume) from [] (__rpm_callback+0x2c/0x60)
> [] (__rpm_callback) from [] (rpm_callback+0x20/0x80)
> [] (rpm_callback) from [] (rpm_resume+0x3d0/0x6f0)
> [] (rpm_resume) from [] (__pm_runtime_resume+0x4c/0x64)
> [] (__pm_runtime_resume) from [] 
> (omap2_gpio_resume_after_idle+0x54/0x68)
> [] (omap2_gpio_resume_after_idle) from [] 
> (omap3_enter_idle_bm+0xfc/0x1ec)
> [] (omap3_enter_idle_bm) from [] 
> (cpuidle_enter_state+0x80/0x3d4)
> [] (cpuidle_enter_state) from [] 
> (cpu_startup_entry+0x198/0x3a0)
> [] (cpu_startup_entry) from [] (start_kernel+0x354/0x3c8)
> [] (start_kernel) from [<8000807c>] (0x8000807c)

Reported-by: Tony Lindgren 
Signed-off-by: Paul E. McKenney 
Tested-by: Tony Lindgren 
Cc: Russell King 
Cc: Steven Rostedt 
Cc: Michael Turquette 
Cc: Stephen Boyd 
Cc: 
Cc: 
Cc: 
---
 drivers/clk/clk.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/clk/clk.c b/drivers/clk/clk.c
index 4fa43c02d682..ec83f404c1d2 100644
--- a/drivers/clk/clk.c
+++ b/drivers/clk/clk.c
@@ -735,12 +735,12 @@ static int clk_core_enable(struct clk_core *core)
if (ret)
return ret;
 
-   trace_clk_enable(core);
+   trace_clk_enable_rcuidle(core);
 
if (core->ops->enable)
ret = core->ops->enable(core->hw);
 
-   trace_clk_enable_complete(core);
+   trace_clk_enable_complete_rcuidle(core);
 
if (ret) {
clk_core_disable(core->parent);
-- 
2.5.2



[PATCH v2 omap 6/6] arm: Use _rcuidle suffix to allow clk_core_enable() to be used from idle

2016-05-16 Thread Paul E. McKenney
This commit fixes the RCU use-from-idle bug corresponding to the following
splat:

> [ INFO: suspicious RCU usage. ]
> 4.6.0-rc5-next-20160426+ #1127 Not tainted
> ---
> include/trace/events/clk.h:45 suspicious rcu_dereference_check() usage!
>
> other info that might help us debug this:
>
>
> RCU used illegally from idle CPU!
> rcu_scheduler_active = 1, debug_locks = 0
> RCU used illegally from extended quiescent state!
> 2 locks held by swapper/0/0:
>  #0:  (>hwmod_key#30){..}, at: [] 
> omap_hwmod_enable+0x18/0x44
>  #1:  (enable_lock){..}, at: [] clk_enable_lock+0x18/0x124
>
> stack backtrace:
> CPU: 0 PID: 0 Comm: swapper/0 Not tainted 4.6.0-rc5-next-20160426+ #1127
> Hardware name: Generic OMAP36xx (Flattened Device Tree)
> [] (unwind_backtrace) from [] (show_stack+0x10/0x14)
> [] (show_stack) from [] (dump_stack+0xb0/0xe4)
> [] (dump_stack) from [] (clk_core_enable+0x1e0/0x36c)
> [] (clk_core_enable) from [] (clk_enable+0x1c/0x38)
> [] (clk_enable) from [] (_enable_clocks+0x18/0x7c)
> [] (_enable_clocks) from [] (_enable+0x114/0x2ec)
> [] (_enable) from [] (omap_hwmod_enable+0x24/0x44)
> [] (omap_hwmod_enable) from [] 
> (omap_device_enable+0x3c/0x90)
> [] (omap_device_enable) from [] 
> (_od_runtime_resume+0x10/0x38)
> [] (_od_runtime_resume) from [] (__rpm_callback+0x2c/0x60)
> [] (__rpm_callback) from [] (rpm_callback+0x20/0x80)
> [] (rpm_callback) from [] (rpm_resume+0x3d0/0x6f0)
> [] (rpm_resume) from [] (__pm_runtime_resume+0x4c/0x64)
> [] (__pm_runtime_resume) from [] 
> (omap2_gpio_resume_after_idle+0x54/0x68)
> [] (omap2_gpio_resume_after_idle) from [] 
> (omap3_enter_idle_bm+0xfc/0x1ec)
> [] (omap3_enter_idle_bm) from [] 
> (cpuidle_enter_state+0x80/0x3d4)
> [] (cpuidle_enter_state) from [] 
> (cpu_startup_entry+0x198/0x3a0)
> [] (cpu_startup_entry) from [] (start_kernel+0x354/0x3c8)
> [] (start_kernel) from [<8000807c>] (0x8000807c)

Reported-by: Tony Lindgren 
Signed-off-by: Paul E. McKenney 
Tested-by: Tony Lindgren 
Cc: Russell King 
Cc: Steven Rostedt 
Cc: Michael Turquette 
Cc: Stephen Boyd 
Cc: 
Cc: 
Cc: 
---
 drivers/clk/clk.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/clk/clk.c b/drivers/clk/clk.c
index 4fa43c02d682..ec83f404c1d2 100644
--- a/drivers/clk/clk.c
+++ b/drivers/clk/clk.c
@@ -735,12 +735,12 @@ static int clk_core_enable(struct clk_core *core)
if (ret)
return ret;
 
-   trace_clk_enable(core);
+   trace_clk_enable_rcuidle(core);
 
if (core->ops->enable)
ret = core->ops->enable(core->hw);
 
-   trace_clk_enable_complete(core);
+   trace_clk_enable_complete_rcuidle(core);
 
if (ret) {
clk_core_disable(core->parent);
-- 
2.5.2



[PATCH v2 omap 1/6] arm: Use _rcuidle tracepoint to allow use from idle

2016-05-16 Thread Paul E. McKenney
Testing on ARM encountered the following pair of lockdep-RCU splats:



===
[ INFO: suspicious RCU usage. ]
4.6.0-rc4-next-20160422 #1 Not tainted
---
include/trace/events/power.h:328 suspicious rcu_dereference_check() usage!

other info that might help us debug this:

RCU used illegally from idle CPU!
rcu_scheduler_active = 1, debug_locks = 0
RCU used illegally from extended quiescent state!
no locks held by swapper/0/0.

stack backtrace:
CPU: 0 PID: 0 Comm: swapper/0 Not tainted 4.6.0-rc4-next-20160422 #1
Hardware name: Generic OMAP3-GP (Flattened Device Tree)
[] (unwind_backtrace) from [] (show_stack+0x10/0x14)
[] (show_stack) from [] (dump_stack+0xa8/0xe0)
[] (dump_stack) from [] (pwrdm_set_next_pwrst+0xf8/0x1cc)
[] (pwrdm_set_next_pwrst) from [] 
(omap3_enter_idle_bm+0x1b8/0x1e8)
[] (omap3_enter_idle_bm) from [] 
(cpuidle_enter_state+0x84/0x408)
[] (cpuidle_enter_state) from [] 
(cpu_startup_entry+0x1c8/0x3f0)
[] (cpu_startup_entry) from [] (start_kernel+0x354/0x3cc)



[] (unwind_backtrace) from [] (show_stack+0x10/0x14)
[] (show_stack) from [] (dump_stack+0xa8/0xe0)
[] (dump_stack) from [] (_pwrdm_state_switch+0x188/0x32c)
[] (_pwrdm_state_switch) from [] 
(_pwrdm_post_transition_cb+0xc/0x14)
[] (_pwrdm_post_transition_cb) from [] 
(pwrdm_for_each+0x30/0x5c)
[] (pwrdm_for_each) from [] 
(pwrdm_post_transition+0x24/0x30)
[] (pwrdm_post_transition) from [] 
(omap_sram_idle+0xfc/0x240)
[] (omap_sram_idle) from [] (omap3_enter_idle_bm+0xf0/0x1e8)
[] (omap3_enter_idle_bm) from [] 
(cpuidle_enter_state+0x84/0x408)
[] (cpuidle_enter_state) from [] 
(cpu_startup_entry+0x1c8/0x3f0)
[] (cpu_startup_entry) from [] (start_kernel+0x354/0x3cc)



These are caused by event tracing from the idle loop, and they were
exposed by commit 293e2421fe25 ("rcu: Remove superfluous versions of
rcu_read_lock_sched_held()"), which suppressed some false negatives.
The current commit therefore adds the _rcuidle suffix to make RCU aware
of this implicit use of RCU by event tracing, thus preventing both splats.

Reported-by: Guenter Roeck 
Signed-off-by: Paul E. McKenney 
Tested-by: Guenter Roeck 
Tested-by: Tony Lindgren 
Cc: Russell King 
Reviewed-by: Steven Rostedt 
Cc: 
Cc: 
---
 arch/arm/mach-omap2/powerdomain.c | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/arch/arm/mach-omap2/powerdomain.c 
b/arch/arm/mach-omap2/powerdomain.c
index 78af6d8cf2e2..daf2753de7aa 100644
--- a/arch/arm/mach-omap2/powerdomain.c
+++ b/arch/arm/mach-omap2/powerdomain.c
@@ -186,8 +186,9 @@ static int _pwrdm_state_switch(struct powerdomain *pwrdm, 
int flag)
trace_state = (PWRDM_TRACE_STATES_FLAG |
   ((next & OMAP_POWERSTATE_MASK) << 8) |
   ((prev & OMAP_POWERSTATE_MASK) << 0));
-   trace_power_domain_target(pwrdm->name, trace_state,
- smp_processor_id());
+   trace_power_domain_target_rcuidle(pwrdm->name,
+ trace_state,
+ smp_processor_id());
}
break;
default:
@@ -523,8 +524,8 @@ int pwrdm_set_next_pwrst(struct powerdomain *pwrdm, u8 
pwrst)
 
if (arch_pwrdm && arch_pwrdm->pwrdm_set_next_pwrst) {
/* Trace the pwrdm desired target state */
-   trace_power_domain_target(pwrdm->name, pwrst,
- smp_processor_id());
+   trace_power_domain_target_rcuidle(pwrdm->name, pwrst,
+ smp_processor_id());
/* Program the pwrdm desired target state */
ret = arch_pwrdm->pwrdm_set_next_pwrst(pwrdm, pwrst);
}
-- 
2.5.2



[PATCH v2 omap 1/6] arm: Use _rcuidle tracepoint to allow use from idle

2016-05-16 Thread Paul E. McKenney
Testing on ARM encountered the following pair of lockdep-RCU splats:



===
[ INFO: suspicious RCU usage. ]
4.6.0-rc4-next-20160422 #1 Not tainted
---
include/trace/events/power.h:328 suspicious rcu_dereference_check() usage!

other info that might help us debug this:

RCU used illegally from idle CPU!
rcu_scheduler_active = 1, debug_locks = 0
RCU used illegally from extended quiescent state!
no locks held by swapper/0/0.

stack backtrace:
CPU: 0 PID: 0 Comm: swapper/0 Not tainted 4.6.0-rc4-next-20160422 #1
Hardware name: Generic OMAP3-GP (Flattened Device Tree)
[] (unwind_backtrace) from [] (show_stack+0x10/0x14)
[] (show_stack) from [] (dump_stack+0xa8/0xe0)
[] (dump_stack) from [] (pwrdm_set_next_pwrst+0xf8/0x1cc)
[] (pwrdm_set_next_pwrst) from [] 
(omap3_enter_idle_bm+0x1b8/0x1e8)
[] (omap3_enter_idle_bm) from [] 
(cpuidle_enter_state+0x84/0x408)
[] (cpuidle_enter_state) from [] 
(cpu_startup_entry+0x1c8/0x3f0)
[] (cpu_startup_entry) from [] (start_kernel+0x354/0x3cc)



[] (unwind_backtrace) from [] (show_stack+0x10/0x14)
[] (show_stack) from [] (dump_stack+0xa8/0xe0)
[] (dump_stack) from [] (_pwrdm_state_switch+0x188/0x32c)
[] (_pwrdm_state_switch) from [] 
(_pwrdm_post_transition_cb+0xc/0x14)
[] (_pwrdm_post_transition_cb) from [] 
(pwrdm_for_each+0x30/0x5c)
[] (pwrdm_for_each) from [] 
(pwrdm_post_transition+0x24/0x30)
[] (pwrdm_post_transition) from [] 
(omap_sram_idle+0xfc/0x240)
[] (omap_sram_idle) from [] (omap3_enter_idle_bm+0xf0/0x1e8)
[] (omap3_enter_idle_bm) from [] 
(cpuidle_enter_state+0x84/0x408)
[] (cpuidle_enter_state) from [] 
(cpu_startup_entry+0x1c8/0x3f0)
[] (cpu_startup_entry) from [] (start_kernel+0x354/0x3cc)



These are caused by event tracing from the idle loop, and they were
exposed by commit 293e2421fe25 ("rcu: Remove superfluous versions of
rcu_read_lock_sched_held()"), which suppressed some false negatives.
The current commit therefore adds the _rcuidle suffix to make RCU aware
of this implicit use of RCU by event tracing, thus preventing both splats.

Reported-by: Guenter Roeck 
Signed-off-by: Paul E. McKenney 
Tested-by: Guenter Roeck 
Tested-by: Tony Lindgren 
Cc: Russell King 
Reviewed-by: Steven Rostedt 
Cc: 
Cc: 
---
 arch/arm/mach-omap2/powerdomain.c | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/arch/arm/mach-omap2/powerdomain.c 
b/arch/arm/mach-omap2/powerdomain.c
index 78af6d8cf2e2..daf2753de7aa 100644
--- a/arch/arm/mach-omap2/powerdomain.c
+++ b/arch/arm/mach-omap2/powerdomain.c
@@ -186,8 +186,9 @@ static int _pwrdm_state_switch(struct powerdomain *pwrdm, 
int flag)
trace_state = (PWRDM_TRACE_STATES_FLAG |
   ((next & OMAP_POWERSTATE_MASK) << 8) |
   ((prev & OMAP_POWERSTATE_MASK) << 0));
-   trace_power_domain_target(pwrdm->name, trace_state,
- smp_processor_id());
+   trace_power_domain_target_rcuidle(pwrdm->name,
+ trace_state,
+ smp_processor_id());
}
break;
default:
@@ -523,8 +524,8 @@ int pwrdm_set_next_pwrst(struct powerdomain *pwrdm, u8 
pwrst)
 
if (arch_pwrdm && arch_pwrdm->pwrdm_set_next_pwrst) {
/* Trace the pwrdm desired target state */
-   trace_power_domain_target(pwrdm->name, pwrst,
- smp_processor_id());
+   trace_power_domain_target_rcuidle(pwrdm->name, pwrst,
+ smp_processor_id());
/* Program the pwrdm desired target state */
ret = arch_pwrdm->pwrdm_set_next_pwrst(pwrdm, pwrst);
}
-- 
2.5.2



[PATCH v2 omap 2/6] arm: Use _rcuidle for suspend/resume tracepoints

2016-05-16 Thread Paul E. McKenney
Further testing with false negatives suppressed by commit 293e2421fe25
("rcu: Remove superfluous versions of rcu_read_lock_sched_held()")
identified a few more unprotected uses of RCU from the idle loop.
Because RCU actively ignores idle-loop code (for energy-efficiency
reasons, among other things), using RCU from the idle loop can result
in too-short grace periods, in turn resulting in arbitrary misbehavior.

The affected functions are smp_cross_call() and rpm_suspend().

The resulting lockdep-RCU splats are as follows:



===
[ INFO: suspicious RCU usage. ]
4.6.0-rc5-next-20160426+ #1112 Not tainted
---
include/trace/events/ipi.h:35 suspicious rcu_dereference_check() usage!

other info that might help us debug this:

RCU used illegally from idle CPU!
rcu_scheduler_active = 1, debug_locks = 0
RCU used illegally from extended quiescent state!
no locks held by swapper/0/0.

stack backtrace:
CPU: 0 PID: 0 Comm: swapper/0 Not tainted 4.6.0-rc5-next-20160426+ #1112
Hardware name: Generic OMAP4 (Flattened Device Tree)
[] (unwind_backtrace) from [] (show_stack+0x10/0x14)
[] (show_stack) from [] (dump_stack+0xb0/0xe4)
[] (dump_stack) from [] (smp_cross_call+0xbc/0x188)
[] (smp_cross_call) from [] (generic_exec_single+0x9c/0x15c)
[] (generic_exec_single) from [] 
(smp_call_function_single_async+0x38/0x9c)
[] (smp_call_function_single_async) from [] 
(cpuidle_coupled_poke_others+0x8c/0xa8)
[] (cpuidle_coupled_poke_others) from [] 
(cpuidle_enter_state_coupled+0x26c/0x390)
[] (cpuidle_enter_state_coupled) from [] 
(cpu_startup_entry+0x198/0x3a0)
[] (cpu_startup_entry) from [] (start_kernel+0x354/0x3c8)
[] (start_kernel) from [<8000807c>] (0x8000807c)



Warning from omap3
===
[ INFO: suspicious RCU usage. ]
4.6.0-rc5-next-20160426+ #1112 Not tainted
---
include/trace/events/rpm.h:63 suspicious rcu_dereference_check() usage!

other info that might help us debug this:

RCU used illegally from idle CPU!
rcu_scheduler_active = 1, debug_locks = 0
RCU used illegally from extended quiescent state!
1 lock held by swapper/0/0:
 #0:  (&(&dev->power.lock)->rlock){-.-...}, at: [] 
__pm_runtime_suspend+0x54/0x84

stack backtrace:
CPU: 0 PID: 0 Comm: swapper/0 Not tainted 4.6.0-rc5-next-20160426+ #1112
Hardware name: Generic OMAP36xx (Flattened Device Tree)
[] (unwind_backtrace) from [] (show_stack+0x10/0x14)
[] (show_stack) from [] (dump_stack+0xb0/0xe4)
[] (dump_stack) from [] (rpm_suspend+0x604/0x7e4)
[] (rpm_suspend) from [] (__pm_runtime_suspend+0x64/0x84)
[] (__pm_runtime_suspend) from [] 
(omap2_gpio_prepare_for_idle+0x5c/0x70)
[] (omap2_gpio_prepare_for_idle) from [] 
(omap_sram_idle+0x140/0x244)
[] (omap_sram_idle) from [] (omap3_enter_idle_bm+0xfc/0x1ec)
[] (omap3_enter_idle_bm) from [] 
(cpuidle_enter_state+0x80/0x3d4)
[] (cpuidle_enter_state) from [] 
(cpu_startup_entry+0x198/0x3a0)
[] (cpu_startup_entry) from [] (start_kernel+0x354/0x3c8)
[] (start_kernel) from [<8000807c>] (0x8000807c)



Reported-by: Tony Lindgren 
Signed-off-by: Paul E. McKenney 
Tested-by: Tony Lindgren 
Cc: Russell King 
Cc: Steven Rostedt 
Cc: "Rafael J. Wysocki" 
Cc: Pavel Machek 
Cc: 
Cc: 
Cc: 
---
 arch/arm/kernel/smp.c| 2 +-
 drivers/base/power/runtime.c | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/arch/arm/kernel/smp.c b/arch/arm/kernel/smp.c
index baee70267f29..7afe48ae5d76 100644
--- a/arch/arm/kernel/smp.c
+++ b/arch/arm/kernel/smp.c
@@ -486,7 +486,7 @@ static const char *ipi_types[NR_IPI] __tracepoint_string = {
 
 static void smp_cross_call(const struct cpumask *target, unsigned int ipinr)
 {
-   trace_ipi_raise(target, ipi_types[ipinr]);
+   trace_ipi_raise_rcuidle(target, ipi_types[ipinr]);
__smp_cross_call(target, ipinr);
 }
 
diff --git a/drivers/base/power/runtime.c b/drivers/base/power/runtime.c
index 4c7055009bd6..ad115568ce58 100644
--- a/drivers/base/power/runtime.c
+++ b/drivers/base/power/runtime.c
@@ -419,7 +419,7 @@ static int rpm_suspend(struct device *dev, int rpmflags)
struct device *parent = NULL;
int retval;
 
-   trace_rpm_suspend(dev, rpmflags);
+   trace_rpm_suspend_rcuidle(dev, rpmflags);
 
  repeat:
retval = rpm_check_suspend_allowed(dev);
@@ -549,7 +549,7 @@ static int rpm_suspend(struct device *dev, int rpmflags)
}
 
  out:
-   trace_rpm_return_int(dev, _THIS_IP_, retval);
+   trace_rpm_return_int_rcuidle(dev, _THIS_IP_, retval);
 
return retval;
 
-- 

[PATCH v2 omap 4/6] arm: Add _rcuidle suffix to allow rpm_idle() use from idle

2016-05-16 Thread Paul E. McKenney
This commit appends a few _rcuidle suffixes to fix the following
RCU-used-from-idle bug:

> ===
> [ INFO: suspicious RCU usage. ]
> 4.6.0-rc5-next-20160426+ #1116 Not tainted
> ---
> include/trace/events/rpm.h:95 suspicious rcu_dereference_check() usage!
>
> other info that might help us debug this:
>
>
> RCU used illegally from idle CPU!
> rcu_scheduler_active = 1, debug_locks = 0
> RCU used illegally from extended quiescent state!
> 1 lock held by swapper/0/0:
>  #0:  (&(&dev->power.lock)->rlock){-.-...}, at: [] 
> __rpm_callback+0x58/0x60
>
> stack backtrace:
> CPU: 0 PID: 0 Comm: swapper/0 Not tainted 4.6.0-rc5-next-20160426+ #1116
> Hardware name: Generic OMAP36xx (Flattened Device Tree)
> [] (unwind_backtrace) from [] (show_stack+0x10/0x14)
> [] (show_stack) from [] (dump_stack+0xb0/0xe4)
> [] (dump_stack) from [] (rpm_suspend+0x580/0x768)
> [] (rpm_suspend) from [] (__pm_runtime_suspend+0x64/0x84)
> [] (__pm_runtime_suspend) from [] 
> (omap2_gpio_prepare_for_idle+0x5c/0x70)
> [] (omap2_gpio_prepare_for_idle) from [] 
> (omap_sram_idle+0x140/0x244)
> [] (omap_sram_idle) from [] 
> (omap3_enter_idle_bm+0xfc/0x1ec)
> [] (omap3_enter_idle_bm) from [] 
> (cpuidle_enter_state+0x80/0x3d4)
> [] (cpuidle_enter_state) from [] 
> (cpu_startup_entry+0x198/0x3a0)
> [] (cpu_startup_entry) from [] (start_kernel+0x354/0x3c8)
> [] (start_kernel) from [<8000807c>] (0x8000807c)

In the immortal words of Steven Rostedt, "*Whack* *Whack* *Whack*!!!"

Reported-by: Tony Lindgren 
Signed-off-by: Paul E. McKenney 
Tested-by: Tony Lindgren 
Cc: Russell King 
WhACKED-by: Steven Rostedt 
Cc: "Rafael J. Wysocki" 
Cc: 
Cc: 
Cc: 
---
 drivers/base/power/runtime.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/base/power/runtime.c b/drivers/base/power/runtime.c
index ad115568ce58..e4c2b8fdeff3 100644
--- a/drivers/base/power/runtime.c
+++ b/drivers/base/power/runtime.c
@@ -301,7 +301,7 @@ static int rpm_idle(struct device *dev, int rpmflags)
int (*callback)(struct device *);
int retval;
 
-   trace_rpm_idle(dev, rpmflags);
+   trace_rpm_idle_rcuidle(dev, rpmflags);
retval = rpm_check_suspend_allowed(dev);
if (retval < 0)
;   /* Conditions are wrong. */
@@ -337,7 +337,7 @@ static int rpm_idle(struct device *dev, int rpmflags)
dev->power.request_pending = true;
queue_work(pm_wq, &dev->power.work);
}
-   trace_rpm_return_int(dev, _THIS_IP_, 0);
+   trace_rpm_return_int_rcuidle(dev, _THIS_IP_, 0);
return 0;
}
 
@@ -352,7 +352,7 @@ static int rpm_idle(struct device *dev, int rpmflags)
wake_up_all(&dev->power.wait_queue);
 
  out:
-   trace_rpm_return_int(dev, _THIS_IP_, retval);
+   trace_rpm_return_int_rcuidle(dev, _THIS_IP_, retval);
return retval ? retval : rpm_suspend(dev, rpmflags | RPM_AUTO);
 }
 
-- 
2.5.2



[PATCH v2 omap 4/6] arm: Add _rcuidle suffix to allow rpm_idle() use from idle

2016-05-16 Thread Paul E. McKenney
This commit appends a few _rcuidle suffixes to fix the following
RCU-used-from-idle bug:

> ===
> [ INFO: suspicious RCU usage. ]
> 4.6.0-rc5-next-20160426+ #1116 Not tainted
> ---
> include/trace/events/rpm.h:95 suspicious rcu_dereference_check() usage!
>
> other info that might help us debug this:
>
>
> RCU used illegally from idle CPU!
> rcu_scheduler_active = 1, debug_locks = 0
> RCU used illegally from extended quiescent state!
> 1 lock held by swapper/0/0:
>  #0:  (&(&dev->power.lock)->rlock){-.-...}, at: [] 
> __rpm_callback+0x58/0x60
>
> stack backtrace:
> CPU: 0 PID: 0 Comm: swapper/0 Not tainted 4.6.0-rc5-next-20160426+ #1116
> Hardware name: Generic OMAP36xx (Flattened Device Tree)
> [] (unwind_backtrace) from [] (show_stack+0x10/0x14)
> [] (show_stack) from [] (dump_stack+0xb0/0xe4)
> [] (dump_stack) from [] (rpm_suspend+0x580/0x768)
> [] (rpm_suspend) from [] (__pm_runtime_suspend+0x64/0x84)
> [] (__pm_runtime_suspend) from [] 
> (omap2_gpio_prepare_for_idle+0x5c/0x70)
> [] (omap2_gpio_prepare_for_idle) from [] 
> (omap_sram_idle+0x140/0x244)
> [] (omap_sram_idle) from [] 
> (omap3_enter_idle_bm+0xfc/0x1ec)
> [] (omap3_enter_idle_bm) from [] 
> (cpuidle_enter_state+0x80/0x3d4)
> [] (cpuidle_enter_state) from [] 
> (cpu_startup_entry+0x198/0x3a0)
> [] (cpu_startup_entry) from [] (start_kernel+0x354/0x3c8)
> [] (start_kernel) from [<8000807c>] (0x8000807c)

In the immortal words of Steven Rostedt, "*Whack* *Whack* *Whack*!!!"

Reported-by: Tony Lindgren 
Signed-off-by: Paul E. McKenney 
Tested-by: Tony Lindgren 
Cc: Russell King 
WhACKED-by: Steven Rostedt 
Cc: "Rafael J. Wysocki" 
Cc: 
Cc: 
Cc: 
---
 drivers/base/power/runtime.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/base/power/runtime.c b/drivers/base/power/runtime.c
index ad115568ce58..e4c2b8fdeff3 100644
--- a/drivers/base/power/runtime.c
+++ b/drivers/base/power/runtime.c
@@ -301,7 +301,7 @@ static int rpm_idle(struct device *dev, int rpmflags)
int (*callback)(struct device *);
int retval;
 
-   trace_rpm_idle(dev, rpmflags);
+   trace_rpm_idle_rcuidle(dev, rpmflags);
retval = rpm_check_suspend_allowed(dev);
if (retval < 0)
;   /* Conditions are wrong. */
@@ -337,7 +337,7 @@ static int rpm_idle(struct device *dev, int rpmflags)
dev->power.request_pending = true;
queue_work(pm_wq, &dev->power.work);
}
-   trace_rpm_return_int(dev, _THIS_IP_, 0);
+   trace_rpm_return_int_rcuidle(dev, _THIS_IP_, 0);
return 0;
}
 
@@ -352,7 +352,7 @@ static int rpm_idle(struct device *dev, int rpmflags)
wake_up_all(&dev->power.wait_queue);
 
  out:
-   trace_rpm_return_int(dev, _THIS_IP_, retval);
+   trace_rpm_return_int_rcuidle(dev, _THIS_IP_, retval);
return retval ? retval : rpm_suspend(dev, rpmflags | RPM_AUTO);
 }
 
-- 
2.5.2



[PATCH v2 omap 2/6] arm: Use _rcuidle for suspend/resume tracepoints

2016-05-16 Thread Paul E. McKenney
Further testing with false negatives suppressed by commit 293e2421fe25
("rcu: Remove superfluous versions of rcu_read_lock_sched_held()")
identified a few more unprotected uses of RCU from the idle loop.
Because RCU actively ignores idle-loop code (for energy-efficiency
reasons, among other things), using RCU from the idle loop can result
in too-short grace periods, in turn resulting in arbitrary misbehavior.

The affected functions are smp_cross_call() and rpm_suspend().

The resulting lockdep-RCU splats are as follows:



===
[ INFO: suspicious RCU usage. ]
4.6.0-rc5-next-20160426+ #1112 Not tainted
---
include/trace/events/ipi.h:35 suspicious rcu_dereference_check() usage!

other info that might help us debug this:

RCU used illegally from idle CPU!
rcu_scheduler_active = 1, debug_locks = 0
RCU used illegally from extended quiescent state!
no locks held by swapper/0/0.

stack backtrace:
CPU: 0 PID: 0 Comm: swapper/0 Not tainted 4.6.0-rc5-next-20160426+ #1112
Hardware name: Generic OMAP4 (Flattened Device Tree)
[] (unwind_backtrace) from [] (show_stack+0x10/0x14)
[] (show_stack) from [] (dump_stack+0xb0/0xe4)
[] (dump_stack) from [] (smp_cross_call+0xbc/0x188)
[] (smp_cross_call) from [] (generic_exec_single+0x9c/0x15c)
[] (generic_exec_single) from [] 
(smp_call_function_single_async+0x38/0x9c)
[] (smp_call_function_single_async) from [] 
(cpuidle_coupled_poke_others+0x8c/0xa8)
[] (cpuidle_coupled_poke_others) from [] 
(cpuidle_enter_state_coupled+0x26c/0x390)
[] (cpuidle_enter_state_coupled) from [] 
(cpu_startup_entry+0x198/0x3a0)
[] (cpu_startup_entry) from [] (start_kernel+0x354/0x3c8)
[] (start_kernel) from [<8000807c>] (0x8000807c)



Warning from omap3
===
[ INFO: suspicious RCU usage. ]
4.6.0-rc5-next-20160426+ #1112 Not tainted
---
include/trace/events/rpm.h:63 suspicious rcu_dereference_check() usage!

other info that might help us debug this:

RCU used illegally from idle CPU!
rcu_scheduler_active = 1, debug_locks = 0
RCU used illegally from extended quiescent state!
1 lock held by swapper/0/0:
 #0:  (&(&dev->power.lock)->rlock){-.-...}, at: [] 
__pm_runtime_suspend+0x54/0x84

stack backtrace:
CPU: 0 PID: 0 Comm: swapper/0 Not tainted 4.6.0-rc5-next-20160426+ #1112
Hardware name: Generic OMAP36xx (Flattened Device Tree)
[] (unwind_backtrace) from [] (show_stack+0x10/0x14)
[] (show_stack) from [] (dump_stack+0xb0/0xe4)
[] (dump_stack) from [] (rpm_suspend+0x604/0x7e4)
[] (rpm_suspend) from [] (__pm_runtime_suspend+0x64/0x84)
[] (__pm_runtime_suspend) from [] 
(omap2_gpio_prepare_for_idle+0x5c/0x70)
[] (omap2_gpio_prepare_for_idle) from [] 
(omap_sram_idle+0x140/0x244)
[] (omap_sram_idle) from [] (omap3_enter_idle_bm+0xfc/0x1ec)
[] (omap3_enter_idle_bm) from [] 
(cpuidle_enter_state+0x80/0x3d4)
[] (cpuidle_enter_state) from [] 
(cpu_startup_entry+0x198/0x3a0)
[] (cpu_startup_entry) from [] (start_kernel+0x354/0x3c8)
[] (start_kernel) from [<8000807c>] (0x8000807c)



Reported-by: Tony Lindgren 
Signed-off-by: Paul E. McKenney 
Tested-by: Tony Lindgren 
Cc: Russell King 
Cc: Steven Rostedt 
Cc: "Rafael J. Wysocki" 
Cc: Pavel Machek 
Cc: 
Cc: 
Cc: 
---
 arch/arm/kernel/smp.c| 2 +-
 drivers/base/power/runtime.c | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/arch/arm/kernel/smp.c b/arch/arm/kernel/smp.c
index baee70267f29..7afe48ae5d76 100644
--- a/arch/arm/kernel/smp.c
+++ b/arch/arm/kernel/smp.c
@@ -486,7 +486,7 @@ static const char *ipi_types[NR_IPI] __tracepoint_string = {
 
 static void smp_cross_call(const struct cpumask *target, unsigned int ipinr)
 {
-   trace_ipi_raise(target, ipi_types[ipinr]);
+   trace_ipi_raise_rcuidle(target, ipi_types[ipinr]);
__smp_cross_call(target, ipinr);
 }
 
diff --git a/drivers/base/power/runtime.c b/drivers/base/power/runtime.c
index 4c7055009bd6..ad115568ce58 100644
--- a/drivers/base/power/runtime.c
+++ b/drivers/base/power/runtime.c
@@ -419,7 +419,7 @@ static int rpm_suspend(struct device *dev, int rpmflags)
struct device *parent = NULL;
int retval;
 
-   trace_rpm_suspend(dev, rpmflags);
+   trace_rpm_suspend_rcuidle(dev, rpmflags);
 
  repeat:
retval = rpm_check_suspend_allowed(dev);
@@ -549,7 +549,7 @@ static int rpm_suspend(struct device *dev, int rpmflags)
}
 
  out:
-   trace_rpm_return_int(dev, _THIS_IP_, retval);
+   trace_rpm_return_int_rcuidle(dev, _THIS_IP_, retval);
 
return retval;
 
-- 
2.5.2



[PATCH v2 omap 3/6] arm: Add _rcuidle tracepoints to allow clk_core_disable() use from idle

2016-05-16 Thread Paul E. McKenney
This commit adds an _rcuidle suffix to a pair of trace events to
prevent the following splat:

> ===
> [ INFO: suspicious RCU usage. ]
> 4.6.0-rc5-next-20160426+ #1114 Not tainted
> ---
> include/trace/events/clk.h:59 suspicious rcu_dereference_check() usage!
>
> other info that might help us debug this:
>
>
> RCU used illegally from idle CPU!
> rcu_scheduler_active = 1, debug_locks = 0
> RCU used illegally from extended quiescent state!
> 2 locks held by swapper/0/0:
>  #0:  (&oh->hwmod_key#30){..}, at: [] omap_hwmod_idle+0x18/0x44
>  #1:  (enable_lock){..}, at: [] clk_enable_lock+0x18/0x124
>
> stack backtrace:
> CPU: 0 PID: 0 Comm: swapper/0 Not tainted 4.6.0-rc5-next-20160426+ #1114
> Hardware name: Generic OMAP36xx (Flattened Device Tree)
> [] (unwind_backtrace) from [] (show_stack+0x10/0x14)
> [] (show_stack) from [] (dump_stack+0xb0/0xe4)
> [] (dump_stack) from [] (clk_core_disable+0x17c/0x348)
> [] (clk_core_disable) from [] (clk_disable+0x24/0x30)
> [] (clk_disable) from [] (_disable_clocks+0x18/0x7c)
> [] (_disable_clocks) from [] (_idle+0x12c/0x230)
> [] (_idle) from [] (omap_hwmod_idle+0x24/0x44)
> [] (omap_hwmod_idle) from [] (omap_device_idle+0x3c/0x90)
> [] (omap_device_idle) from [] (__rpm_callback+0x2c/0x60)
> [] (__rpm_callback) from [] (rpm_callback+0x20/0x80)
> [] (rpm_callback) from [] (rpm_suspend+0x100/0x768)
> [] (rpm_suspend) from [] (__pm_runtime_suspend+0x64/0x84)
> [] (__pm_runtime_suspend) from [] 
> (omap2_gpio_prepare_for_idle+0x5
> c/0x70)
> [] (omap2_gpio_prepare_for_idle) from [] 
> (omap_sram_idle+0x140/0x2
> 44)
> [] (omap_sram_idle) from [] 
> (omap3_enter_idle_bm+0xfc/0x1ec)
> [] (omap3_enter_idle_bm) from [] 
> (cpuidle_enter_state+0x80/0x3d4)
> [] (cpuidle_enter_state) from [] 
> (cpu_startup_entry+0x198/0x3a0)
> [] (cpu_startup_entry) from [] (start_kernel+0x354/0x3c8)
> [] (start_kernel) from [<8000807c>] (0x8000807c)

Reported-by: Tony Lindgren 
Signed-off-by: Paul E. McKenney 
Tested-by: Tony Lindgren 
Cc: Russell King 
Cc: Steven Rostedt 
Cc: Michael Turquette 
Cc: Stephen Boyd 
Cc: 
Cc: 
Cc: 
---
 drivers/clk/clk.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/clk/clk.c b/drivers/clk/clk.c
index fb74dc1f7520..4fa43c02d682 100644
--- a/drivers/clk/clk.c
+++ b/drivers/clk/clk.c
@@ -682,12 +682,12 @@ static void clk_core_disable(struct clk_core *core)
if (--core->enable_count > 0)
return;
 
-   trace_clk_disable(core);
+   trace_clk_disable_rcuidle(core);
 
if (core->ops->disable)
core->ops->disable(core->hw);
 
-   trace_clk_disable_complete(core);
+   trace_clk_disable_complete_rcuidle(core);
 
clk_core_disable(core->parent);
 }
-- 
2.5.2



[PATCH v2 omap 3/6] arm: Add _rcuidle tracepoints to allow clk_core_disable() use from idle

2016-05-16 Thread Paul E. McKenney
This commit adds an _rcuidle suffix to a pair of trace events to
prevent the following splat:

> ===
> [ INFO: suspicious RCU usage. ]
> 4.6.0-rc5-next-20160426+ #1114 Not tainted
> ---
> include/trace/events/clk.h:59 suspicious rcu_dereference_check() usage!
>
> other info that might help us debug this:
>
>
> RCU used illegally from idle CPU!
> rcu_scheduler_active = 1, debug_locks = 0
> RCU used illegally from extended quiescent state!
> 2 locks held by swapper/0/0:
>  #0:  (&oh->hwmod_key#30){..}, at: [] omap_hwmod_idle+0x18/0x44
>  #1:  (enable_lock){..}, at: [] clk_enable_lock+0x18/0x124
>
> stack backtrace:
> CPU: 0 PID: 0 Comm: swapper/0 Not tainted 4.6.0-rc5-next-20160426+ #1114
> Hardware name: Generic OMAP36xx (Flattened Device Tree)
> [] (unwind_backtrace) from [] (show_stack+0x10/0x14)
> [] (show_stack) from [] (dump_stack+0xb0/0xe4)
> [] (dump_stack) from [] (clk_core_disable+0x17c/0x348)
> [] (clk_core_disable) from [] (clk_disable+0x24/0x30)
> [] (clk_disable) from [] (_disable_clocks+0x18/0x7c)
> [] (_disable_clocks) from [] (_idle+0x12c/0x230)
> [] (_idle) from [] (omap_hwmod_idle+0x24/0x44)
> [] (omap_hwmod_idle) from [] (omap_device_idle+0x3c/0x90)
> [] (omap_device_idle) from [] (__rpm_callback+0x2c/0x60)
> [] (__rpm_callback) from [] (rpm_callback+0x20/0x80)
> [] (rpm_callback) from [] (rpm_suspend+0x100/0x768)
> [] (rpm_suspend) from [] (__pm_runtime_suspend+0x64/0x84)
> [] (__pm_runtime_suspend) from [] 
> (omap2_gpio_prepare_for_idle+0x5
> c/0x70)
> [] (omap2_gpio_prepare_for_idle) from [] 
> (omap_sram_idle+0x140/0x2
> 44)
> [] (omap_sram_idle) from [] 
> (omap3_enter_idle_bm+0xfc/0x1ec)
> [] (omap3_enter_idle_bm) from [] 
> (cpuidle_enter_state+0x80/0x3d4)
> [] (cpuidle_enter_state) from [] 
> (cpu_startup_entry+0x198/0x3a0)
> [] (cpu_startup_entry) from [] (start_kernel+0x354/0x3c8)
> [] (start_kernel) from [<8000807c>] (0x8000807c)

Reported-by: Tony Lindgren 
Signed-off-by: Paul E. McKenney 
Tested-by: Tony Lindgren 
Cc: Russell King 
Cc: Steven Rostedt 
Cc: Michael Turquette 
Cc: Stephen Boyd 
Cc: 
Cc: 
Cc: 
---
 drivers/clk/clk.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/clk/clk.c b/drivers/clk/clk.c
index fb74dc1f7520..4fa43c02d682 100644
--- a/drivers/clk/clk.c
+++ b/drivers/clk/clk.c
@@ -682,12 +682,12 @@ static void clk_core_disable(struct clk_core *core)
if (--core->enable_count > 0)
return;
 
-   trace_clk_disable(core);
+   trace_clk_disable_rcuidle(core);
 
if (core->ops->disable)
core->ops->disable(core->hw);
 
-   trace_clk_disable_complete(core);
+   trace_clk_disable_complete_rcuidle(core);
 
clk_core_disable(core->parent);
 }
-- 
2.5.2



[PATCH omap v2 0/6] Fix OMAP uses of RCU from idle loop

2016-05-16 Thread Paul E. McKenney
Hello!

The following series fixes a number of uses of RCU from the idle loop.
These are all due to tracing, so the fix is simply to append _rcuidle
to the event-tracing call.

Changes since v1: Fix commit-log subjects and add maintainers on CC.

Thanx, Paul



 arch/arm/kernel/smp.c             |    2 +-
 arch/arm/mach-omap2/powerdomain.c |    9 +++++----
 drivers/base/power/runtime.c      |   14 +++++++-------
 drivers/clk/clk.c                 |    8 ++++----
 4 files changed, 17 insertions(+), 16 deletions(-)



[PATCH omap v2 0/6] Fix OMAP uses of RCU from idle loop

2016-05-16 Thread Paul E. McKenney
Hello!

The following series fixes a number of uses of RCU from the idle loop.
These are all due to tracing, so the fix is simply to append _rcuidle
to the event-tracing call.

Changes since v1: Fix commit-log subjects and add maintainers on CC.

Thanx, Paul



 arch/arm/kernel/smp.c             |    2 +-
 arch/arm/mach-omap2/powerdomain.c |    9 +++++----
 drivers/base/power/runtime.c      |   14 +++++++-------
 drivers/clk/clk.c                 |    8 ++++----
 4 files changed, 17 insertions(+), 16 deletions(-)



linux-4.6/net/kcm/kcmsock.c:1508: bad if test ?

2016-05-16 Thread David Binderman
Hello there,

[linux-4.6/net/kcm/kcmsock.c:1508]: (style) Checking if unsigned
variable 'copied' is less than zero.

Source code is

if (copied < 0) {

but

   size_t copied;

Suggest code rework.


Regards

David Binderman


linux-4.6/net/kcm/kcmsock.c:1508: bad if test ?

2016-05-16 Thread David Binderman
Hello there,

[linux-4.6/net/kcm/kcmsock.c:1508]: (style) Checking if unsigned
variable 'copied' is less than zero.

Source code is

if (copied < 0) {

but

   size_t copied;

Suggest code rework.


Regards

David Binderman


[GIT PULL] x86/debug change for v4.7

2016-05-16 Thread Ingo Molnar
Linus,

Please pull the latest x86-debug-for-linus git tree from:

   git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git x86-debug-for-linus

   # HEAD: 8fad7ec51e1b9e262e0bdd34e800ac1ea5e84dec x86/dumpstack: Combine some 
printk()s

A printk() output simplification.

 Thanks,

Ingo

-->
Rasmus Villemoes (1):
  x86/dumpstack: Combine some printk()s


 arch/x86/kernel/dumpstack.c | 19 ++++++-------------
 1 file changed, 6 insertions(+), 13 deletions(-)

diff --git a/arch/x86/kernel/dumpstack.c b/arch/x86/kernel/dumpstack.c
index 8efa57a5f29e..2bb25c3fe2e8 100644
--- a/arch/x86/kernel/dumpstack.c
+++ b/arch/x86/kernel/dumpstack.c
@@ -260,19 +260,12 @@ int __die(const char *str, struct pt_regs *regs, long err)
unsigned long sp;
 #endif
printk(KERN_DEFAULT
-  "%s: %04lx [#%d] ", str, err & 0xffff, ++die_counter);
-#ifdef CONFIG_PREEMPT
-   printk("PREEMPT ");
-#endif
-#ifdef CONFIG_SMP
-   printk("SMP ");
-#endif
-   if (debug_pagealloc_enabled())
-   printk("DEBUG_PAGEALLOC ");
-#ifdef CONFIG_KASAN
-   printk("KASAN");
-#endif
-   printk("\n");
+  "%s: %04lx [#%d]%s%s%s%s\n", str, err & 0xffff, ++die_counter,
+  IS_ENABLED(CONFIG_PREEMPT) ? " PREEMPT" : "",
+  IS_ENABLED(CONFIG_SMP) ? " SMP" : "",
+  debug_pagealloc_enabled()  ? " DEBUG_PAGEALLOC" : "",
+  IS_ENABLED(CONFIG_KASAN)   ? " KASAN"   : "");
+
if (notify_die(DIE_OOPS, str, regs, err,
current->thread.trap_nr, SIGSEGV) == NOTIFY_STOP)
return 1;


[GIT PULL] x86/debug change for v4.7

2016-05-16 Thread Ingo Molnar
Linus,

Please pull the latest x86-debug-for-linus git tree from:

   git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git x86-debug-for-linus

   # HEAD: 8fad7ec51e1b9e262e0bdd34e800ac1ea5e84dec x86/dumpstack: Combine some 
printk()s

A printk() output simplification.

 Thanks,

Ingo

-->
Rasmus Villemoes (1):
  x86/dumpstack: Combine some printk()s


 arch/x86/kernel/dumpstack.c | 19 ++++++-------------
 1 file changed, 6 insertions(+), 13 deletions(-)

diff --git a/arch/x86/kernel/dumpstack.c b/arch/x86/kernel/dumpstack.c
index 8efa57a5f29e..2bb25c3fe2e8 100644
--- a/arch/x86/kernel/dumpstack.c
+++ b/arch/x86/kernel/dumpstack.c
@@ -260,19 +260,12 @@ int __die(const char *str, struct pt_regs *regs, long err)
unsigned long sp;
 #endif
printk(KERN_DEFAULT
-  "%s: %04lx [#%d] ", str, err & 0xffff, ++die_counter);
-#ifdef CONFIG_PREEMPT
-   printk("PREEMPT ");
-#endif
-#ifdef CONFIG_SMP
-   printk("SMP ");
-#endif
-   if (debug_pagealloc_enabled())
-   printk("DEBUG_PAGEALLOC ");
-#ifdef CONFIG_KASAN
-   printk("KASAN");
-#endif
-   printk("\n");
+  "%s: %04lx [#%d]%s%s%s%s\n", str, err & 0xffff, ++die_counter,
+  IS_ENABLED(CONFIG_PREEMPT) ? " PREEMPT" : "",
+  IS_ENABLED(CONFIG_SMP) ? " SMP" : "",
+  debug_pagealloc_enabled()  ? " DEBUG_PAGEALLOC" : "",
+  IS_ENABLED(CONFIG_KASAN)   ? " KASAN"   : "");
+
if (notify_die(DIE_OOPS, str, regs, err,
current->thread.trap_nr, SIGSEGV) == NOTIFY_STOP)
return 1;


Re: Please review arch/x86/kernel/pvclock.c to fix Docker/Mono crashes in new Kernels

2016-05-16 Thread Andy Lutomirski
On Mon, May 16, 2016 at 11:10 AM, Linus Torvalds
 wrote:
> There is something odd being reported in Ubuntu.
>
> There's a Mono SIGSEGV that was bisected to Andy's commit 1ddf0b1b11aa
> ("x86, vdso: Use asm volatile in __getcpu"), and then reported to be
> fixed with commits

I'm reasonably confident that the addition of "volatile" is not the
root cause...

>
>   80f7fdb1c7f0 ("x86: vdso: fix pvclock races with task migration")
>   0a4e6be9ca17 ("x86: kvm: Revert "remove sched notifier for cross-cpu
> migrations"")
>
> and when those were backported all looked well.
>
> But then those two commits in turn were reverted with
>
>   73459e2a1ada ("x86: pvclock: Really remove the sched notifier for
> cross-cpu migrations")
>
> and people seem to report that it's back as a result:
>
>   https://bugzilla.xamarin.com/show_bug.cgi?id=29212#c16
>
> so apparently that task migration notifier somehow does matter.

All of those fixes were intended to fix incorrect times being
reported, not segfaults.  Weird.

I tried to sign up for Xamarin bugzilla and made zero progress (the
confirmation email was never sent).  That being said, this bug report
is very confusing.  Could someone who can reproduce this provide the
following information:

1. What are the contents of
/sys/devices/system/clocksource/clocksource0/current_clocksource

2. If you do:

 echo tsc >/sys/devices/system/clocksource/clocksource0/current_clocksource

can you still reproduce it?

3. I rewrote the whole vdso pvclock mess in Linux 4.5.  Does the bug
exist in Linux 4.5?

4. What is actually crashing?  The stack trace says:

Method (wrapper managed-to-managed) string:.ctor (char[],int,int)
emitted at 0x40b5b1b0 to 0x40b5b1d9 (code length 41)

[bug-18026.exe]
converting method (wrapper managed-to-native)
object:__icall_wrapper_mono_gc_alloc_string (intptr,intptr,int)
Method (wrapper managed-to-native)
object:__icall_wrapper_mono_gc_alloc_string (intptr,intptr,int)
emitted at 0x40b5b1f0 to 0x40b5b284 (code length 148) [bug-18026.exe]

Unhandled Exception:
System.NullReferenceException: Object reference not set to an instance
of an object
  at Test.Main () [0x0] in :0
[ERROR] FATAL UNHANDLED EXCEPTION: System.NullReferenceException:
Object reference not set to an instance of an object
  at Test.Main () [0x0] in :0


What on earth does that mean?  Is mono crashing in the vdso?  Is mono
crashing because time went backwards?  Is mono crashing because its GC
is just weirdly buggy, uses clock_gettime, and has a race condition
that is or is not triggered depending on how long the function takes?

An actual stack dump of the segfault (the native stack, not what mono
thinks the stack is) would be nice.


FWIW, the pvclock host code is complicated and it's not obvious to me
that it has any particular guarantee of monotonicity.  (That's not to
say it has a bug that breaks monotonicity -- it's just that I, as a
reader of the code, have never had a clear understanding of what it's
trying to do or why it's trying to do it.)

If it's fixed in 4.5, I suppose the big rewrite could be backported,
but I'd rather have some understanding of what's going on.

--Andy

>
> Comments?
>
>   Linus
>
>
> On Mon, May 16, 2016 at 1:13 AM,   wrote:
>>
>> Hello Linus,
>>
>> I am sorry to bother you, but it seems that the bug from old kernels was
>> copied to new >=4.1 kernels. We use Ubuntu/Docker/Mono and we had to
>> rollback to 3.19.0-54 kernel for the work around.
>>
>> We found that a year ago there was a discussion on the launchpad
>> (https://bugs.launchpad.net/ubuntu/+source/linux/+bug/1450584) regarding
>> SIGSEGV on multi-cpu vm.
>> It seems to me that the commits around that bug
>> https://github.com/torvalds/linux/commits/master/arch/x86/kernel/pvclock.c
>> caused 4.1 and up kernels to keep that bug.
>> Please review pvclock.c to fix that problem.
>>
>> Kiitos! Thank you!
>>
>> --
>> Oleg Khalzov
>> SDE
>> Vestbery
>>



-- 
Andy Lutomirski
AMA Capital Management, LLC


Re: Please review arch/x86/kernel/pvclock.c to fix Docker/Mono crashes in new Kernels

2016-05-16 Thread Andy Lutomirski
On Mon, May 16, 2016 at 11:10 AM, Linus Torvalds
 wrote:
> There is something odd being reported in Ubuntu.
>
> There's a Mono SIGSEGV that was bisected to Andy's commit 1ddf0b1b11aa
> ("x86, vdso: Use asm volatile in __getcpu"), and then reported to be
> fixed with commits

I'm reasonably confident that the addition of "volatile" is not the
root cause...

>
>   80f7fdb1c7f0 ("x86: vdso: fix pvclock races with task migration")
>   0a4e6be9ca17 ("x86: kvm: Revert "remove sched notifier for cross-cpu
> migrations"")
>
> and when those were backported all looked well.
>
> But then those two commits in turn were reverted with
>
>   73459e2a1ada ("x86: pvclock: Really remove the sched notifier for
> cross-cpu migrations")
>
> and people seem to report that it's back as a result:
>
>   https://bugzilla.xamarin.com/show_bug.cgi?id=29212#c16
>
> so apparently that task migration notifier somehow does matter.

All of those fixes were intended to fix incorrect times being
reported, not segfaults.  Weird.

I tried to sign up for Xamarin bugzilla and made zero progress (the
confirmation email was never sent).  That being said, this bug report
is very confusing.  Could someone who can reproduce this provide the
following information:

1. What are the contents of
/sys/devices/system/clocksource/clocksource0/current_clocksource

2. If you do:

 echo tsc >/sys/devices/system/clocksource/clocksource0/current_clocksource

can you still reproduce it?

3. I rewrote the whole vdso pvclock mess in Linux 4.5.  Does the bug
exist in Linux 4.5?

4. What is actually crashing?  The stack trace says:

Method (wrapper managed-to-managed) string:.ctor (char[],int,int)
emitted at 0x40b5b1b0 to 0x40b5b1d9 (code length 41)

[bug-18026.exe]
converting method (wrapper managed-to-native)
object:__icall_wrapper_mono_gc_alloc_string (intptr,intptr,int)
Method (wrapper managed-to-native)
object:__icall_wrapper_mono_gc_alloc_string (intptr,intptr,int)
emitted at 0x40b5b1f0 to 0x40b5b284 (code length 148) [bug-18026.exe]

Unhandled Exception:
System.NullReferenceException: Object reference not set to an instance
of an object
  at Test.Main () [0x0] in :0
[ERROR] FATAL UNHANDLED EXCEPTION: System.NullReferenceException:
Object reference not set to an instance of an object
  at Test.Main () [0x0] in :0


What on earth does that mean?  Is mono crashing in the vdso?  Is mono
crashing because time went backwards?  Is mono crashing because its GC
is just weirdly buggy, uses clock_gettime, and has a race condition
that is or is not triggered depending on how long the function takes?

An actual stack dump of the segfault (the native stack, not what mono
thinks the stack is) would be nice.


FWIW, the pvclock host code is complicated and it's not obvious to me
that it has any particular guarantee of monotonicity.  (That's not to
say it has a bug that breaks monotonicity -- it's just that I, as a
reader of the code, have never had a clear understanding of what it's
trying to do or why it's trying to do it.)

If it's fixed in 4.5, I suppose the big rewrite could be backported,
but I'd rather have some understanding of what's going on.

--Andy

>
> Comments?
>
>   Linus
>
>
> On Mon, May 16, 2016 at 1:13 AM,   wrote:
>>
>> Hello Linus,
>>
>> I am sorry to bother you, but it seems that the bug from old kernels was
>> copied to new >=4.1 kernels. We use Ubuntu/Docker/Mono and we had to
>> rollback to 3.19.0-54 kernel for the work around.
>>
>> We found that a year ago there was a discussion on the launchpad
>> (https://bugs.launchpad.net/ubuntu/+source/linux/+bug/1450584) regarding
>> SIGSEGV on multi-cpu vm.
>> It seems to me that the commits around that bug
>> https://github.com/torvalds/linux/commits/master/arch/x86/kernel/pvclock.c
>> caused 4.1 and up kernels to keep that bug.
>> Please review pvclock.c to fix that problem.
>>
>> Kiitos! Thank you!
>>
>> --
>> Oleg Khalzov
>> SDE
>> Vestbery
>>



-- 
Andy Lutomirski
AMA Capital Management, LLC


[GIT PULL] x86/build change for v4.7

2016-05-16 Thread Ingo Molnar
Linus,

Please pull the latest x86-build-for-linus git tree from:

   git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git x86-build-for-linus

   # HEAD: 7a09b225f31031f8cac9e7801b6004e79f8b0da1 x86/build/defconfig/64: 
Enable CONFIG_E1000E=y

Small defconfig addition.

 Thanks,

Ingo

-->
Konstantin Khlebnikov (1):
  x86/build/defconfig/64: Enable CONFIG_E1000E=y


 arch/x86/configs/x86_64_defconfig | 1 +
 1 file changed, 1 insertion(+)

diff --git a/arch/x86/configs/x86_64_defconfig 
b/arch/x86/configs/x86_64_defconfig
index 4f404a64681b..0c8d7963483c 100644
--- a/arch/x86/configs/x86_64_defconfig
+++ b/arch/x86/configs/x86_64_defconfig
@@ -173,6 +173,7 @@ CONFIG_TIGON3=y
 CONFIG_NET_TULIP=y
 CONFIG_E100=y
 CONFIG_E1000=y
+CONFIG_E1000E=y
 CONFIG_SKY2=y
 CONFIG_FORCEDETH=y
 CONFIG_8139TOO=y



[GIT PULL] x86/build change for v4.7

2016-05-16 Thread Ingo Molnar
Linus,

Please pull the latest x86-build-for-linus git tree from:

   git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git x86-build-for-linus

   # HEAD: 7a09b225f31031f8cac9e7801b6004e79f8b0da1 x86/build/defconfig/64: 
Enable CONFIG_E1000E=y

Small defconfig addition.

 Thanks,

Ingo

-->
Konstantin Khlebnikov (1):
  x86/build/defconfig/64: Enable CONFIG_E1000E=y


 arch/x86/configs/x86_64_defconfig | 1 +
 1 file changed, 1 insertion(+)

diff --git a/arch/x86/configs/x86_64_defconfig 
b/arch/x86/configs/x86_64_defconfig
index 4f404a64681b..0c8d7963483c 100644
--- a/arch/x86/configs/x86_64_defconfig
+++ b/arch/x86/configs/x86_64_defconfig
@@ -173,6 +173,7 @@ CONFIG_TIGON3=y
 CONFIG_NET_TULIP=y
 CONFIG_E100=y
 CONFIG_E1000=y
+CONFIG_E1000E=y
 CONFIG_SKY2=y
 CONFIG_FORCEDETH=y
 CONFIG_8139TOO=y



[GIT PULL] x86/cleanups change for v4.7

2016-05-16 Thread Ingo Molnar
Linus,

Please pull the latest x86-cleanups-for-linus git tree from:

   git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git 
x86-cleanups-for-linus

   # HEAD: a3819e3e71d5000c176918309284a1fa2f133fcf x86: Fix non-static inlines

Inline optimizations.

 Thanks,

Ingo

-->
Denys Vlasenko (1):
  x86: Fix non-static inlines


 arch/x86/crypto/sha-mb/sha1_mb.c  | 4 ++--
 arch/x86/kernel/hpet.c| 2 +-
 arch/x86/kernel/pci-iommu_table.c | 2 +-
 3 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/arch/x86/crypto/sha-mb/sha1_mb.c b/arch/x86/crypto/sha-mb/sha1_mb.c
index a8a0224fa0f8..fb9c7a84700c 100644
--- a/arch/x86/crypto/sha-mb/sha1_mb.c
+++ b/arch/x86/crypto/sha-mb/sha1_mb.c
@@ -102,14 +102,14 @@ static asmlinkage struct job_sha1* 
(*sha1_job_mgr_submit)(struct sha1_mb_mgr *st
 static asmlinkage struct job_sha1* (*sha1_job_mgr_flush)(struct sha1_mb_mgr 
*state);
 static asmlinkage struct job_sha1* (*sha1_job_mgr_get_comp_job)(struct 
sha1_mb_mgr *state);
 
-inline void sha1_init_digest(uint32_t *digest)
+static inline void sha1_init_digest(uint32_t *digest)
 {
static const uint32_t initial_digest[SHA1_DIGEST_LENGTH] = {SHA1_H0,
SHA1_H1, SHA1_H2, SHA1_H3, SHA1_H4 };
memcpy(digest, initial_digest, sizeof(initial_digest));
 }
 
-inline uint32_t sha1_pad(uint8_t padblock[SHA1_BLOCK_SIZE * 2],
+static inline uint32_t sha1_pad(uint8_t padblock[SHA1_BLOCK_SIZE * 2],
 uint32_t total_len)
 {
uint32_t i = total_len & (SHA1_BLOCK_SIZE - 1);
diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c
index a1f0e4a5c47e..130f2b4b8ecb 100644
--- a/arch/x86/kernel/hpet.c
+++ b/arch/x86/kernel/hpet.c
@@ -54,7 +54,7 @@ struct hpet_dev {
charname[10];
 };
 
-inline struct hpet_dev *EVT_TO_HPET_DEV(struct clock_event_device *evtdev)
+static inline struct hpet_dev *EVT_TO_HPET_DEV(struct clock_event_device 
*evtdev)
 {
return container_of(evtdev, struct hpet_dev, evt);
 }
diff --git a/arch/x86/kernel/pci-iommu_table.c 
b/arch/x86/kernel/pci-iommu_table.c
index 35ccf75696eb..f712dfdf1357 100644
--- a/arch/x86/kernel/pci-iommu_table.c
+++ b/arch/x86/kernel/pci-iommu_table.c
@@ -72,7 +72,7 @@ void __init check_iommu_entries(struct iommu_table_entry 
*start,
}
 }
 #else
-inline void check_iommu_entries(struct iommu_table_entry *start,
+void __init check_iommu_entries(struct iommu_table_entry *start,
   struct iommu_table_entry *finish)
 {
 }



[GIT PULL] x86/cleanups change for v4.7

2016-05-16 Thread Ingo Molnar
Linus,

Please pull the latest x86-cleanups-for-linus git tree from:

   git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git 
x86-cleanups-for-linus

   # HEAD: a3819e3e71d5000c176918309284a1fa2f133fcf x86: Fix non-static inlines

Inline optimizations.

 Thanks,

Ingo

-->
Denys Vlasenko (1):
  x86: Fix non-static inlines


 arch/x86/crypto/sha-mb/sha1_mb.c  | 4 ++--
 arch/x86/kernel/hpet.c| 2 +-
 arch/x86/kernel/pci-iommu_table.c | 2 +-
 3 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/arch/x86/crypto/sha-mb/sha1_mb.c b/arch/x86/crypto/sha-mb/sha1_mb.c
index a8a0224fa0f8..fb9c7a84700c 100644
--- a/arch/x86/crypto/sha-mb/sha1_mb.c
+++ b/arch/x86/crypto/sha-mb/sha1_mb.c
@@ -102,14 +102,14 @@ static asmlinkage struct job_sha1* 
(*sha1_job_mgr_submit)(struct sha1_mb_mgr *st
 static asmlinkage struct job_sha1* (*sha1_job_mgr_flush)(struct sha1_mb_mgr 
*state);
 static asmlinkage struct job_sha1* (*sha1_job_mgr_get_comp_job)(struct 
sha1_mb_mgr *state);
 
-inline void sha1_init_digest(uint32_t *digest)
+static inline void sha1_init_digest(uint32_t *digest)
 {
static const uint32_t initial_digest[SHA1_DIGEST_LENGTH] = {SHA1_H0,
SHA1_H1, SHA1_H2, SHA1_H3, SHA1_H4 };
memcpy(digest, initial_digest, sizeof(initial_digest));
 }
 
-inline uint32_t sha1_pad(uint8_t padblock[SHA1_BLOCK_SIZE * 2],
+static inline uint32_t sha1_pad(uint8_t padblock[SHA1_BLOCK_SIZE * 2],
 uint32_t total_len)
 {
uint32_t i = total_len & (SHA1_BLOCK_SIZE - 1);
diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c
index a1f0e4a5c47e..130f2b4b8ecb 100644
--- a/arch/x86/kernel/hpet.c
+++ b/arch/x86/kernel/hpet.c
@@ -54,7 +54,7 @@ struct hpet_dev {
charname[10];
 };
 
-inline struct hpet_dev *EVT_TO_HPET_DEV(struct clock_event_device *evtdev)
+static inline struct hpet_dev *EVT_TO_HPET_DEV(struct clock_event_device 
*evtdev)
 {
return container_of(evtdev, struct hpet_dev, evt);
 }
diff --git a/arch/x86/kernel/pci-iommu_table.c 
b/arch/x86/kernel/pci-iommu_table.c
index 35ccf75696eb..f712dfdf1357 100644
--- a/arch/x86/kernel/pci-iommu_table.c
+++ b/arch/x86/kernel/pci-iommu_table.c
@@ -72,7 +72,7 @@ void __init check_iommu_entries(struct iommu_table_entry 
*start,
}
 }
 #else
-inline void check_iommu_entries(struct iommu_table_entry *start,
+void __init check_iommu_entries(struct iommu_table_entry *start,
   struct iommu_table_entry *finish)
 {
 }



Re: [PATCH] Staging: comedi: quatech_daqp_cs.c: fixed a warning issue

2016-05-16 Thread Greg KH
On Mon, May 16, 2016 at 11:04:31PM +0530, Amit Ghadge wrote:
> Fixed a warning issue to use 'unsigned int'.
> 

build warning?  I don't see that anywhere in the build output.

Please be specific.


[GIT PULL] x86/boot changes for v4.7

2016-05-16 Thread Ingo Molnar
Linus,

Please pull the latest x86-boot-for-linus git tree from:

   git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git x86-boot-for-linus

   # HEAD: d2d3462f9f08da364c8fbd41e8e32229d610d49d x86/KASLR: Clarify purpose 
of each get_random_long()

The biggest changes in this cycle were:

 - prepare for more KASLR related changes, by restructuring, cleaning up and 
   fixing the existing boot code. (Kees Cook, Baoquan He, Yinghai Lu)

 - simplify/concentrate subarch handling code, eliminate paravirt_enabled() 
   usage. (Luis R. Rodriguez)


  out-of-topic modifications in x86-boot-for-linus:
  ---
  drivers/pnp/pnpbios/core.c # 80dfd83dfab6: x86, drivers/pnpbios: 
Replac
  include/linux/pnp.h# 80dfd83dfab6: x86, drivers/pnpbios: 
Replac
  tools/lguest/lguest.c  # 46504590321d: tools/lguest: Force 
disable 
   # 907bb6557974: tools/lguest: Make lguest la

 Thanks,

Ingo

-->
Baoquan He (5):
  x86/KASLR: Update description for decompressor worst case size
  x86/KASLR: Drop CONFIG_RANDOMIZE_BASE_MAX_OFFSET
  x86/KASLR: Handle kernel relocations above 2G correctly
  x86/KASLR: Add 'struct slot_area' to manage random_addr slots
  x86/KASLR: Add virtual address choosing function

Borislav Petkov (2):
  x86/boot: Simplify pointer casting in choose_random_location()
  x86/boot: Comment what finalize_identity_maps() does

Kees Cook (19):
  x86/KASLR: Rename aslr.c to kaslr.c
  x86/boot: Rename "real_mode" to "boot_params"
  x86/boot: Clarify purpose of functions in misc.c
  x86/KASLR: Clarify purpose of kaslr.c
  x86/KASLR: Rename "random" to "random_addr"
  x86/boot: Clean up things used by decompressors
  x86/boot: Make memcpy() handle overlaps
  x86/KASLR: Warn when KASLR is disabled
  x86/boot: Rename overlapping memcpy() to memmove()
  x86/boot: Extract error reporting functions
  x86/boot: Warn on future overlapping memcpy() use
  x86/boot: Clean up pointer casting
  x86/KASLR: Improve comments around the mem_avoid[] logic
  x86/boot: Clean up indenting for asm/boot.h
  x86/KASLR: Build identity mappings on demand
  x86/KASLR: Initialize mapping_info every time
  x86/boot: Add missing file header comments
  x86/KASLR: Return earliest overlap when avoiding regions
  x86/KASLR: Clarify purpose of each get_random_long()

Luis R. Rodriguez (16):
  x86/boot: Enumerate documentation for the x86 hardware_subarch
  x86/xen: Use X86_SUBARCH_XEN for PV guest boots
  tools/lguest: Make lguest launcher use X86_SUBARCH_LGUEST explicitly
  x86/rtc: Replace paravirt rtc check with platform legacy quirk
  x86/ACPI: Move ACPI_FADT_NO_CMOS_RTC check to ACPI boot code
  x86/init: Use a platform legacy quirk for EBDA
  tools/lguest: Force disable tboot and APM
  x86/apm32: Remove paravirt_enabled() use
  x86/tboot: Remove paravirt_enabled() use
  x86/cpu/intel: Remove not needed paravirt_enabled() use for F00F work 
around
  x86, drivers/pnpbios: Replace paravirt_enabled() check with legacy device 
check
  x86/ACPI: Parse ACPI_FADT_LEGACY_DEVICES
  x86/init: Rename EBDA code file
  x86/paravirt: Remove paravirt_enabled()
  x86/init: Disable pnpbios for X86_SUBARCH_INTEL_MID
  x86/init: Disable pnpbios and rtc for X86_SUBARCH_CE4100

Yinghai Lu (8):
  x86/KASLR: Remove unneeded boot_params argument
  x86/boot: Move compressed kernel to the end of the decompression buffer
  x86/boot: Calculate decompression size during boot not build
  x86/boot: Fix "run_size" calculation
  x86/KASLR: Clean up unused code from old 'run_size' and rename it to 
'kernel_total_size'
  x86/boot: Correctly bounds-check relocations
  x86/KASLR: Consolidate mem_avoid[] entries
  x86/boot: Split out kernel_ident_mapping_init()


 arch/x86/Kconfig   |  72 ++---
 arch/x86/Makefile  |   3 +-
 arch/x86/boot/Makefile |  13 +-
 arch/x86/boot/compressed/Makefile  |  23 +-
 arch/x86/boot/compressed/aslr.c| 339 --
 arch/x86/boot/compressed/cmdline.c |   4 +-
 arch/x86/boot/compressed/error.c   |  22 ++
 arch/x86/boot/compressed/error.h   |   7 +
 arch/x86/boot/compressed/head_32.S |  22 +-
 arch/x86/boot/compressed/head_64.S |  19 +-
 arch/x86/boot/compressed/kaslr.c   | 510 +
 arch/x86/boot/compressed/misc.c| 188 
 arch/x86/boot/compressed/misc.h|  27 +-
 arch/x86/boot/compressed/mkpiggy.c |  34 +--
 arch/x86/boot/compressed/pagetable.c   | 129 +
 arch/x86/boot/compressed/string.c  |  37 ++-
 arch/x86/boot/compressed/vmlinux.lds.S |   1 +
 arch/x86/boot/early_serial_console.c   |   4 +
 arch/x86/boot/header.S | 109 ++-

Re: [PATCH] Staging: comedi: quatech_daqp_cs.c: fixed a warning issue

2016-05-16 Thread Greg KH
On Mon, May 16, 2016 at 11:04:31PM +0530, Amit Ghadge wrote:
> Fixed a warning issue to use 'unsigned int'.
> 

build warning?  I don't see that anywhere in the build output.

Please be specific.


[GIT PULL] x86/boot changes for v4.7

2016-05-16 Thread Ingo Molnar
Linus,

Please pull the latest x86-boot-for-linus git tree from:

   git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git x86-boot-for-linus

   # HEAD: d2d3462f9f08da364c8fbd41e8e32229d610d49d x86/KASLR: Clarify purpose 
of each get_random_long()

The biggest changes in this cycle were:

 - prepare for more KASLR related changes, by restructuring, cleaning up and 
   fixing the existing boot code. (Kees Cook, Baoquan He, Yinghai Lu)

 - simplify/concentrate subarch handling code, eliminate paravirt_enabled() 
   usage. (Luis R. Rodriguez)


  out-of-topic modifications in x86-boot-for-linus:
  ---
  drivers/pnp/pnpbios/core.c # 80dfd83dfab6: x86, drivers/pnpbios: 
Replac
  include/linux/pnp.h# 80dfd83dfab6: x86, drivers/pnpbios: 
Replac
  tools/lguest/lguest.c  # 46504590321d: tools/lguest: Force 
disable 
   # 907bb6557974: tools/lguest: Make lguest la

 Thanks,

Ingo

-->
Baoquan He (5):
  x86/KASLR: Update description for decompressor worst case size
  x86/KASLR: Drop CONFIG_RANDOMIZE_BASE_MAX_OFFSET
  x86/KASLR: Handle kernel relocations above 2G correctly
  x86/KASLR: Add 'struct slot_area' to manage random_addr slots
  x86/KASLR: Add virtual address choosing function

Borislav Petkov (2):
  x86/boot: Simplify pointer casting in choose_random_location()
  x86/boot: Comment what finalize_identity_maps() does

Kees Cook (19):
  x86/KASLR: Rename aslr.c to kaslr.c
  x86/boot: Rename "real_mode" to "boot_params"
  x86/boot: Clarify purpose of functions in misc.c
  x86/KASLR: Clarify purpose of kaslr.c
  x86/KASLR: Rename "random" to "random_addr"
  x86/boot: Clean up things used by decompressors
  x86/boot: Make memcpy() handle overlaps
  x86/KASLR: Warn when KASLR is disabled
  x86/boot: Rename overlapping memcpy() to memmove()
  x86/boot: Extract error reporting functions
  x86/boot: Warn on future overlapping memcpy() use
  x86/boot: Clean up pointer casting
  x86/KASLR: Improve comments around the mem_avoid[] logic
  x86/boot: Clean up indenting for asm/boot.h
  x86/KASLR: Build identity mappings on demand
  x86/KASLR: Initialize mapping_info every time
  x86/boot: Add missing file header comments
  x86/KASLR: Return earliest overlap when avoiding regions
  x86/KASLR: Clarify purpose of each get_random_long()

Luis R. Rodriguez (16):
  x86/boot: Enumerate documentation for the x86 hardware_subarch
  x86/xen: Use X86_SUBARCH_XEN for PV guest boots
  tools/lguest: Make lguest launcher use X86_SUBARCH_LGUEST explicitly
  x86/rtc: Replace paravirt rtc check with platform legacy quirk
  x86/ACPI: Move ACPI_FADT_NO_CMOS_RTC check to ACPI boot code
  x86/init: Use a platform legacy quirk for EBDA
  tools/lguest: Force disable tboot and APM
  x86/apm32: Remove paravirt_enabled() use
  x86/tboot: Remove paravirt_enabled() use
  x86/cpu/intel: Remove not needed paravirt_enabled() use for F00F work 
around
  x86, drivers/pnpbios: Replace paravirt_enabled() check with legacy device 
check
  x86/ACPI: Parse ACPI_FADT_LEGACY_DEVICES
  x86/init: Rename EBDA code file
  x86/paravirt: Remove paravirt_enabled()
  x86/init: Disable pnpbios for X86_SUBARCH_INTEL_MID
  x86/init: Disable pnpbios and rtc for X86_SUBARCH_CE4100

Yinghai Lu (8):
  x86/KASLR: Remove unneeded boot_params argument
  x86/boot: Move compressed kernel to the end of the decompression buffer
  x86/boot: Calculate decompression size during boot not build
  x86/boot: Fix "run_size" calculation
  x86/KASLR: Clean up unused code from old 'run_size' and rename it to 
'kernel_total_size'
  x86/boot: Correctly bounds-check relocations
  x86/KASLR: Consolidate mem_avoid[] entries
  x86/boot: Split out kernel_ident_mapping_init()


 arch/x86/Kconfig   |  72 ++---
 arch/x86/Makefile  |   3 +-
 arch/x86/boot/Makefile |  13 +-
 arch/x86/boot/compressed/Makefile  |  23 +-
 arch/x86/boot/compressed/aslr.c| 339 --
 arch/x86/boot/compressed/cmdline.c |   4 +-
 arch/x86/boot/compressed/error.c   |  22 ++
 arch/x86/boot/compressed/error.h   |   7 +
 arch/x86/boot/compressed/head_32.S |  22 +-
 arch/x86/boot/compressed/head_64.S |  19 +-
 arch/x86/boot/compressed/kaslr.c   | 510 +
 arch/x86/boot/compressed/misc.c| 188 
 arch/x86/boot/compressed/misc.h|  27 +-
 arch/x86/boot/compressed/mkpiggy.c |  34 +--
 arch/x86/boot/compressed/pagetable.c   | 129 +
 arch/x86/boot/compressed/string.c  |  37 ++-
 arch/x86/boot/compressed/vmlinux.lds.S |   1 +
 arch/x86/boot/early_serial_console.c   |   4 +
 arch/x86/boot/header.S | 109 ++-

[PATCH] MIPS: perf: Fix I6400 event numbers

2016-05-16 Thread James Hogan
Fix perf hardware performance counter event numbers for I6400. This core
does not follow the performance event numbering scheme of previous MIPS
cores. All performance counters (both odd and even) are capable of
counting any of the available events.

Fixes: 4e88a8621301 ("MIPS: Add cases for CPU_I6400")
Signed-off-by: James Hogan 
Cc: Ralf Baechle 
Cc: Peter Zijlstra 
Cc: Ingo Molnar 
Cc: Arnaldo Carvalho de Melo 
Cc: Alexander Shishkin 
Cc: linux-m...@linux-mips.org
Cc: linux-kernel@vger.kernel.org
---
 arch/mips/kernel/perf_event_mipsxx.c | 54 ++--
 1 file changed, 52 insertions(+), 2 deletions(-)

diff --git a/arch/mips/kernel/perf_event_mipsxx.c 
b/arch/mips/kernel/perf_event_mipsxx.c
index 9bc1191b1ab0..d1d17d99a830 100644
--- a/arch/mips/kernel/perf_event_mipsxx.c
+++ b/arch/mips/kernel/perf_event_mipsxx.c
@@ -825,6 +825,16 @@ static const struct mips_perf_event mipsxxcore_event_map2
[PERF_COUNT_HW_BRANCH_MISSES] = { 0x27, CNTR_ODD, T },
 };
 
+static const struct mips_perf_event i6400_event_map[PERF_COUNT_HW_MAX] = {
+   [PERF_COUNT_HW_CPU_CYCLES]  = { 0x00, CNTR_EVEN | CNTR_ODD },
+   [PERF_COUNT_HW_INSTRUCTIONS]= { 0x01, CNTR_EVEN | CNTR_ODD },
+   /* These only count dcache, not icache */
+   [PERF_COUNT_HW_CACHE_REFERENCES]= { 0x45, CNTR_EVEN | CNTR_ODD },
+   [PERF_COUNT_HW_CACHE_MISSES]= { 0x48, CNTR_EVEN | CNTR_ODD },
+   [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = { 0x15, CNTR_EVEN | CNTR_ODD },
+   [PERF_COUNT_HW_BRANCH_MISSES]   = { 0x16, CNTR_EVEN | CNTR_ODD },
+};
+
 static const struct mips_perf_event loongson3_event_map[PERF_COUNT_HW_MAX] = {
[PERF_COUNT_HW_CPU_CYCLES] = { 0x00, CNTR_EVEN },
[PERF_COUNT_HW_INSTRUCTIONS] = { 0x00, CNTR_ODD },
@@ -1015,6 +1025,46 @@ static const struct mips_perf_event mipsxxcore_cache_map2
 },
 };
 
+static const struct mips_perf_event i6400_cache_map
+   [PERF_COUNT_HW_CACHE_MAX]
+   [PERF_COUNT_HW_CACHE_OP_MAX]
+   [PERF_COUNT_HW_CACHE_RESULT_MAX] = {
+[C(L1D)] = {
+   [C(OP_READ)] = {
+   [C(RESULT_ACCESS)]  = { 0x46, CNTR_EVEN | CNTR_ODD },
+   [C(RESULT_MISS)]= { 0x49, CNTR_EVEN | CNTR_ODD },
+   },
+   [C(OP_WRITE)] = {
+   [C(RESULT_ACCESS)]  = { 0x47, CNTR_EVEN | CNTR_ODD },
+   [C(RESULT_MISS)]= { 0x4a, CNTR_EVEN | CNTR_ODD },
+   },
+},
+[C(L1I)] = {
+   [C(OP_READ)] = {
+   [C(RESULT_ACCESS)]  = { 0x84, CNTR_EVEN | CNTR_ODD },
+   [C(RESULT_MISS)]= { 0x85, CNTR_EVEN | CNTR_ODD },
+   },
+},
+[C(DTLB)] = {
+   /* Can't distinguish read & write */
+   [C(OP_READ)] = {
+   [C(RESULT_ACCESS)]  = { 0x40, CNTR_EVEN | CNTR_ODD },
+   [C(RESULT_MISS)]= { 0x41, CNTR_EVEN | CNTR_ODD },
+   },
+   [C(OP_WRITE)] = {
+   [C(RESULT_ACCESS)]  = { 0x40, CNTR_EVEN | CNTR_ODD },
+   [C(RESULT_MISS)]= { 0x41, CNTR_EVEN | CNTR_ODD },
+   },
+},
+[C(BPU)] = {
+   /* Conditional branches / mispredicted */
+   [C(OP_READ)] = {
+   [C(RESULT_ACCESS)]  = { 0x15, CNTR_EVEN | CNTR_ODD },
+   [C(RESULT_MISS)]= { 0x16, CNTR_EVEN | CNTR_ODD },
+   },
+},
+};
+
 static const struct mips_perf_event loongson3_cache_map
[PERF_COUNT_HW_CACHE_MAX]
[PERF_COUNT_HW_CACHE_OP_MAX]
@@ -1720,8 +1770,8 @@ init_hw_perf_events(void)
break;
case CPU_I6400:
mipspmu.name = "mips/I6400";
-   mipspmu.general_event_map = &mipsxxcore_event_map2;
-   mipspmu.cache_event_map = &mipsxxcore_cache_map2;
+   mipspmu.general_event_map = &i6400_event_map;
+   mipspmu.cache_event_map = &i6400_cache_map;
break;
case CPU_1004K:
mipspmu.name = "mips/1004K";
-- 
2.4.10



[PATCH] MIPS: perf: Fix I6400 event numbers

2016-05-16 Thread James Hogan
Fix perf hardware performance counter event numbers for I6400. This core
does not follow the performance event numbering scheme of previous MIPS
cores. All performance counters (both odd and even) are capable of
counting any of the available events.

Fixes: 4e88a8621301 ("MIPS: Add cases for CPU_I6400")
Signed-off-by: James Hogan 
Cc: Ralf Baechle 
Cc: Peter Zijlstra 
Cc: Ingo Molnar 
Cc: Arnaldo Carvalho de Melo 
Cc: Alexander Shishkin 
Cc: linux-m...@linux-mips.org
Cc: linux-kernel@vger.kernel.org
---
 arch/mips/kernel/perf_event_mipsxx.c | 54 ++--
 1 file changed, 52 insertions(+), 2 deletions(-)

diff --git a/arch/mips/kernel/perf_event_mipsxx.c 
b/arch/mips/kernel/perf_event_mipsxx.c
index 9bc1191b1ab0..d1d17d99a830 100644
--- a/arch/mips/kernel/perf_event_mipsxx.c
+++ b/arch/mips/kernel/perf_event_mipsxx.c
@@ -825,6 +825,16 @@ static const struct mips_perf_event mipsxxcore_event_map2
[PERF_COUNT_HW_BRANCH_MISSES] = { 0x27, CNTR_ODD, T },
 };
 
+static const struct mips_perf_event i6400_event_map[PERF_COUNT_HW_MAX] = {
+   [PERF_COUNT_HW_CPU_CYCLES]  = { 0x00, CNTR_EVEN | CNTR_ODD },
+   [PERF_COUNT_HW_INSTRUCTIONS]= { 0x01, CNTR_EVEN | CNTR_ODD },
+   /* These only count dcache, not icache */
+   [PERF_COUNT_HW_CACHE_REFERENCES]= { 0x45, CNTR_EVEN | CNTR_ODD },
+   [PERF_COUNT_HW_CACHE_MISSES]= { 0x48, CNTR_EVEN | CNTR_ODD },
+   [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = { 0x15, CNTR_EVEN | CNTR_ODD },
+   [PERF_COUNT_HW_BRANCH_MISSES]   = { 0x16, CNTR_EVEN | CNTR_ODD },
+};
+
 static const struct mips_perf_event loongson3_event_map[PERF_COUNT_HW_MAX] = {
[PERF_COUNT_HW_CPU_CYCLES] = { 0x00, CNTR_EVEN },
[PERF_COUNT_HW_INSTRUCTIONS] = { 0x00, CNTR_ODD },
@@ -1015,6 +1025,46 @@ static const struct mips_perf_event mipsxxcore_cache_map2
 },
 };
 
+static const struct mips_perf_event i6400_cache_map
+   [PERF_COUNT_HW_CACHE_MAX]
+   [PERF_COUNT_HW_CACHE_OP_MAX]
+   [PERF_COUNT_HW_CACHE_RESULT_MAX] = {
+[C(L1D)] = {
+   [C(OP_READ)] = {
+   [C(RESULT_ACCESS)]  = { 0x46, CNTR_EVEN | CNTR_ODD },
+   [C(RESULT_MISS)]= { 0x49, CNTR_EVEN | CNTR_ODD },
+   },
+   [C(OP_WRITE)] = {
+   [C(RESULT_ACCESS)]  = { 0x47, CNTR_EVEN | CNTR_ODD },
+   [C(RESULT_MISS)]= { 0x4a, CNTR_EVEN | CNTR_ODD },
+   },
+},
+[C(L1I)] = {
+   [C(OP_READ)] = {
+   [C(RESULT_ACCESS)]  = { 0x84, CNTR_EVEN | CNTR_ODD },
+   [C(RESULT_MISS)]= { 0x85, CNTR_EVEN | CNTR_ODD },
+   },
+},
+[C(DTLB)] = {
+   /* Can't distinguish read & write */
+   [C(OP_READ)] = {
+   [C(RESULT_ACCESS)]  = { 0x40, CNTR_EVEN | CNTR_ODD },
+   [C(RESULT_MISS)]= { 0x41, CNTR_EVEN | CNTR_ODD },
+   },
+   [C(OP_WRITE)] = {
+   [C(RESULT_ACCESS)]  = { 0x40, CNTR_EVEN | CNTR_ODD },
+   [C(RESULT_MISS)]= { 0x41, CNTR_EVEN | CNTR_ODD },
+   },
+},
+[C(BPU)] = {
+   /* Conditional branches / mispredicted */
+   [C(OP_READ)] = {
+   [C(RESULT_ACCESS)]  = { 0x15, CNTR_EVEN | CNTR_ODD },
+   [C(RESULT_MISS)]= { 0x16, CNTR_EVEN | CNTR_ODD },
+   },
+},
+};
+
 static const struct mips_perf_event loongson3_cache_map
[PERF_COUNT_HW_CACHE_MAX]
[PERF_COUNT_HW_CACHE_OP_MAX]
@@ -1720,8 +1770,8 @@ init_hw_perf_events(void)
break;
case CPU_I6400:
mipspmu.name = "mips/I6400";
-   mipspmu.general_event_map = &mipsxxcore_event_map2;
-   mipspmu.cache_event_map = &mipsxxcore_cache_map2;
+   mipspmu.general_event_map = &i6400_event_map;
+   mipspmu.cache_event_map = &i6400_cache_map;
break;
case CPU_1004K:
mipspmu.name = "mips/1004K";
-- 
2.4.10



[PATCH v2 2/3] dell_rbu: Update documentation

2016-05-16 Thread Mario Limonciello
Signed-off-by: Mario Limonciello 
---
 Documentation/dell_rbu.txt | 24 +++-
 1 file changed, 3 insertions(+), 21 deletions(-)

diff --git a/Documentation/dell_rbu.txt b/Documentation/dell_rbu.txt
index d262e22..b2714e6 100644
--- a/Documentation/dell_rbu.txt
+++ b/Documentation/dell_rbu.txt
@@ -31,8 +31,6 @@ The user should not unload the rbu driver after downloading 
the BIOS image
 or updating.
 
 The driver load creates the following directories under the /sys file system.
-/sys/class/firmware/dell_rbu/loading
-/sys/class/firmware/dell_rbu/data
 /sys/devices/platform/dell_rbu/image_type
 /sys/devices/platform/dell_rbu/data
 /sys/devices/platform/dell_rbu/packet_size
@@ -60,7 +58,7 @@ added together should match the specified packet_size. This 
makes one
 packet, the user needs to create more such packets out of the entire BIOS
 image file and then arrange all these packets back to back in to one single
 file.
-This file is then copied to /sys/class/firmware/dell_rbu/data.
+This file is then copied to /lib/firmware/dell_rbu.
 Once this file gets to the driver, the driver extracts packet_size data from
 the file and spreads it across the physical memory in contiguous packet_sized
 space.
@@ -70,29 +68,13 @@ In monolithic update the user simply get the BIOS image 
(.hdr file) and copies
 to the data file as is without any change to the BIOS image itself.
 
 Do the steps below to download the BIOS image.
-1) echo 1 > /sys/class/firmware/dell_rbu/loading
-2) cp bios_image.hdr /sys/class/firmware/dell_rbu/data
-3) echo 0 > /sys/class/firmware/dell_rbu/loading
-
-The /sys/class/firmware/dell_rbu/ entries will remain till the following is
-done.
-echo -1 > /sys/class/firmware/dell_rbu/loading
-Until this step is completed the driver cannot be unloaded.
-Also echoing either mono, packet or init in to image_type will free up the
-memory allocated by the driver.
-
-If a user by accident executes steps 1 and 3 above without executing step 2;
-it will make the /sys/class/firmware/dell_rbu/ entries disappear.
-The entries can be recreated by doing the following
-echo init > /sys/devices/platform/dell_rbu/image_type
-NOTE: echoing init in image_type does not change it original value.
+1) Prepare BIOS image and place in /lib/firmware/dell_rbu
+2) echo "init" > /sys/devices/platform/dell_rbu/image_type
 
 Also the driver provides /sys/devices/platform/dell_rbu/data readonly file to
 read back the image downloaded.
 
 NOTE:
-This driver requires a patch for firmware_class.c which has the modified
-request_firmware_nowait function.
 Also after updating the BIOS image a user mode application needs to execute
 code which sends the BIOS update request to the BIOS. So on the next reboot
 the BIOS knows about the new image downloaded and it updates itself.
-- 
2.7.4



[PATCH v2 2/3] dell_rbu: Update documentation

2016-05-16 Thread Mario Limonciello
Signed-off-by: Mario Limonciello 
---
 Documentation/dell_rbu.txt | 24 +++-
 1 file changed, 3 insertions(+), 21 deletions(-)

diff --git a/Documentation/dell_rbu.txt b/Documentation/dell_rbu.txt
index d262e22..b2714e6 100644
--- a/Documentation/dell_rbu.txt
+++ b/Documentation/dell_rbu.txt
@@ -31,8 +31,6 @@ The user should not unload the rbu driver after downloading 
the BIOS image
 or updating.
 
 The driver load creates the following directories under the /sys file system.
-/sys/class/firmware/dell_rbu/loading
-/sys/class/firmware/dell_rbu/data
 /sys/devices/platform/dell_rbu/image_type
 /sys/devices/platform/dell_rbu/data
 /sys/devices/platform/dell_rbu/packet_size
@@ -60,7 +58,7 @@ added together should match the specified packet_size. This 
makes one
 packet, the user needs to create more such packets out of the entire BIOS
 image file and then arrange all these packets back to back in to one single
 file.
-This file is then copied to /sys/class/firmware/dell_rbu/data.
+This file is then copied to /lib/firmware/dell_rbu.
 Once this file gets to the driver, the driver extracts packet_size data from
 the file and spreads it across the physical memory in contiguous packet_sized
 space.
@@ -70,29 +68,13 @@ In monolithic update the user simply get the BIOS image 
(.hdr file) and copies
 to the data file as is without any change to the BIOS image itself.
 
 Do the steps below to download the BIOS image.
-1) echo 1 > /sys/class/firmware/dell_rbu/loading
-2) cp bios_image.hdr /sys/class/firmware/dell_rbu/data
-3) echo 0 > /sys/class/firmware/dell_rbu/loading
-
-The /sys/class/firmware/dell_rbu/ entries will remain till the following is
-done.
-echo -1 > /sys/class/firmware/dell_rbu/loading
-Until this step is completed the driver cannot be unloaded.
-Also echoing either mono, packet or init in to image_type will free up the
-memory allocated by the driver.
-
-If a user by accident executes steps 1 and 3 above without executing step 2;
-it will make the /sys/class/firmware/dell_rbu/ entries disappear.
-The entries can be recreated by doing the following
-echo init > /sys/devices/platform/dell_rbu/image_type
-NOTE: echoing init in image_type does not change it original value.
+1) Prepare BIOS image and place in /lib/firmware/dell_rbu
+2) echo "init" > /sys/devices/platform/dell_rbu/image_type
 
 Also the driver provides /sys/devices/platform/dell_rbu/data readonly file to
 read back the image downloaded.
 
 NOTE:
-This driver requires a patch for firmware_class.c which has the modified
-request_firmware_nowait function.
 Also after updating the BIOS image a user mode application needs to execute
 code which sends the BIOS update request to the BIOS. So on the next reboot
 the BIOS knows about the new image downloaded and it updates itself.
-- 
2.7.4



[PATCH v2 1/3] dell_rbu: Don't fallback to userhelper

2016-05-16 Thread Mario Limonciello
When loading firmware, dell_rbu previously allowed a userspace
application to craft the payload after dell_rbu was loaded and
abuse the udev userspace API.

Instead require the payload to be crafted and placed in
/lib/firmware/dell_rbu ahead of time.

This adjusts dell_rbu to immediately load the firmware from
/lib/firmware/dell_rbu when "init" is passed into image_type using the
kernel helper.

Signed-off-by: Mario Limonciello 
---
 drivers/firmware/Kconfig| 1 -
 drivers/firmware/dell_rbu.c | 2 +-
 2 files changed, 1 insertion(+), 2 deletions(-)

diff --git a/drivers/firmware/Kconfig b/drivers/firmware/Kconfig
index 6664f11..85afe59 100644
--- a/drivers/firmware/Kconfig
+++ b/drivers/firmware/Kconfig
@@ -86,7 +86,6 @@ config DELL_RBU
tristate "BIOS update support for DELL systems via sysfs"
depends on X86
select FW_LOADER
-   select FW_LOADER_USER_HELPER
help
 Say m if you want to have the option of updating the BIOS for your
 DELL system. Note you need a Dell OpenManage or Dell Update package 
(DUP)
diff --git a/drivers/firmware/dell_rbu.c b/drivers/firmware/dell_rbu.c
index 2f452f1..77b2a77 100644
--- a/drivers/firmware/dell_rbu.c
+++ b/drivers/firmware/dell_rbu.c
@@ -620,7 +620,7 @@ static ssize_t write_rbu_image_type(struct file *filp, 
struct kobject *kobj,
if (!rbu_data.entry_created) {
spin_unlock(&rbu_data.lock);
req_firm_rc = request_firmware_nowait(THIS_MODULE,
-   FW_ACTION_NOHOTPLUG, "dell_rbu",
+   FW_ACTION_HOTPLUG, "dell_rbu",
&rbu_device->dev, GFP_KERNEL, &context,
callbackfn_rbu);
if (req_firm_rc) {
-- 
2.7.4



[PATCH v2 1/3] dell_rbu: Don't fallback to userhelper

2016-05-16 Thread Mario Limonciello
When loading firmware, dell_rbu previously allowed a userspace
application to craft the payload after dell_rbu was loaded and
abuse the udev userspace API.

Instead require the payload to be crafted and placed in
/lib/firmware/dell_rbu ahead of time.

This adjusts dell_rbu to immediately load the firmware from
/lib/firmware/dell_rbu when "init" is passed into image_type using the
kernel helper.

Signed-off-by: Mario Limonciello 
---
 drivers/firmware/Kconfig| 1 -
 drivers/firmware/dell_rbu.c | 2 +-
 2 files changed, 1 insertion(+), 2 deletions(-)

diff --git a/drivers/firmware/Kconfig b/drivers/firmware/Kconfig
index 6664f11..85afe59 100644
--- a/drivers/firmware/Kconfig
+++ b/drivers/firmware/Kconfig
@@ -86,7 +86,6 @@ config DELL_RBU
tristate "BIOS update support for DELL systems via sysfs"
depends on X86
select FW_LOADER
-   select FW_LOADER_USER_HELPER
help
 Say m if you want to have the option of updating the BIOS for your
 DELL system. Note you need a Dell OpenManage or Dell Update package 
(DUP)
diff --git a/drivers/firmware/dell_rbu.c b/drivers/firmware/dell_rbu.c
index 2f452f1..77b2a77 100644
--- a/drivers/firmware/dell_rbu.c
+++ b/drivers/firmware/dell_rbu.c
@@ -620,7 +620,7 @@ static ssize_t write_rbu_image_type(struct file *filp, 
struct kobject *kobj,
if (!rbu_data.entry_created) {
spin_unlock(&rbu_data.lock);
req_firm_rc = request_firmware_nowait(THIS_MODULE,
-   FW_ACTION_NOHOTPLUG, "dell_rbu",
+   FW_ACTION_HOTPLUG, "dell_rbu",
&rbu_device->dev, GFP_KERNEL, &context,
callbackfn_rbu);
if (req_firm_rc) {
-- 
2.7.4



[PATCH v2 3/3] firmware_class: drop support for FW_LOADER_USER_HELPER_FALLBACK

2016-05-16 Thread Mario Limonciello
The last consumer of this is dell_rbu, and it no longer needs this
due to userspace changes in how updates are passed to the OS.

Signed-off-by: Mario Limonciello 
---
 drivers/base/Kconfig  | 14 --
 drivers/base/firmware_class.c |  9 ++---
 2 files changed, 2 insertions(+), 21 deletions(-)

diff --git a/drivers/base/Kconfig b/drivers/base/Kconfig
index 98504ec..55e6ed1 100644
--- a/drivers/base/Kconfig
+++ b/drivers/base/Kconfig
@@ -151,20 +151,6 @@ config EXTRA_FIRMWARE_DIR
 config FW_LOADER_USER_HELPER
bool
 
-config FW_LOADER_USER_HELPER_FALLBACK
-   bool "Fallback user-helper invocation for firmware loading"
-   depends on FW_LOADER
-   select FW_LOADER_USER_HELPER
-   help
- This option enables / disables the invocation of user-helper
- (e.g. udev) for loading firmware files as a fallback after the
- direct file loading in kernel fails.  The user-mode helper is
- no longer required unless you have a special firmware file that
- resides in a non-standard path. Moreover, the udev support has
- been deprecated upstream.
-
- If you are unsure about this, say N here.
-
 config WANT_DEV_COREDUMP
bool
help
diff --git a/drivers/base/firmware_class.c b/drivers/base/firmware_class.c
index 773fc30..3da4f31 100644
--- a/drivers/base/firmware_class.c
+++ b/drivers/base/firmware_class.c
@@ -106,11 +106,6 @@ static inline long firmware_loading_timeout(void)
 #else
 #define FW_OPT_USERHELPER  0
 #endif
-#ifdef CONFIG_FW_LOADER_USER_HELPER_FALLBACK
-#define FW_OPT_FALLBACKFW_OPT_USERHELPER
-#else
-#define FW_OPT_FALLBACK0
-#endif
 #define FW_OPT_NO_WARN (1U << 3)
 
 struct firmware_cache {
@@ -1185,7 +1180,7 @@ request_firmware(const struct firmware **firmware_p, 
const char *name,
/* Need to pin this module until return */
__module_get(THIS_MODULE);
ret = _request_firmware(firmware_p, name, device,
-   FW_OPT_UEVENT | FW_OPT_FALLBACK);
+   FW_OPT_UEVENT);
module_put(THIS_MODULE);
return ret;
 }
@@ -1301,7 +1296,7 @@ request_firmware_nowait(
fw_work->device = device;
fw_work->context = context;
fw_work->cont = cont;
-   fw_work->opt_flags = FW_OPT_NOWAIT | FW_OPT_FALLBACK |
+   fw_work->opt_flags = FW_OPT_NOWAIT |
(uevent ? FW_OPT_UEVENT : FW_OPT_USERHELPER);
 
if (!try_module_get(module)) {
-- 
2.7.4



[PATCH v2 3/3] firmware_class: drop support for FW_LOADER_USER_HELPER_FALLBACK

2016-05-16 Thread Mario Limonciello
The last consumer of this is dell_rbu, and it no longer needs this
due to userspace changes in how updates are passed to the OS.

Signed-off-by: Mario Limonciello 
---
 drivers/base/Kconfig  | 14 --
 drivers/base/firmware_class.c |  9 ++---
 2 files changed, 2 insertions(+), 21 deletions(-)

diff --git a/drivers/base/Kconfig b/drivers/base/Kconfig
index 98504ec..55e6ed1 100644
--- a/drivers/base/Kconfig
+++ b/drivers/base/Kconfig
@@ -151,20 +151,6 @@ config EXTRA_FIRMWARE_DIR
 config FW_LOADER_USER_HELPER
bool
 
-config FW_LOADER_USER_HELPER_FALLBACK
-   bool "Fallback user-helper invocation for firmware loading"
-   depends on FW_LOADER
-   select FW_LOADER_USER_HELPER
-   help
- This option enables / disables the invocation of user-helper
- (e.g. udev) for loading firmware files as a fallback after the
- direct file loading in kernel fails.  The user-mode helper is
- no longer required unless you have a special firmware file that
- resides in a non-standard path. Moreover, the udev support has
- been deprecated upstream.
-
- If you are unsure about this, say N here.
-
 config WANT_DEV_COREDUMP
bool
help
diff --git a/drivers/base/firmware_class.c b/drivers/base/firmware_class.c
index 773fc30..3da4f31 100644
--- a/drivers/base/firmware_class.c
+++ b/drivers/base/firmware_class.c
@@ -106,11 +106,6 @@ static inline long firmware_loading_timeout(void)
 #else
 #define FW_OPT_USERHELPER  0
 #endif
-#ifdef CONFIG_FW_LOADER_USER_HELPER_FALLBACK
-#define FW_OPT_FALLBACKFW_OPT_USERHELPER
-#else
-#define FW_OPT_FALLBACK0
-#endif
 #define FW_OPT_NO_WARN (1U << 3)
 
 struct firmware_cache {
@@ -1185,7 +1180,7 @@ request_firmware(const struct firmware **firmware_p, 
const char *name,
/* Need to pin this module until return */
__module_get(THIS_MODULE);
ret = _request_firmware(firmware_p, name, device,
-   FW_OPT_UEVENT | FW_OPT_FALLBACK);
+   FW_OPT_UEVENT);
module_put(THIS_MODULE);
return ret;
 }
@@ -1301,7 +1296,7 @@ request_firmware_nowait(
fw_work->device = device;
fw_work->context = context;
fw_work->cont = cont;
-   fw_work->opt_flags = FW_OPT_NOWAIT | FW_OPT_FALLBACK |
+   fw_work->opt_flags = FW_OPT_NOWAIT |
(uevent ? FW_OPT_UEVENT : FW_OPT_USERHELPER);
 
if (!try_module_get(module)) {
-- 
2.7.4



Re: Stack trace of csum_partial_copy_generic

2016-05-16 Thread Josh Poimboeuf
Hi Nikolay,

On Fri, May 13, 2016 at 02:07:47PM +0300, Nikolay Borisov wrote:
> Hello Josh, 
> 
> I'd like to ask you whether objtool is supposed to produce a 
> warning when arch/x86/lib/csum-copy_64.o (produced from 
> arch/x86/lib/csum-copy_64.S). Since I cannot see any specific 
> usage of rbp for defining a stackframe. I'm chasing against 
> poor performance of a network benchmark and this is what perf produces: 
> 
> # Overhead  Command  Shared Object
>  Symbol
> #   ...  .  
> .
> #
> 37.30%iperf  [kernel.kallsyms]  [k] 
> csum_partial_copy_generic
>   |
>   --- csum_partial_copy_generic
>  |  
>  |--99.98%-- 0x7f809108b7cd
>  |  |  
>  |  |--69.72%-- 0x2
>  |  |  
>  |   --30.28%-- 0x7f809108b7c2
>  | 0x2
>   --0.02%-- [...]
> 
> So this is not very helpful in tracing where this is being 
> called from. Presumably somewhere from the networking layer. So 
> should objtool catch this or since csum_partial_copy_generic is a leaf
> function reliable stack trace isn't needed?

Right, since it's a leaf function, objtool ignores it and lets it do
whatever it wants with the frame pointer.

> Furthermore this function is called from C wrapper in
> csum-wrappers_64.c - shouldn't at least they be present in the
> callstack?

I suspect the problem is that it can't walk the stack because the
function overwrites the rbp register.  Try replacing all uses of rbp in
that function with another register.  r15?

(Another solution would be to tell perf to use DWARF unwinding instead
of frame pointers, but currently, kernel asm code doesn't have any DWARF
annotations.  I'm planning on adding support for that soon in the 4.8
timeframe by generating DWARF metadata using objtool.)

-- 
Josh


Re: Stack trace of csum_partial_copy_generic

2016-05-16 Thread Josh Poimboeuf
Hi Nikolay,

On Fri, May 13, 2016 at 02:07:47PM +0300, Nikolay Borisov wrote:
> Hello Josh, 
> 
> I'd like to ask you whether objtool is supposed to produce a 
> warning when arch/x86/lib/csum-copy_64.o (produced from 
> arch/x86/lib/csum-copy_64.S). Since I cannot see any specific 
> usage of rbp for defining a stackframe. I'm chasing against 
> poor performance of a network benchmark and this is what perf produces: 
> 
> # Overhead  Command  Shared Object
>  Symbol
> #   ...  .  
> .
> #
> 37.30%iperf  [kernel.kallsyms]  [k] 
> csum_partial_copy_generic
>   |
>   --- csum_partial_copy_generic
>  |  
>  |--99.98%-- 0x7f809108b7cd
>  |  |  
>  |  |--69.72%-- 0x2
>  |  |  
>  |   --30.28%-- 0x7f809108b7c2
>  | 0x2
>   --0.02%-- [...]
> 
> So this is not very helpful in tracing where this is being 
> called from. Presumably somewhere from the networking layer. So 
> should objtool catch this or since csum_partial_copy_generic is a leaf
> function reliable stack trace isn't needed?

Right, since it's a leaf function, objtool ignores it and lets it do
whatever it wants with the frame pointer.

> Furthermore this function is called from C wrapper in
> csum-wrappers_64.c - shouldn't at least they be present in the
> callstack?

I suspect the problem is that it can't walk the stack because the
function overwrites the rbp register.  Try replacing all uses of rbp in
that function with another register.  r15?

(Another solution would be to tell perf to use DWARF unwinding instead
of frame pointers, but currently, kernel asm code doesn't have any DWARF
annotations.  I'm planning on adding support for that soon in the 4.8
timeframe by generating DWARF metadata using objtool.)

-- 
Josh


[PATCH v2] tpm: Factor out common startup code

2016-05-16 Thread Jason Gunthorpe
Provide some flags in tpm_class_ops to allow drivers to opt-in to the
common startup sequence. This is the sequence used by tpm_tis and
tpm_crb.

All drivers should set this flag.

Signed-off-by: Jason Gunthorpe 
Tested-by: Andrew Zamansky 
---
 drivers/char/tpm/st33zp24/st33zp24.c |  4 +---
 drivers/char/tpm/tpm-chip.c  | 15 ++
 drivers/char/tpm/tpm-interface.c | 27 
 drivers/char/tpm/tpm.h   |  2 ++
 drivers/char/tpm/tpm2-cmd.c  | 40 
 drivers/char/tpm/tpm_crb.c   | 10 +
 drivers/char/tpm/tpm_i2c_atmel.c |  6 +-
 drivers/char/tpm/tpm_i2c_infineon.c  |  4 +---
 drivers/char/tpm/tpm_i2c_nuvoton.c   |  7 +--
 drivers/char/tpm/tpm_tis.c   | 24 +-
 include/linux/tpm.h  |  6 ++
 11 files changed, 96 insertions(+), 49 deletions(-)

v2 has a small typo fix from Andrew in the call to tpm2_startup.

diff --git a/drivers/char/tpm/st33zp24/st33zp24.c 
b/drivers/char/tpm/st33zp24/st33zp24.c
index 8d626784cd8d..4556c95f83cb 100644
--- a/drivers/char/tpm/st33zp24/st33zp24.c
+++ b/drivers/char/tpm/st33zp24/st33zp24.c
@@ -532,6 +532,7 @@ static bool st33zp24_req_canceled(struct tpm_chip *chip, u8 
status)
 }
 
 static const struct tpm_class_ops st33zp24_tpm = {
+   .flags = TPM_OPS_AUTO_STARTUP,
.send = st33zp24_send,
.recv = st33zp24_recv,
.cancel = st33zp24_cancel,
@@ -618,9 +619,6 @@ int st33zp24_probe(void *phy_id, const struct 
st33zp24_phy_ops *ops,
tpm_gen_interrupt(chip);
}
 
-   tpm_get_timeouts(chip);
-   tpm_do_selftest(chip);
-
return tpm_chip_register(chip);
 _tpm_clean_answer:
dev_info(&chip->dev, "TPM initialization fail\n");
diff --git a/drivers/char/tpm/tpm-chip.c b/drivers/char/tpm/tpm-chip.c
index 274dd0123237..9a36cedd94eb 100644
--- a/drivers/char/tpm/tpm-chip.c
+++ b/drivers/char/tpm/tpm-chip.c
@@ -223,6 +223,21 @@ int tpm_chip_register(struct tpm_chip *chip)
 {
int rc;
 
+   if (chip->ops->flags & TPM_OPS_PROBE_TPM2) {
+   rc = tpm2_probe(chip);
+   if (rc)
+   return rc;
+   }
+
+   if (chip->ops->flags & TPM_OPS_AUTO_STARTUP) {
+   if (chip->flags & TPM_CHIP_FLAG_TPM2)
+   rc = tpm2_auto_startup(chip);
+   else
+   rc = tpm1_auto_startup(chip);
+   if (rc)
+   return rc;
+   }
+
rc = tpm1_chip_register(chip);
if (rc)
return rc;
diff --git a/drivers/char/tpm/tpm-interface.c b/drivers/char/tpm/tpm-interface.c
index e2fa89c88304..4e6798ab3a90 100644
--- a/drivers/char/tpm/tpm-interface.c
+++ b/drivers/char/tpm/tpm-interface.c
@@ -842,6 +842,33 @@ int tpm_do_selftest(struct tpm_chip *chip)
 }
 EXPORT_SYMBOL_GPL(tpm_do_selftest);
 
+/**
+ * tpm1_auto_startup - Perform the standard automatic TPM initialization
+ * sequence
+ * @chip: TPM chip to use
+ *
+ * Returns 0 on success, < 0 in case of fatal error.
+ */
+int tpm1_auto_startup(struct tpm_chip *chip)
+{
+   int rc;
+
+   rc = tpm_get_timeouts(chip);
+   if (rc)
+   goto out;
+   rc = tpm_do_selftest(chip);
+   if (rc) {
+   dev_err(&chip->dev, "TPM self test failed\n");
+   goto out;
+   }
+
+   return rc;
+out:
+   if (rc > 0)
+   rc = -ENODEV;
+   return rc;
+}
+
 int tpm_send(u32 chip_num, void *cmd, size_t buflen)
 {
struct tpm_chip *chip;
diff --git a/drivers/char/tpm/tpm.h b/drivers/char/tpm/tpm.h
index 28b477e8da6a..a99105f1a5c4 100644
--- a/drivers/char/tpm/tpm.h
+++ b/drivers/char/tpm/tpm.h
@@ -501,6 +501,7 @@ ssize_t tpm_transmit_cmd(struct tpm_chip *chip, void *cmd, 
int len,
 const char *desc);
 extern int tpm_get_timeouts(struct tpm_chip *);
 extern void tpm_gen_interrupt(struct tpm_chip *);
+int tpm1_auto_startup(struct tpm_chip *chip);
 extern int tpm_do_selftest(struct tpm_chip *);
 extern unsigned long tpm_calc_ordinal_duration(struct tpm_chip *, u32);
 extern int tpm_pm_suspend(struct device *);
@@ -539,6 +540,7 @@ int tpm2_unseal_trusted(struct tpm_chip *chip,
 ssize_t tpm2_get_tpm_pt(struct tpm_chip *chip, u32 property_id,
u32 *value, const char *desc);
 
+int tpm2_auto_startup(struct tpm_chip *chip);
 extern int tpm2_startup(struct tpm_chip *chip, u16 startup_type);
 extern void tpm2_shutdown(struct tpm_chip *chip, u16 shutdown_type);
 extern unsigned long tpm2_calc_ordinal_duration(struct tpm_chip *, u32);
diff --git a/drivers/char/tpm/tpm2-cmd.c b/drivers/char/tpm/tpm2-cmd.c
index b28e4da3d2cf..984190e551a1 100644
--- a/drivers/char/tpm/tpm2-cmd.c
+++ b/drivers/char/tpm/tpm2-cmd.c
@@ -943,3 +943,43 @@ int tpm2_probe(struct tpm_chip *chip)
return 0;
 }
 

[PATCH v2] tpm: Factor out common startup code

2016-05-16 Thread Jason Gunthorpe
Provide some flags in tpm_class_ops to allow drivers to opt-in to the
common startup sequence. This is the sequence used by tpm_tis and
tpm_crb.

All drivers should set this flag.

Signed-off-by: Jason Gunthorpe 
Tested-by: Andrew Zamansky 
---
 drivers/char/tpm/st33zp24/st33zp24.c |  4 +---
 drivers/char/tpm/tpm-chip.c  | 15 ++
 drivers/char/tpm/tpm-interface.c | 27 
 drivers/char/tpm/tpm.h   |  2 ++
 drivers/char/tpm/tpm2-cmd.c  | 40 
 drivers/char/tpm/tpm_crb.c   | 10 +
 drivers/char/tpm/tpm_i2c_atmel.c |  6 +-
 drivers/char/tpm/tpm_i2c_infineon.c  |  4 +---
 drivers/char/tpm/tpm_i2c_nuvoton.c   |  7 +--
 drivers/char/tpm/tpm_tis.c   | 24 +-
 include/linux/tpm.h  |  6 ++
 11 files changed, 96 insertions(+), 49 deletions(-)

v2 has a small typo fix from Andrew in the call to tpm2_startup.

diff --git a/drivers/char/tpm/st33zp24/st33zp24.c 
b/drivers/char/tpm/st33zp24/st33zp24.c
index 8d626784cd8d..4556c95f83cb 100644
--- a/drivers/char/tpm/st33zp24/st33zp24.c
+++ b/drivers/char/tpm/st33zp24/st33zp24.c
@@ -532,6 +532,7 @@ static bool st33zp24_req_canceled(struct tpm_chip *chip, u8 
status)
 }
 
 static const struct tpm_class_ops st33zp24_tpm = {
+   .flags = TPM_OPS_AUTO_STARTUP,
.send = st33zp24_send,
.recv = st33zp24_recv,
.cancel = st33zp24_cancel,
@@ -618,9 +619,6 @@ int st33zp24_probe(void *phy_id, const struct 
st33zp24_phy_ops *ops,
tpm_gen_interrupt(chip);
}
 
-   tpm_get_timeouts(chip);
-   tpm_do_selftest(chip);
-
return tpm_chip_register(chip);
 _tpm_clean_answer:
dev_info(&chip->dev, "TPM initialization fail\n");
diff --git a/drivers/char/tpm/tpm-chip.c b/drivers/char/tpm/tpm-chip.c
index 274dd0123237..9a36cedd94eb 100644
--- a/drivers/char/tpm/tpm-chip.c
+++ b/drivers/char/tpm/tpm-chip.c
@@ -223,6 +223,21 @@ int tpm_chip_register(struct tpm_chip *chip)
 {
int rc;
 
+   if (chip->ops->flags & TPM_OPS_PROBE_TPM2) {
+   rc = tpm2_probe(chip);
+   if (rc)
+   return rc;
+   }
+
+   if (chip->ops->flags & TPM_OPS_AUTO_STARTUP) {
+   if (chip->flags & TPM_CHIP_FLAG_TPM2)
+   rc = tpm2_auto_startup(chip);
+   else
+   rc = tpm1_auto_startup(chip);
+   if (rc)
+   return rc;
+   }
+
rc = tpm1_chip_register(chip);
if (rc)
return rc;
diff --git a/drivers/char/tpm/tpm-interface.c b/drivers/char/tpm/tpm-interface.c
index e2fa89c88304..4e6798ab3a90 100644
--- a/drivers/char/tpm/tpm-interface.c
+++ b/drivers/char/tpm/tpm-interface.c
@@ -842,6 +842,33 @@ int tpm_do_selftest(struct tpm_chip *chip)
 }
 EXPORT_SYMBOL_GPL(tpm_do_selftest);
 
+/**
+ * tpm1_auto_startup - Perform the standard automatic TPM initialization
+ * sequence
+ * @chip: TPM chip to use
+ *
+ * Returns 0 on success, < 0 in case of fatal error.
+ */
+int tpm1_auto_startup(struct tpm_chip *chip)
+{
+   int rc;
+
+   rc = tpm_get_timeouts(chip);
+   if (rc)
+   goto out;
+   rc = tpm_do_selftest(chip);
+   if (rc) {
+   dev_err(&chip->dev, "TPM self test failed\n");
+   goto out;
+   }
+
+   return rc;
+out:
+   if (rc > 0)
+   rc = -ENODEV;
+   return rc;
+}
+
 int tpm_send(u32 chip_num, void *cmd, size_t buflen)
 {
struct tpm_chip *chip;
diff --git a/drivers/char/tpm/tpm.h b/drivers/char/tpm/tpm.h
index 28b477e8da6a..a99105f1a5c4 100644
--- a/drivers/char/tpm/tpm.h
+++ b/drivers/char/tpm/tpm.h
@@ -501,6 +501,7 @@ ssize_t tpm_transmit_cmd(struct tpm_chip *chip, void *cmd, 
int len,
 const char *desc);
 extern int tpm_get_timeouts(struct tpm_chip *);
 extern void tpm_gen_interrupt(struct tpm_chip *);
+int tpm1_auto_startup(struct tpm_chip *chip);
 extern int tpm_do_selftest(struct tpm_chip *);
 extern unsigned long tpm_calc_ordinal_duration(struct tpm_chip *, u32);
 extern int tpm_pm_suspend(struct device *);
@@ -539,6 +540,7 @@ int tpm2_unseal_trusted(struct tpm_chip *chip,
 ssize_t tpm2_get_tpm_pt(struct tpm_chip *chip, u32 property_id,
u32 *value, const char *desc);
 
+int tpm2_auto_startup(struct tpm_chip *chip);
 extern int tpm2_startup(struct tpm_chip *chip, u16 startup_type);
 extern void tpm2_shutdown(struct tpm_chip *chip, u16 shutdown_type);
 extern unsigned long tpm2_calc_ordinal_duration(struct tpm_chip *, u32);
diff --git a/drivers/char/tpm/tpm2-cmd.c b/drivers/char/tpm/tpm2-cmd.c
index b28e4da3d2cf..984190e551a1 100644
--- a/drivers/char/tpm/tpm2-cmd.c
+++ b/drivers/char/tpm/tpm2-cmd.c
@@ -943,3 +943,43 @@ int tpm2_probe(struct tpm_chip *chip)
return 0;
 }
 EXPORT_SYMBOL_GPL(tpm2_probe);
+
+/**
+ * tpm2_auto_startup - Perform the 

Re: SHA1-MB algorithm broken on latest kernel

2016-05-16 Thread Megha Dey
On Mon, 2016-05-16 at 09:44 -0500, Josh Poimboeuf wrote:
> On Fri, May 13, 2016 at 10:32:26AM -0700, Megha Dey wrote:
> > On Fri, 2016-05-13 at 07:51 +0200, Ingo Molnar wrote:
> > > * Herbert Xu  wrote:
> > > 
> > > > On Thu, May 12, 2016 at 04:31:06PM -0700, Megha Dey wrote:
> > > > > Hi,
> > > > >  
> > > > > When booting latest kernel with the CONFIG_CRYPTO_SHA1_MB enabled, I
> > > > > observe a panic.
> > > > >  
> > > > > After having a quick look, on reverting the following patches, I am 
> > > > > able
> > > > > to complete the booting process.
> > > > > aec4d0e301f17bb143341c82cc44685b8af0b945
> > > > > 8691ccd764f9ecc69a6812dfe76214c86ac9ba06
> > > > > 68874ac3304ade7ed5ebb12af00d6b9bbbca0a16
> > > > >  
> > > > > Of the 3 patches, aec4d0e301f17bb143341c82cc44685b8af0b945 seems 
> > > > > wrong.
> > > > > The r10 to r15 registers are used in sha1_x8_avx2.S, which is called
> > > > > from sha1_mb_mgr_flush_avx2.S.
> > > > >
> > > > > I do not think the functionality of the SHA1-MB crypto algorithm has
> > > > > been tested after applying these changes. (I am not sure if any of the
> > > > > other crypto algorithms have been affected by these changes).
> > > > 
> > > > Josh, Ingo:
> > > > 
> > > > Any ideas on this? Should we revert?
> > > 
> > > Yeah, I think so - although another option would be to standardize 
> > > sha1_x8_avx2() 
> > > - the problem is that it is a function that clobbers registers without 
> > > saving/restoring them, breaking the C function ABI. I realize it's 
> > > written in 
> > > assembly, but unless there are strong performance reasons to deviate from 
> > > the 
> > > regular calling convention it might make sense to fix that.
> > > 
> > > Do any warnings get generated after the revert, if you enable 
> > > CONFIG_STACK_VALIDATION=y?
> > 
> > After the revert and enabling CONFIG_STACK_VALIDATION:
> > arch/x86/crypto/sha1-mb/sha1_mb_mgr_flush_avx2.o: warning: objtool:
> > sha1_mb_mgr_flush_avx2()+0x20d: call without frame pointer save/setup
> > 
> > arch/x86/crypto/sha1-mb/sha1_mb_mgr_submit_avx2.o: warning: objtool:
> > sha1_mb_mgr_submit_avx2()+0x115: call without frame pointer save/setup
> 
> Megha,
> 
> Sorry for breaking it.  I completely missed the fact that the function
> calls sha1_x8_avx2() which clobbers registers.
> 
> If the performance penalty isn't too bad, I'll submit a patch to
> standardize sha1_x8_avx2() to follow the C ABI.
> 
> Do you have any tips for testing this code?  I've tried using the tcrypt
> module, but no luck.
> 
Josh,
Build the kernel with the following configs:
CONFIG_CRYPTO_SHA1_MB=y
CONFIG_CRYPTO_TEST=m
CONFIG_CRYPTO_MANAGER_DISABLE_TESTS=n
There was a kernel panic while booting. 
So if, after applying your new patch, we are able to complete the
boot, then we are good.

Could you please send a copy of the patch, I could test it on my end
too. 





Re: SHA1-MB algorithm broken on latest kernel

2016-05-16 Thread Megha Dey
On Mon, 2016-05-16 at 09:44 -0500, Josh Poimboeuf wrote:
> On Fri, May 13, 2016 at 10:32:26AM -0700, Megha Dey wrote:
> > On Fri, 2016-05-13 at 07:51 +0200, Ingo Molnar wrote:
> > > * Herbert Xu  wrote:
> > > 
> > > > On Thu, May 12, 2016 at 04:31:06PM -0700, Megha Dey wrote:
> > > > > Hi,
> > > > >  
> > > > > When booting latest kernel with the CONFIG_CRYPTO_SHA1_MB enabled, I
> > > > > observe a panic.
> > > > >  
> > > > > After having a quick look, on reverting the following patches, I am 
> > > > > able
> > > > > to complete the booting process.
> > > > > aec4d0e301f17bb143341c82cc44685b8af0b945
> > > > > 8691ccd764f9ecc69a6812dfe76214c86ac9ba06
> > > > > 68874ac3304ade7ed5ebb12af00d6b9bbbca0a16
> > > > >  
> > > > > Of the 3 patches, aec4d0e301f17bb143341c82cc44685b8af0b945 seems 
> > > > > wrong.
> > > > > The r10 to r15 registers are used in sha1_x8_avx2.S, which is called
> > > > > from sha1_mb_mgr_flush_avx2.S.
> > > > >
> > > > > I do not think the functionality of the SHA1-MB crypto algorithm has
> > > > > been tested after applying these changes. (I am not sure if any of the
> > > > > other crypto algorithms have been affected by these changes).
> > > > 
> > > > Josh, Ingo:
> > > > 
> > > > Any ideas on this? Should we revert?
> > > 
> > > Yeah, I think so - although another option would be to standardize 
> > > sha1_x8_avx2() 
> > > - the problem is that it is a function that clobbers registers without 
> > > saving/restoring them, breaking the C function ABI. I realize it's 
> > > written in 
> > > assembly, but unless there are strong performance reasons to deviate from 
> > > the 
> > > regular calling convention it might make sense to fix that.
> > > 
> > > Do any warnings get generated after the revert, if you enable 
> > > CONFIG_STACK_VALIDATION=y?
> > 
> > After the revert and enabling CONFIG_STACK_VALIDATION:
> > arch/x86/crypto/sha1-mb/sha1_mb_mgr_flush_avx2.o: warning: objtool:
> > sha1_mb_mgr_flush_avx2()+0x20d: call without frame pointer save/setup
> > 
> > arch/x86/crypto/sha1-mb/sha1_mb_mgr_submit_avx2.o: warning: objtool:
> > sha1_mb_mgr_submit_avx2()+0x115: call without frame pointer save/setup
> 
> Megha,
> 
> Sorry for breaking it.  I completely missed the fact that the function
> calls sha1_x8_avx2() which clobbers registers.
> 
> If the performance penalty isn't too bad, I'll submit a patch to
> standardize sha1_x8_avx2() to follow the C ABI.
> 
> Do you have any tips for testing this code?  I've tried using the tcrypt
> module, but no luck.
> 
Josh,
Build the kernel with the following configs:
CONFIG_CRYPTO_SHA1_MB=y
CONFIG_CRYPTO_TEST=m
CONFIG_CRYPTO_MANAGER_DISABLE_TESTS=n
There was a kernel panic while booting. 
So if, after applying your new patch, we are able to complete the
boot, then we are good.

Could you please send a copy of the patch, I could test it on my end
too. 





Re: [PATCH v5 0/4] x86, boot: KASLR memory randomization

2016-05-16 Thread Thomas Garnier
Any feedback on the patch? Ingo? Kees?

Kees mentioned he will take care of the build warning on the KASLR
refactor (the function is not used right now).

Thanks,
Thomas

On Thu, May 12, 2016 at 12:28 PM, Thomas Garnier  wrote:
> This is PATCH v5 for KASLR memory implementation for x86_64.
>
> Recent changes:
> Add performance information on commit.
> Add details on PUD alignment.
> Add information on testing against the KASLR bypass exploit.
> Rebase on next-20160511 and merge recent KASLR changes.
> Integrate feedback from Kees.
>
> ***Background:
> The current implementation of KASLR randomizes only the base address of
> the kernel and its modules. Research was published showing that static
> memory can be overwritten to elevate privileges bypassing KASLR.
>
> In more details:
>
>The physical memory mapping holds most allocations from boot and heap
>allocators. Knowing the base address and physical memory size, an
>attacker can deduce the PDE virtual address for the vDSO memory page.
>This attack was demonstrated at CanSecWest 2016, in the "Getting
>Physical Extreme Abuse of Intel Based Paged Systems"
>https://goo.gl/ANpWdV (see second part of the presentation). The
>exploits used against Linux worked successfully against 4.6+ but fail
>with KASLR memory enabled (https://goo.gl/iTtXMJ). Similar research
>was done at Google leading to this patch proposal. Variants exist to
>overwrite /proc or /sys objects ACLs leading to elevation of privileges.
>These variants were tested against 4.6+.
>
> This set of patches randomizes base address and padding of three
> major memory sections (physical memory mapping, vmalloc & vmemmap).
> It mitigates exploits relying on predictable kernel addresses. This
> feature can be enabled with the CONFIG_RANDOMIZE_MEMORY option.
>
> Padding for the memory hotplug support is managed by
> CONFIG_RANDOMIZE_MEMORY_PHYSICAL_PADDING. The default value is 10
> terabytes.
>
> The patches were tested on qemu & physical machines. Xen compatibility was
> also verified. Multiple reboots were used to verify entropy for each
> memory section.
>
> ***Problems that needed solving:
>  - The three target memory sections are never at the same place between
>boots.
>  - The physical memory mapping can use a virtual address not aligned on
>the PGD page table.
>  - Have good entropy early at boot before get_random_bytes is available.
>  - Add optional padding for memory hotplug compatibility.
>
> ***Parts:
>  - The first part prepares for the KASLR memory randomization by
>refactoring entropy functions used by the current implementation and
>support PUD level virtual addresses for physical mapping.
>(Patches 01-02)
>  - The second part implements the KASLR memory randomization for all
>sections mentioned.
>(Patch 03)
>  - The third part adds support for memory hotplug by adding an option to
>define the padding used between the physical memory mapping section
>and the others.
>(Patch 04)
>
> Performance data:
>
> Kernbench shows almost no difference (-+ less than 1%):
>
> Before:
>
> Average Optimal load -j 12 Run (std deviation):
> Elapsed Time 102.63 (1.2695)
> User Time 1034.89 (1.18115)
> System Time 87.056 (0.456416)
> Percent CPU 1092.9 (13.892)
> Context Switches 199805 (3455.33)
> Sleeps 97907.8 (900.636)
>
> After:
>
> Average Optimal load -j 12 Run (std deviation):
> Elapsed Time 102.489 (1.10636)
> User Time 1034.86 (1.36053)
> System Time 87.764 (0.49345)
> Percent CPU 1095 (12.7715)
> Context Switches 199036 (4298.1)
> Sleeps 97681.6 (1031.11)
>
> Hackbench shows 0% difference on average (hackbench 90
> repeated 10 times):
>
> attempt,before,after
> 1,0.076,0.069
> 2,0.072,0.069
> 3,0.066,0.066
> 4,0.066,0.068
> 5,0.066,0.067
> 6,0.066,0.069
> 7,0.067,0.066
> 8,0.063,0.067
> 9,0.067,0.065
> 10,0.068,0.071
> average,0.0677,0.0677
>
> Thanks!
>


Re: [PATCHv8 resend 2/2] selftest/x86: add mremap vdso test

2016-05-16 Thread Andy Lutomirski
On Mon, May 16, 2016 at 9:24 AM, Dmitry Safonov  wrote:
> On 05/16/2016 04:54 PM, Ingo Molnar wrote:
>>
>>
>> * Dmitry Safonov  wrote:
>>
>>> Should print on success:
>>> [root@localhost ~]# ./test_mremap_vdso_32
>>> AT_SYSINFO_EHDR is 0xf773f000
>>> [NOTE]  Moving vDSO: [f773f000, f774] -> [a00, a001000]
>>> [OK]
>>> Or segfault if landing was bad (before patches):
>>> [root@localhost ~]# ./test_mremap_vdso_32
>>> AT_SYSINFO_EHDR is 0xf774f000
>>> [NOTE]  Moving vDSO: [f774f000, f775] -> [a00, a001000]
>>> Segmentation fault (core dumped)
>>
>>
>> Can the segfault be caught and recovered from, to print a proper failure
>> message?
>
>
> Will add segfault handler, thanks.
>

It may be more complicated than that.  Glibc is likely to explode if
this happens, and the headers are sufficiently screwed up that it's
awkward to bypass glibc and call rt_sigaction directly.  I have a test
that does the latter, though, so it's at least possible, but I'm
unconvinced it's worth it just for an error message.

-- 
Andy Lutomirski
AMA Capital Management, LLC


I am still waiting for your response to my numerous un-replied emails to you concerning your family inheritance fund ($4.6 million dollars). I seek your assistance and I assured of your capability to

2016-05-16 Thread Johnson Morgan



Re: [PATCH v5 0/4] x86, boot: KASLR memory randomization

2016-05-16 Thread Thomas Garnier
Any feedback on the patch? Ingo? Kees?

Kees mentioned he will take care of the build warning on the KASLR
refactor (the function is not used right now).

Thanks,
Thomas

On Thu, May 12, 2016 at 12:28 PM, Thomas Garnier  wrote:
> This is PATCH v5 for KASLR memory implementation for x86_64.
>
> Recent changes:
> Add performance information on commit.
> Add details on PUD alignment.
> Add information on testing against the KASLR bypass exploit.
> Rebase on next-20160511 and merge recent KASLR changes.
> Integrate feedback from Kees.
>
> ***Background:
> The current implementation of KASLR randomizes only the base address of
> the kernel and its modules. Research was published showing that static
> memory can be overwritten to elevate privileges bypassing KASLR.
>
> In more details:
>
>The physical memory mapping holds most allocations from boot and heap
>allocators. Knowing the base address and physical memory size, an
>attacker can deduce the PDE virtual address for the vDSO memory page.
>This attack was demonstrated at CanSecWest 2016, in the "Getting
>Physical Extreme Abuse of Intel Based Paged Systems"
>https://goo.gl/ANpWdV (see second part of the presentation). The
>exploits used against Linux worked successfully against 4.6+ but fail
>with KASLR memory enabled (https://goo.gl/iTtXMJ). Similar research
>was done at Google leading to this patch proposal. Variants exist to
>overwrite /proc or /sys objects ACLs leading to elevation of privileges.
>These variants were tested against 4.6+.
>
> This set of patches randomizes base address and padding of three
> major memory sections (physical memory mapping, vmalloc & vmemmap).
> It mitigates exploits relying on predictable kernel addresses. This
> feature can be enabled with the CONFIG_RANDOMIZE_MEMORY option.
>
> Padding for the memory hotplug support is managed by
> CONFIG_RANDOMIZE_MEMORY_PHYSICAL_PADDING. The default value is 10
> terabytes.
>
> The patches were tested on qemu & physical machines. Xen compatibility was
> also verified. Multiple reboots were used to verify entropy for each
> memory section.
>
> ***Problems that needed solving:
>  - The three target memory sections are never at the same place between
>boots.
>  - The physical memory mapping can use a virtual address not aligned on
>the PGD page table.
>  - Have good entropy early at boot before get_random_bytes is available.
>  - Add optional padding for memory hotplug compatibility.
>
> ***Parts:
>  - The first part prepares for the KASLR memory randomization by
>refactoring entropy functions used by the current implementation and
>support PUD level virtual addresses for physical mapping.
>(Patches 01-02)
>  - The second part implements the KASLR memory randomization for all
>sections mentioned.
>(Patch 03)
>  - The third part adds support for memory hotplug by adding an option to
>define the padding used between the physical memory mapping section
>and the others.
>(Patch 04)
>
> Performance data:
>
> Kernbench shows almost no difference (-+ less than 1%):
>
> Before:
>
> Average Optimal load -j 12 Run (std deviation):
> Elapsed Time 102.63 (1.2695)
> User Time 1034.89 (1.18115)
> System Time 87.056 (0.456416)
> Percent CPU 1092.9 (13.892)
> Context Switches 199805 (3455.33)
> Sleeps 97907.8 (900.636)
>
> After:
>
> Average Optimal load -j 12 Run (std deviation):
> Elapsed Time 102.489 (1.10636)
> User Time 1034.86 (1.36053)
> System Time 87.764 (0.49345)
> Percent CPU 1095 (12.7715)
> Context Switches 199036 (4298.1)
> Sleeps 97681.6 (1031.11)
>
> Hackbench shows 0% difference on average (hackbench 90
> repeated 10 times):
>
> attempt,before,after
> 1,0.076,0.069
> 2,0.072,0.069
> 3,0.066,0.066
> 4,0.066,0.068
> 5,0.066,0.067
> 6,0.066,0.069
> 7,0.067,0.066
> 8,0.063,0.067
> 9,0.067,0.065
> 10,0.068,0.071
> average,0.0677,0.0677
>
> Thanks!
>


Re: [PATCHv8 resend 2/2] selftest/x86: add mremap vdso test

2016-05-16 Thread Andy Lutomirski
On Mon, May 16, 2016 at 9:24 AM, Dmitry Safonov  wrote:
> On 05/16/2016 04:54 PM, Ingo Molnar wrote:
>>
>>
>> * Dmitry Safonov  wrote:
>>
>>> Should print on success:
>>> [root@localhost ~]# ./test_mremap_vdso_32
>>> AT_SYSINFO_EHDR is 0xf773f000
>>> [NOTE]  Moving vDSO: [f773f000, f774] -> [a00, a001000]
>>> [OK]
>>> Or segfault if landing was bad (before patches):
>>> [root@localhost ~]# ./test_mremap_vdso_32
>>> AT_SYSINFO_EHDR is 0xf774f000
>>> [NOTE]  Moving vDSO: [f774f000, f775] -> [a00, a001000]
>>> Segmentation fault (core dumped)
>>
>>
>> Can the segfault be caught and recovered from, to print a proper failure
>> message?
>
>
> Will add segfault handler, thanks.
>

It may be more complicated than that.  Glibc is likely to explode if
this happens, and the headers are sufficiently screwed up that it's
awkward to bypass glibc and call rt_sigaction directly.  I have a test
that does the latter, though, so it's at least possible, but I'm
unconvinced it's worth it just for an error message.

-- 
Andy Lutomirski
AMA Capital Management, LLC


I am still waiting for your response to my numerous un-replied emails to you concerning your family inheritance fund ($4.6 million dollars). I seek your assistance and I assured of your capability to

2016-05-16 Thread Johnson Morgan



Re: [PATCH] phy dp83867: depend on CONFIG_OF_MDIO

2016-05-16 Thread Dan Murphy
Alex

On 05/16/2016 12:57 PM, Alexander Graf wrote:
> Hi Dan,
>
> On 16.05.16 15:38, Dan Murphy wrote:
>> Alexander
>>
>> On 05/16/2016 06:28 AM, Alexander Graf wrote:
>>> The DP83867 phy driver doesn't actually work when CONFIG_OF_MDIO isn't 
>>> enabled.
>>> It simply passes the device tree test, but leaves all internal configuration
>>> initialized at 0. Then it configures the phy with those values and renders a
>>> previously working configuration useless.
>>>
>>> This patch makes sure that we only build the DP83867 phy code when
>>> CONFIG_OF_MDIO is set, to not run into that problem.
>>>
>>> Signed-off-by: Alexander Graf 
>>> ---
>>>  drivers/net/phy/Kconfig   | 1 +
>>>  drivers/net/phy/dp83867.c | 7 ---
>>>  2 files changed, 1 insertion(+), 7 deletions(-)
>>>
>>> diff --git a/drivers/net/phy/Kconfig b/drivers/net/phy/Kconfig
>>> index 6dad9a9..4265ad5 100644
>>> --- a/drivers/net/phy/Kconfig
>>> +++ b/drivers/net/phy/Kconfig
>>> @@ -148,6 +148,7 @@ config DP83848_PHY
>>>  
>>>  config DP83867_PHY
>>> tristate "Drivers for Texas Instruments DP83867 Gigabit PHY"
>>> +   depends on OF_MDIO
>>> ---help---
>>>   Currently supports the DP83867 PHY.
>>>  
>>> diff --git a/drivers/net/phy/dp83867.c b/drivers/net/phy/dp83867.c
>>> index 2afa61b..ff867ba 100644
>>> --- a/drivers/net/phy/dp83867.c
>>> +++ b/drivers/net/phy/dp83867.c
>>> @@ -99,7 +99,6 @@ static int dp83867_config_intr(struct phy_device *phydev)
>>> return phy_write(phydev, MII_DP83867_MICR, micr_status);
>>>  }
>>>  
>>> -#ifdef CONFIG_OF_MDIO
>>>  static int dp83867_of_init(struct phy_device *phydev)
>>>  {
>>> struct dp83867_private *dp83867 = phydev->priv;
>>> @@ -123,12 +122,6 @@ static int dp83867_of_init(struct phy_device *phydev)
>>> return of_property_read_u32(of_node, "ti,fifo-depth",
>>>    &dp83867->fifo_depth);
>>>  }
>>> -#else
>>> -static int dp83867_of_init(struct phy_device *phydev)
>>> -{
>>> -   return 0;
>>> -}
>>> -#endif /* CONFIG_OF_MDIO */
>>>  
>>>  static int dp83867_config_init(struct phy_device *phydev)
>>>  {
>> I don't think we want this to depend solely on OF_MDIO.
>>
>> The #else case should probably be coded to look at platform data, if
>> it exists.  I don't have any boards that still used platform data to test 
>> this
>> out so I did not feel comfortable adding code I could not test.
> Since there was no code to look at platform data, those boards would be
> broken just as well today, no? So at the end of the day, this change
> should be no regression for them.

As Andrew pointed out if you are not using RGMII you don't need internal delay 
or fifo_depth so making the driver dependent on OF_MDIO
does not make sense.

The DP83867 RGMII tx and rx delays and fifo should really be changed to 
optional parameters and only programmed if set.

Dan
>
> Alex


-- 
--
Dan Murphy



Re: [RFC v2 PATCH 0/8] VFS:userns: support portable root filesystems

2016-05-16 Thread Seth Forshee
On Mon, May 16, 2016 at 11:42:46AM -0500, Eric W. Biederman wrote:
> Seth Forshee  writes:
> 
> > On Sat, May 14, 2016 at 09:21:55PM -0500, Eric W. Biederman wrote:
> >>   I have slowly been working with Seth Forshee on these issues as
> >>   the last thing I want is to introduce more security bugs right now.
> >>   Seth being a braver man than I am has already merged his changes into
> >>   the Ubuntu kernel.
> >
> > Maybe not quite so brave as you think. I also threw on a patch to
> > disable the feature unless explicitly enabled by a sys admin.
> >
> >> James I think you are missing the fact that all filesystems already have
> >> the make_kuid and make_kgid calls right where the data comes off disk,
> >> and the from_kuid and from_kgid calls right where the on-disk data is
> >> being created just before it goes on disk.  Which means that the actual
> >> impact on filesystems of the translation is trivial.
> >
> > It is fairly simple but there's a bit more than just id conversions to
> > change. With ext4 I found that there were mount options which needed to
> > be restricted, some capability checks to update, and access to external
> > journal devices must be checked. In all it wasn't a whole lot of changes
> > to the filesystem though. Fuse was a bit more involved, but the
> > complexities there won't apply to other filesystems.
> >
> >> Djalal if you could work with Seth I think that would be very useful.  I
> >> know I am dragging my heels there but I really hope I can dig in and get
> >> everything reviewed and merged soonish.
> >
> > That would make me very happy :-)
> 
> It has missed this merge window :( But I am hoping and aiming to
> review them and get your patches (or modified versions of your patches)
> into my tree as soon after rc1 as humanly possible.
> 
> Part of that will have to be the fix for mqueuefs, that Docker just hit.

Yeah, I've got a patch that's been tested to fix the bug, so I'll send
new patches which include that before long.

Seth



Re: CQ and RDMA READ/WRITE APIs

2016-05-16 Thread Doug Ledford
On 05/16/2016 01:46 PM, Linus Torvalds wrote:
> On Mon, May 16, 2016 at 7:51 AM, Doug Ledford  wrote:
>>
>> The linux kernel as a whole is, but individual files still retain their
>> separate copyright, they don't lose it just because they are shipped as
>> part of the larger kernel.
> 
> .. they do lose it if they have GPL'd code merged into them.
> 
> We do generally try pretty hard to respect dual licensing, though,
> just to make it easy to keep drivers that are intentionally shared
> with other projects still sharable.
> 
> That said, that is only true for individual drivers that started out
> that way. I missed the first part of the private discussion, but "new
> files into the subsystem" does not sound like that case, and them
> being GPL-only is pretty much the norm.

Agreed.  That was not my point of contention.

> That is particularly true if
> that new code came from other places in the kernel (or other GPLv2
> projects), where we don't even have a choice.

They were newly written by Christoph, so he has the right to license
them as he sees fit.

> In other words:
> 
>  - I _do_ heavily prefer that we keep dual-licensed drivers
> dual-licensed. It's not a _legal_ requirement, but it's certainly a
> matter of being polite.
> 
>If the original author of a driver dual-licensed it (or licensed it
> under something like a two-clause BSD license that can be converted to
> GPLv2), it's just bad form to ignore that original license.
> 
>  - the dual-license thing is _particularly_ true if the other license
> is actively used by developers who actually give back. If it's some
> kind of "we want to keep it dual-licensed without helping maintain
> it", I honestly don't give a shit any more.
> 
>IOW, if the people doing all the heavy lifting work on a particular
> file are GPL-only, then at that point there is nobody to be polite to
> any more.
> 
> Not knowing the details, I have a hard time making any sane judgement call.

In this particular case, the dual license is used by the OpenFabrics
Alliance.  They strip the RDMA stack in the kernel down to just the RDMA
stack files and ship those separate from the rest of the kernel, along
with the necessary user space stuff, and put the entire compilation
under the same dual GPL/BSD license.  That's what their OFED product is.

As I understand it, members of the OFA (Intel, Mellanox, Chelsio, etc.)
actually signed an agreement as part of their membership entry into OFA
that they would preserve that dual license when submitting code
upstream.  This was originally intended to make sure that the stack as a
whole could be used upstream, in distros, on switches, etc.  The idea
being that a unified stack that could be copied around would enhance
interoperability or something like that.

I can't speak to how actively used it is any more.  I think maybe on
switches or some other dedicated devices.  But, I was asked by the OFA
to try and preserve it.

In this particular case, Christoph wrote his code from scratch.  I'm not
concerned with it.  It was never dual licensed and need not be.  But he
did submit patches that modified existing dual license drivers to use
his new code and removed their own implementation of the same thing in
the process.  What used to be more or less functional drivers that could
be copied and used elsewhere will no longer be able to be copied in the
same way.  I'm just waiting for Sagi Grimberg to speak for iSER and for
Bart van Assche to speak for SRP and let me know that they are OK with
the change.  I think a patch set that will essentially change the
licensing nature of their code should carry their explicit approval of
the license change.

-- 
Doug Ledford 
  GPG KeyID: 0E572FDD




signature.asc
Description: OpenPGP digital signature


Re: [RFC v2 PATCH 0/8] VFS:userns: support portable root filesystems

2016-05-16 Thread Seth Forshee
On Mon, May 16, 2016 at 11:42:46AM -0500, Eric W. Biederman wrote:
> Seth Forshee  writes:
> 
> > On Sat, May 14, 2016 at 09:21:55PM -0500, Eric W. Biederman wrote:
> >>   I have slowly been working with Seth Forshee on these issues as
> >>   the last thing I want is to introduce more security bugs right now.
> >>   Seth being a braver man than I am has already merged his changes into
> >>   the Ubuntu kernel.
> >
> > Maybe not quite so brave as you think. I also threw on a patch to
> > disable the feature unless explicitly enabled by a sys admin.
> >
> >> James I think you are missing the fact that all filesystems already have
> >> the make_kuid and make_kgid calls right where the data comes off disk,
> >> and the from_kuid and from_kgid calls right where the on-disk data is
> >> being created just before it goes on disk.  Which means that the actual
> >> impact on filesystems of the translation is trivial.
> >
> > It is fairly simple but there's a bit more than just id conversions to
> > change. With ext4 I found that there were mount options which needed to
> > be restricted, some capability checks to update, and access to external
> > journal devices must be checked. In all it wasn't a whole lot of changes
> > to the filesystem though. Fuse was a bit more involved, but the
> > complexities there won't apply to other filesystems.
> >
> >> Djalal if you could work with Seth I think that would be very useful.  I
> >> know I am dragging my heels there but I really hope I can dig in and get
> >> everything reviewed and merged soonish.
> >
> > That would make me very happy :-)
> 
> It has missed this merge window :( But I am hoping and aiming to
> review them and get your patches (or modified versions of your patches)
> into my tree as soon after rc1 as humanly possible.
> 
> Part of that will have to be the fix for mqueuefs, that Docker just hit.

Yeah, I've got a patch that's been tested to fix the bug, so I'll send
new patches which include that before long.

Seth



Re: CQ and RDMA READ/WRITE APIs

2016-05-16 Thread Doug Ledford
On 05/16/2016 01:46 PM, Linus Torvalds wrote:
> On Mon, May 16, 2016 at 7:51 AM, Doug Ledford  wrote:
>>
>> The linux kernel as a whole is, but individual files still retain their
>> separate copyright, they don't lose it just because they are shipped as
>> part of the larger kernel.
> 
> .. they do lose it if they have GPL'd code merged into them.
> 
> We do generally try pretty hard to respect dual licensing, though,
> just to make it easy to keep drivers that are intentionally shared
> with other projects still sharable.
> 
> That said, that is only true for individual drivers that started out
> that way. I missed the first part of the private discussion, but "new
> files into the subsystem" does not sound like that case, and them
> being GPL-only is pretty much the norm.

Agreed.  That was not my point of contention.

> That is particularly true if
> that new code came from other places in the kernel (or other GPLv2
> projects), where we don't even have a choice.

They were newly written by Christoph, so he has the right to license
them as he sees fit.

> In other words:
> 
>  - I _do_ heavily prefer that we keep dual-licensed drivers
> dual-licensed. It's not a _legal_ requirement, but it's certainly a
> matter of being polite.
> 
>If the original author of a driver dual-licensed it (or licensed it
> under something like a two-clause BSD license that can be converted to
> GPLv2), it's just bad form to ignore that original license.
> 
>  - the dual-license thing is _particularly_ true if the other license
> is actively used by developers who actually give back. If it's some
> kind of "we want to keep it dual-licensed without helping maintain
> it", I honestly don't give a shit any more.
> 
>IOW, if the people doing all the heavy lifting work on a particular
> file are GPL-only, then at that point there is nobody to be polite to
> any more.
> 
> Not knowing the details, I have a hard time making any sane judgement call.

In this particular case, the dual license is used by the OpenFabrics
Alliance.  They strip the RDMA stack in the kernel down to just the RDMA
stack files and ship those separate from the rest of the kernel, along
with the necessary user space stuff, and put the entire compilation
under the same dual GPL/BSD license.  That's what their OFED product is.

As I understand it, members of the OFA (Intel, Mellanox, Chelsio, etc.)
actually signed an agreement as part of their membership entry into OFA
that they would preserve that dual license when submitting code
upstream.  This was originally intended to make sure that the stack as a
whole could be used upstream, in distros, on switches, etc.  The idea
being that a unified stack that could be copied around would enhance
interoperability or something like that.

I can't speak to how actively used it is any more.  I think maybe on
switches or some other dedicated devices.  But, I was asked by the OFA
to try and preserve it.

In this particular case, Christoph wrote his code from scratch.  I'm not
concerned with it.  It was never dual licensed and need not be.  But he
did submit patches that modified existing dual license drivers to use
his new code and removed their own implementation of the same thing in
the process.  What used to be more or less functional drivers that could
be copied and used elsewhere will no longer be able to be copied in the
same way.  I'm just waiting for Sagi Grimberg to speak for iSER and for
Bart van Assche to speak for SRP and let me know that they are OK with
the change.  I think a patch set that will essentially change the
licensing nature of their code should carry their explicit approval of
the license change.

-- 
Doug Ledford 
  GPG KeyID: 0E572FDD




signature.asc
Description: OpenPGP digital signature


Re: [PATCH] phy dp83867: depend on CONFIG_OF_MDIO

2016-05-16 Thread Dan Murphy
Alex

On 05/16/2016 12:57 PM, Alexander Graf wrote:
> Hi Dan,
>
> On 16.05.16 15:38, Dan Murphy wrote:
>> Alexander
>>
>> On 05/16/2016 06:28 AM, Alexander Graf wrote:
>>> The DP83867 phy driver doesn't actually work when CONFIG_OF_MDIO isn't 
>>> enabled.
>>> It simply passes the device tree test, but leaves all internal configuration
>>> initialized at 0. Then it configures the phy with those values and renders a
>>> previously working configuration useless.
>>>
>>> This patch makes sure that we only build the DP83867 phy code when
>>> CONFIG_OF_MDIO is set, to not run into that problem.
>>>
>>> Signed-off-by: Alexander Graf 
>>> ---
>>>  drivers/net/phy/Kconfig   | 1 +
>>>  drivers/net/phy/dp83867.c | 7 ---
>>>  2 files changed, 1 insertion(+), 7 deletions(-)
>>>
>>> diff --git a/drivers/net/phy/Kconfig b/drivers/net/phy/Kconfig
>>> index 6dad9a9..4265ad5 100644
>>> --- a/drivers/net/phy/Kconfig
>>> +++ b/drivers/net/phy/Kconfig
>>> @@ -148,6 +148,7 @@ config DP83848_PHY
>>>  
>>>  config DP83867_PHY
>>> tristate "Drivers for Texas Instruments DP83867 Gigabit PHY"
>>> +   depends on OF_MDIO
>>> ---help---
>>>   Currently supports the DP83867 PHY.
>>>  
>>> diff --git a/drivers/net/phy/dp83867.c b/drivers/net/phy/dp83867.c
>>> index 2afa61b..ff867ba 100644
>>> --- a/drivers/net/phy/dp83867.c
>>> +++ b/drivers/net/phy/dp83867.c
>>> @@ -99,7 +99,6 @@ static int dp83867_config_intr(struct phy_device *phydev)
>>> return phy_write(phydev, MII_DP83867_MICR, micr_status);
>>>  }
>>>  
>>> -#ifdef CONFIG_OF_MDIO
>>>  static int dp83867_of_init(struct phy_device *phydev)
>>>  {
>>> struct dp83867_private *dp83867 = phydev->priv;
>>> @@ -123,12 +122,6 @@ static int dp83867_of_init(struct phy_device *phydev)
>>> return of_property_read_u32(of_node, "ti,fifo-depth",
>>>     &dp83867->fifo_depth);
>>>  }
>>> -#else
>>> -static int dp83867_of_init(struct phy_device *phydev)
>>> -{
>>> -   return 0;
>>> -}
>>> -#endif /* CONFIG_OF_MDIO */
>>>  
>>>  static int dp83867_config_init(struct phy_device *phydev)
>>>  {
>> I don't think we want this to depend solely on OF_MDIO.
>>
>> The #else case should probably be coded to look at platform data, if
>> it exists.  I don't have any boards that still used platform data to test 
>> this
>> out so I did not feel comfortable adding code I could not test.
> Since there was no code to look at platform data, those boards would be
> broken just as well today, no? So at the end of the day, this change
> should be no regression for them.

As Andrew pointed out, if you are not using RGMII you don't need the internal
delay or fifo_depth, so making the driver depend on OF_MDIO
does not make sense.

The DP83867 RGMII tx and rx delays and fifo should really be changed to 
optional parameters and only programmed if set.

Dan
>
> Alex


-- 
--
Dan Murphy



Re: klp_task_patch: was: [RFC PATCH v2 17/18] livepatch: change to a per-task consistency model

2016-05-16 Thread Josh Poimboeuf
On Mon, May 09, 2016 at 02:23:03PM +0200, Petr Mladek wrote:
> On Fri 2016-05-06 07:38:55, Josh Poimboeuf wrote:
> > On Thu, May 05, 2016 at 01:57:01PM +0200, Petr Mladek wrote:
> > > I have missed that the two commands are called with preemption
> > > disabled. So, I had the following crazy scenario in mind:
> > > 
> > > 
> > > CPU0  CPU1
> > > 
> > > klp_enable_patch()
> > > 
> > >   klp_target_state = KLP_PATCHED;
> > > 
> > >   for_each_task()
> > >  set TIF_PENDING_PATCH
> > > 
> > >   # task 123
> > > 
> > >   if (klp_patch_pending(current)
> > > klp_patch_task(current)
> > > 
> > > clear TIF_PENDING_PATCH
> > > 
> > >   smp_rmb();
> > > 
> > >   # switch to assembly of
> > >   # klp_patch_task()
> > > 
> > >   mov klp_target_state, %r12
> > > 
> > >   # interrupt and schedule
> > >   # another task
> > > 
> > > 
> > >   klp_reverse_transition();
> > > 
> > > klp_target_state = KLP_UNPATCHED;
> > > 
> > > klt_try_to_complete_transition()
> > > 
> > >   task = 123;
> > >   if (task->patch_state == klp_target_state;
> > >  return 0;
> > > 
> > > => task 123 is in target state and does
> > > not block conversion
> > > 
> > >   klp_complete_transition()
> > > 
> > > 
> > >   # disable previous patch on the stack
> > >   klp_disable_patch();
> > > 
> > > klp_target_state = KLP_UNPATCHED;
> > >   
> > >   
> > >   # task 123 gets scheduled again
> > >   lea %r12, task->patch_state
> > > 
> > >   => it happily stores an outdated
> > >   state
> > > 
> > 
> > Thanks for the clear explanation, this helps a lot.
> > 
> > > This is why the two functions should get called with preemption
> > > disabled. We should document it at least. I imagine that we will
> > > use them later also in another context and nobody will remember
> > > this crazy scenario.
> > > 
> > > Well, even disabled preemption does not help. The process on
> > > CPU1 might be also interrupted by an NMI and do some long
> > > printk in it.
> > > 
> > > IMHO, the only safe approach is to call klp_patch_task()
> > > only for "current" on a safe place. Then this race is harmless.
> > > The switch happen on a safe place, so that it does not matter
> > > into which state the process is switched.
> > 
> > I'm not sure about this solution.  When klp_complete_transition() is
> > called, we need all tasks to be patched, for good.  We don't want any of
> > them to randomly switch to the wrong state at some later time in the
> > middle of a future patch operation.  How would changing klp_patch_task()
> > to only use "current" prevent that?
> 
> You are right that it is pity but it really should be safe because
> it is not entirely random.
> 
> If the race happens and assign an outdated value, there are two
> situations:
> 
> 1. It is assigned when there is no transition in progress.
>Then it is OK because it will be ignored by the ftrace handler.
>The right state will be set before the next transition starts.
> 
> 2. It is assigned when some other transition is in progress.
>Then it is OK as long as the function is called from "current".
>The "wrong" state will be used consistently. It will switch
>to the right state on another safe state.

Maybe it would be safe, though I'm not entirely convinced.  Regardless I
think we should avoid these situations entirely because they create
windows for future bugs and races.

> > > By other words, the task state might be updated only
> > > 
> > >+ by the task itself on a safe place
> > >+ by other task when the updated one is sleeping on a safe place
> > > 
> > > This should be well documented and the API should help to avoid
> > > a misuse.
> > 
> > I think we could fix it to be safe for future callers who might not have
> > preemption disabled with a couple of changes to klp_patch_task():
> > disabling preemption and testing/clearing the TIF_PATCH_PENDING flag
> > before changing the patch state:
> > 
> >   void klp_patch_task(struct task_struct *task)
> >   {
> > preempt_disable();
> >   
> > if (test_and_clear_tsk_thread_flag(task, TIF_PATCH_PENDING))
> > task->patch_state = READ_ONCE(klp_target_state);
> >   
> > preempt_enable();
> >   }
> 
> It reduces the race window a bit but it is still there. For example,
> NMI still might add a huge delay between reading klp_target_state
> and assigning task->patch state.

Maybe you missed this paragraph from my last email:

| We would also need a synchronize_sched() after the patching is complete,
| either at the end of klp_try_complete_transition() or in
| 

Re: klp_task_patch: was: [RFC PATCH v2 17/18] livepatch: change to a per-task consistency model

2016-05-16 Thread Josh Poimboeuf
On Mon, May 09, 2016 at 02:23:03PM +0200, Petr Mladek wrote:
> On Fri 2016-05-06 07:38:55, Josh Poimboeuf wrote:
> > On Thu, May 05, 2016 at 01:57:01PM +0200, Petr Mladek wrote:
> > > I have missed that the two commands are called with preemption
> > > disabled. So, I had the following crazy scenario in mind:
> > > 
> > > 
> > > CPU0  CPU1
> > > 
> > > klp_enable_patch()
> > > 
> > >   klp_target_state = KLP_PATCHED;
> > > 
> > >   for_each_task()
> > >  set TIF_PENDING_PATCH
> > > 
> > >   # task 123
> > > 
> > >   if (klp_patch_pending(current)
> > > klp_patch_task(current)
> > > 
> > > clear TIF_PENDING_PATCH
> > > 
> > >   smp_rmb();
> > > 
> > >   # switch to assembly of
> > >   # klp_patch_task()
> > > 
> > >   mov klp_target_state, %r12
> > > 
> > >   # interrupt and schedule
> > >   # another task
> > > 
> > > 
> > >   klp_reverse_transition();
> > > 
> > > klp_target_state = KLP_UNPATCHED;
> > > 
> > > klt_try_to_complete_transition()
> > > 
> > >   task = 123;
> > >   if (task->patch_state == klp_target_state;
> > >  return 0;
> > > 
> > > => task 123 is in target state and does
> > > not block conversion
> > > 
> > >   klp_complete_transition()
> > > 
> > > 
> > >   # disable previous patch on the stack
> > >   klp_disable_patch();
> > > 
> > > klp_target_state = KLP_UNPATCHED;
> > >   
> > >   
> > >   # task 123 gets scheduled again
> > >   lea %r12, task->patch_state
> > > 
> > >   => it happily stores an outdated
> > >   state
> > > 
> > 
> > Thanks for the clear explanation, this helps a lot.
> > 
> > > This is why the two functions should get called with preemption
> > > disabled. We should document it at least. I imagine that we will
> > > use them later also in another context and nobody will remember
> > > this crazy scenario.
> > > 
> > > Well, even disabled preemption does not help. The process on
> > > CPU1 might be also interrupted by an NMI and do some long
> > > printk in it.
> > > 
> > > IMHO, the only safe approach is to call klp_patch_task()
> > > only for "current" on a safe place. Then this race is harmless.
> > > The switch happen on a safe place, so that it does not matter
> > > into which state the process is switched.
> > 
> > I'm not sure about this solution.  When klp_complete_transition() is
> > called, we need all tasks to be patched, for good.  We don't want any of
> > them to randomly switch to the wrong state at some later time in the
> > middle of a future patch operation.  How would changing klp_patch_task()
> > to only use "current" prevent that?
> 
> You are right that it is pity but it really should be safe because
> it is not entirely random.
> 
> If the race happens and assign an outdated value, there are two
> situations:
> 
> 1. It is assigned when there is no transition in progress.
>Then it is OK because it will be ignored by the ftrace handler.
>The right state will be set before the next transition starts.
> 
> 2. It is assigned when some other transition is in progress.
>Then it is OK as long as the function is called from "current".
>The "wrong" state will be used consistently. It will switch
>to the right state on another safe state.

Maybe it would be safe, though I'm not entirely convinced.  Regardless I
think we should avoid these situations entirely because they create
windows for future bugs and races.

> > > By other words, the task state might be updated only
> > > 
> > >+ by the task itself on a safe place
> > >+ by other task when the updated one is sleeping on a safe place
> > > 
> > > This should be well documented and the API should help to avoid
> > > a misuse.
> > 
> > I think we could fix it to be safe for future callers who might not have
> > preemption disabled with a couple of changes to klp_patch_task():
> > disabling preemption and testing/clearing the TIF_PATCH_PENDING flag
> > before changing the patch state:
> > 
> >   void klp_patch_task(struct task_struct *task)
> >   {
> > preempt_disable();
> >   
> > if (test_and_clear_tsk_thread_flag(task, TIF_PATCH_PENDING))
> > task->patch_state = READ_ONCE(klp_target_state);
> >   
> > preempt_enable();
> >   }
> 
> It reduces the race window a bit but it is still there. For example,
> NMI still might add a huge delay between reading klp_target_state
> and assigning task->patch state.

Maybe you missed this paragraph from my last email:

| We would also need a synchronize_sched() after the patching is complete,
| either at the end of klp_try_complete_transition() or in
| 

Re: [v4.6-rc7-183-g1410b74e4061]

2016-05-16 Thread Sedat Dilek
On 5/16/16, Peter Zijlstra  wrote:
> On Mon, May 16, 2016 at 07:42:35PM +0200, Sedat Dilek wrote:
>
>> Unfortunately, I could not reproduce this again with any of my
>> 183-kernels.
>> When I first hit a "chain_key collision" issue, it was hard to reproduce,
>> so.
>> Any idea, how I can "force" this?
>
> Nope; I wish I knew, that'd be so much easier to work with :/
>
> I'm hoping someone will report a reproducer, even something that
> triggers once every 5-10 runs would be awesome.
>
> In any case, like I've explained before, nothing regressed as such, we
> only added this new warning under DEBUG_LOCKDEP because we want to
> better understand the condition that triggers it.
>
> If it bothers you, just turn off DEBUG_LOCKDEP and know that your kernel
> is as reliable as it was before. OTOH, if you do keep it on, please
> let me know if you can (semi) reliably trigger this, as I'd really like
> to have a better understanding.
>

OK, I keep checking my logs.

I refreshed your patch Ingo pointed me to.

But it fails like this (on top of Linux v4.6 final)...
[...]
  if [ "" = "-pg" ]; then if [ kernel/locking/mutex-debug.o !=
"scripts/mod/empty.o" ]; then ./scripts/recordmcount
"kernel/locking/mutex-debug.o"; fi; fi;
  mycompiler -Wp,-MD,kernel/locking/.lockdep.o.d  -nostdinc -isystem
/usr/lib/gcc/x86_64-linux-gnu/4.9/include -nostdinc -isystem
/usr/lib/gcc/x86_64-linux-gnu/4.9/include -I./arch/x86/include
-Iarch/x86/include/generated/uapi -Iarch/x86/include/generated
-Iinclude -I./arch/x86/include/uapi -Iarch/x86/include/generated/uapi
-I./include/uapi -Iinclude/generated/uapi -include
./include/linux/kconfig.h -D__KERNEL__ -Wall -Wundef
-Wstrict-prototypes -Wno-trigraphs -fno-strict-aliasing -fno-common
-Werror-implicit-function-declaration -Wno-format-security -std=gnu89
-mno-sse -mno-mmx -mno-sse2 -mno-3dnow -mno-avx -m64 -falign-jumps=1
-falign-loops=1 -mno-80387 -mno-fp-ret-in-387
-mpreferred-stack-boundary=3 -mtune=generic -mno-red-zone
-mcmodel=kernel -funit-at-a-time -maccumulate-outgoing-args
-DCONFIG_X86_X32_ABI -DCONFIG_AS_CFI=1 -DCONFIG_AS_CFI_SIGNAL_FRAME=1
-DCONFIG_AS_CFI_SECTIONS=1 -DCONFIG_AS_FXSAVEQ=1 -DCONFIG_AS_SSSE3=1
-DCONFIG_AS_CRC32=1 -DCONFIG_AS_AVX=1 -DCONFIG_AS_AVX2=1 -pipe
-Wno-sign-compare -fno-asynchronous-unwind-tables
-fno-delete-null-pointer-checks -O2 --param=allow-store-data-races=0
-Wframe-larger-than=1024 -fno-stack-protector
-Wno-unused-but-set-variable -fno-omit-frame-pointer
-fno-optimize-sibling-calls -fno-var-tracking-assignments -mfentry
-DCC_USING_FENTRY -Wdeclaration-after-statement -Wno-pointer-sign
-fno-strict-overflow -fconserve-stack -Werror=implicit-int
-Werror=strict-prototypes -Werror=date-time -DCC_HAVE_ASM_GOTO
-D"KBUILD_STR(s)=#s" -D"KBUILD_BASENAME=KBUILD_STR(lockdep)"
-D"KBUILD_MODNAME=KBUILD_STR(lockdep)" -c -o
kernel/locking/.tmp_lockdep.o kernel/locking/lockdep.c
kernel/locking/lockdep.c: In function 'print_chain_keys_held_locks':
kernel/locking/lockdep.c:2034:2: error: too few arguments to function
'print_chain_key_iteration'
  print_chain_key_iteration(hlock_next->class_idx, chain_key);
  ^
kernel/locking/lockdep.c:2006:12: note: declared here
 static u64 print_chain_key_iteration(int class_idx, u64 chain_key,
u64 prev_key)
^
make[4]: *** [kernel/locking/lockdep.o] Error 1
make[3]: *** [kernel/locking] Error 2
make[2]: *** [kernel] Error 2
[...]

- Sedat -
From b953be255bfb46970c75950e297be836577bc525 Mon Sep 17 00:00:00 2001
From: Sedat Dilek 
Date: Mon, 16 May 2016 15:51:04 +0200
Subject: [PATCH] locking/lockdep: Some more additional chain_key collision
 information

From: Peter Zijlstra 

For more details see thread "[v4.6-rc7-183-g1410b74e4061]" at LKML [1].

Patch for testing from Peter Zijlstra see [2] and [3].

[1] http://marc.info/?t=14632178432=1=2
[2] http://marc.info/?l=linux-kernel&m=146339587506110&w=2
[3] https://lkml.org/lkml/2016/5/10/214

Cc: Wanpeng Li 
Cc: Alfredo Alvarez Fernandez 
Cc: Peter Zijlstra (Intel) 
Cc: Linus Torvalds 
Cc: Thomas Gleixner 
Cc: Ingo Molnar 
Cc: Sedat Dilek 
Cc: Ted Tso 
Cc: LKML 
Cc: the arch/x86 maintainers 
Cc: linux-fsde...@vger.kernel.org
---
 kernel/locking/lockdep.c | 31 +++
 1 file changed, 19 insertions(+), 12 deletions(-)

diff --git a/kernel/locking/lockdep.c b/kernel/locking/lockdep.c
index 78c1c0ee6dc1..5dc21eb101b0 100644
--- a/kernel/locking/lockdep.c
+++ b/kernel/locking/lockdep.c
@@ -2003,13 +2003,14 @@ static inline int get_first_held_lock(struct task_struct *curr,
 /*
  * Returns the next chain_key iteration
  */
-static u64 print_chain_key_iteration(int class_idx, u64 chain_key)
+static u64 print_chain_key_iteration(int class_idx, u64 

Re: [v4.6-rc7-183-g1410b74e4061]

2016-05-16 Thread Sedat Dilek
On 5/16/16, Peter Zijlstra  wrote:
> On Mon, May 16, 2016 at 07:42:35PM +0200, Sedat Dilek wrote:
>
>> Unfortunately, I could not reproduce this again with any of my
>> 183-kernels.
>> When I first hit a "chain_key collision" issue, it was hard to reproduce,
>> so.
>> Any idea, how I can "force" this?
>
> Nope; I wish I knew, that'd be so much easier to work with :/
>
> I'm hoping someone will report a reproducer, even something that
> triggers once every 5-10 runs would be awesome.
>
> In any case, like I've explained before, nothing regressed as such, we
> only added this new warning under DEBUG_LOCKDEP because we want to
> better understand the condition that triggers it.
>
> If it bothers you, just turn off DEBUG_LOCKDEP and know that your kernel
> is as reliable as it was before. OTOH, if you do keep it on, please
> let me know if you can (semi) reliably trigger this, as I'd really like
> to have a better understanding.
>

OK, I keep checking my logs.

I refreshed your patch Ingo pointed me to.

But it fails like this (on top of Linux v4.6 final)...
[...]
  if [ "" = "-pg" ]; then if [ kernel/locking/mutex-debug.o !=
"scripts/mod/empty.o" ]; then ./scripts/recordmcount
"kernel/locking/mutex-debug.o"; fi; fi;
  mycompiler -Wp,-MD,kernel/locking/.lockdep.o.d  -nostdinc -isystem
/usr/lib/gcc/x86_64-linux-gnu/4.9/include -nostdinc -isystem
/usr/lib/gcc/x86_64-linux-gnu/4.9/include -I./arch/x86/include
-Iarch/x86/include/generated/uapi -Iarch/x86/include/generated
-Iinclude -I./arch/x86/include/uapi -Iarch/x86/include/generated/uapi
-I./include/uapi -Iinclude/generated/uapi -include
./include/linux/kconfig.h -D__KERNEL__ -Wall -Wundef
-Wstrict-prototypes -Wno-trigraphs -fno-strict-aliasing -fno-common
-Werror-implicit-function-declaration -Wno-format-security -std=gnu89
-mno-sse -mno-mmx -mno-sse2 -mno-3dnow -mno-avx -m64 -falign-jumps=1
-falign-loops=1 -mno-80387 -mno-fp-ret-in-387
-mpreferred-stack-boundary=3 -mtune=generic -mno-red-zone
-mcmodel=kernel -funit-at-a-time -maccumulate-outgoing-args
-DCONFIG_X86_X32_ABI -DCONFIG_AS_CFI=1 -DCONFIG_AS_CFI_SIGNAL_FRAME=1
-DCONFIG_AS_CFI_SECTIONS=1 -DCONFIG_AS_FXSAVEQ=1 -DCONFIG_AS_SSSE3=1
-DCONFIG_AS_CRC32=1 -DCONFIG_AS_AVX=1 -DCONFIG_AS_AVX2=1 -pipe
-Wno-sign-compare -fno-asynchronous-unwind-tables
-fno-delete-null-pointer-checks -O2 --param=allow-store-data-races=0
-Wframe-larger-than=1024 -fno-stack-protector
-Wno-unused-but-set-variable -fno-omit-frame-pointer
-fno-optimize-sibling-calls -fno-var-tracking-assignments -mfentry
-DCC_USING_FENTRY -Wdeclaration-after-statement -Wno-pointer-sign
-fno-strict-overflow -fconserve-stack -Werror=implicit-int
-Werror=strict-prototypes -Werror=date-time -DCC_HAVE_ASM_GOTO
-D"KBUILD_STR(s)=#s" -D"KBUILD_BASENAME=KBUILD_STR(lockdep)"
-D"KBUILD_MODNAME=KBUILD_STR(lockdep)" -c -o
kernel/locking/.tmp_lockdep.o kernel/locking/lockdep.c
kernel/locking/lockdep.c: In function 'print_chain_keys_held_locks':
kernel/locking/lockdep.c:2034:2: error: too few arguments to function
'print_chain_key_iteration'
  print_chain_key_iteration(hlock_next->class_idx, chain_key);
  ^
kernel/locking/lockdep.c:2006:12: note: declared here
 static u64 print_chain_key_iteration(int class_idx, u64 chain_key,
u64 prev_key)
^
make[4]: *** [kernel/locking/lockdep.o] Error 1
make[3]: *** [kernel/locking] Error 2
make[2]: *** [kernel] Error 2
[...]

- Sedat -
From b953be255bfb46970c75950e297be836577bc525 Mon Sep 17 00:00:00 2001
From: Sedat Dilek 
Date: Mon, 16 May 2016 15:51:04 +0200
Subject: [PATCH] locking/lockdep: Some more additional chain_key collision
 information

From: Peter Zijlstra 

For more details see thread "[v4.6-rc7-183-g1410b74e4061]" at LKML [1].

Patch for testing from Peter Zijlstra see [2] and [3].

[1] http://marc.info/?t=14632178432=1=2
[2] http://marc.info/?l=linux-kernel&m=146339587506110&w=2
[3] https://lkml.org/lkml/2016/5/10/214

Cc: Wanpeng Li 
Cc: Alfredo Alvarez Fernandez 
Cc: Peter Zijlstra (Intel) 
Cc: Linus Torvalds 
Cc: Thomas Gleixner 
Cc: Ingo Molnar 
Cc: Sedat Dilek 
Cc: Ted Tso 
Cc: LKML 
Cc: the arch/x86 maintainers 
Cc: linux-fsde...@vger.kernel.org
---
 kernel/locking/lockdep.c | 31 +++
 1 file changed, 19 insertions(+), 12 deletions(-)

diff --git a/kernel/locking/lockdep.c b/kernel/locking/lockdep.c
index 78c1c0ee6dc1..5dc21eb101b0 100644
--- a/kernel/locking/lockdep.c
+++ b/kernel/locking/lockdep.c
@@ -2003,13 +2003,14 @@ static inline int get_first_held_lock(struct task_struct *curr,
 /*
  * Returns the next chain_key iteration
  */
-static u64 print_chain_key_iteration(int class_idx, u64 chain_key)
+static u64 print_chain_key_iteration(int class_idx, u64 chain_key, u64 prev_key)
 {
 	u64 new_chain_key = iterate_chain_key(chain_key, class_idx);
 
-	printk(" class_idx:%d -> chain_key:%016Lx",
+	printk(" class_idx: %d -> chain_key: %016Lx (%016Lx)",
 		class_idx,
-		(unsigned long long)new_chain_key);
+		(unsigned long long)new_chain_key,
+		(unsigned long 

<    4   5   6   7   8   9   10   11   12   13   >