[GIT PULL] SMP bootup printout changes for v4.10

2016-12-11 Thread Ingo Molnar
Linus,

Please pull the latest core-smp-for-linus git tree from:

   git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git core-smp-for-linus

   # HEAD: 5dce2509506d16efd321939895ff7ffe1dc2 kernel/smp: Tell the user 
we're bringing up secondary CPUs

Three changes to unify/standardize some of the bootup message printing in 
kernel/smp.c between architectures.

 Thanks,

Ingo

-->
Michael Ellerman (3):
  kernel/smp: Define pr_fmt() for smp.c
  kernel/smp: Make the SMP boot message common on all arches
  kernel/smp: Tell the user we're bringing up secondary CPUs


 arch/x86/kernel/smpboot.c |  8 
 kernel/smp.c  | 18 --
 2 files changed, 12 insertions(+), 14 deletions(-)

diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index 42f5eb7b4f6c..b9f02383f372 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -821,14 +821,6 @@ wakeup_secondary_cpu_via_init(int phys_apicid, unsigned 
long start_eip)
return (send_status | accept_status);
 }
 
-void smp_announce(void)
-{
-   int num_nodes = num_online_nodes();
-
-   printk(KERN_INFO "x86: Booted up %d node%s, %d CPUs\n",
-  num_nodes, (num_nodes > 1 ? "s" : ""), num_online_cpus());
-}
-
 /* reduce the number of lines printed when booting a large cpu count system */
 static void announce_cpu(int cpu, int apicid)
 {
diff --git a/kernel/smp.c b/kernel/smp.c
index bba3b201668d..77fcdb9f2775 100644
--- a/kernel/smp.c
+++ b/kernel/smp.c
@@ -3,6 +3,9 @@
  *
  * (C) Jens Axboe  2008
  */
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
 #include 
 #include 
 #include 
@@ -543,19 +546,17 @@ void __init setup_nr_cpu_ids(void)
nr_cpu_ids = find_last_bit(cpumask_bits(cpu_possible_mask),NR_CPUS) + 1;
 }
 
-void __weak smp_announce(void)
-{
-   printk(KERN_INFO "Brought up %d CPUs\n", num_online_cpus());
-}
-
 /* Called by boot processor to activate the rest. */
 void __init smp_init(void)
 {
+   int num_nodes, num_cpus;
unsigned int cpu;
 
idle_threads_init();
cpuhp_threads_init();
 
+   pr_info("Bringing up secondary CPUs ...\n");
+
/* FIXME: This should be done in userspace --RR */
for_each_present_cpu(cpu) {
if (num_online_cpus() >= setup_max_cpus)
@@ -564,8 +565,13 @@ void __init smp_init(void)
cpu_up(cpu);
}
 
+   num_nodes = num_online_nodes();
+   num_cpus  = num_online_cpus();
+   pr_info("Brought up %d node%s, %d CPU%s\n",
+   num_nodes, (num_nodes > 1 ? "s" : ""),
+   num_cpus,  (num_cpus  > 1 ? "s" : ""));
+
/* Any cleanup work */
-   smp_announce();
smp_cpus_done(setup_max_cpus);
 }
 


[GIT PULL] SMP bootup printout changes for v4.10

2016-12-11 Thread Ingo Molnar
Linus,

Please pull the latest core-smp-for-linus git tree from:

   git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git core-smp-for-linus

   # HEAD: 5dce2509506d16efd321939895ff7ffe1dc2 kernel/smp: Tell the user 
we're bringing up secondary CPUs

Three changes to unify/standardize some of the bootup message printing in 
kernel/smp.c between architectures.

 Thanks,

Ingo

-->
Michael Ellerman (3):
  kernel/smp: Define pr_fmt() for smp.c
  kernel/smp: Make the SMP boot message common on all arches
  kernel/smp: Tell the user we're bringing up secondary CPUs


 arch/x86/kernel/smpboot.c |  8 
 kernel/smp.c  | 18 --
 2 files changed, 12 insertions(+), 14 deletions(-)

diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index 42f5eb7b4f6c..b9f02383f372 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -821,14 +821,6 @@ wakeup_secondary_cpu_via_init(int phys_apicid, unsigned 
long start_eip)
return (send_status | accept_status);
 }
 
-void smp_announce(void)
-{
-   int num_nodes = num_online_nodes();
-
-   printk(KERN_INFO "x86: Booted up %d node%s, %d CPUs\n",
-  num_nodes, (num_nodes > 1 ? "s" : ""), num_online_cpus());
-}
-
 /* reduce the number of lines printed when booting a large cpu count system */
 static void announce_cpu(int cpu, int apicid)
 {
diff --git a/kernel/smp.c b/kernel/smp.c
index bba3b201668d..77fcdb9f2775 100644
--- a/kernel/smp.c
+++ b/kernel/smp.c
@@ -3,6 +3,9 @@
  *
  * (C) Jens Axboe  2008
  */
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
 #include 
 #include 
 #include 
@@ -543,19 +546,17 @@ void __init setup_nr_cpu_ids(void)
nr_cpu_ids = find_last_bit(cpumask_bits(cpu_possible_mask),NR_CPUS) + 1;
 }
 
-void __weak smp_announce(void)
-{
-   printk(KERN_INFO "Brought up %d CPUs\n", num_online_cpus());
-}
-
 /* Called by boot processor to activate the rest. */
 void __init smp_init(void)
 {
+   int num_nodes, num_cpus;
unsigned int cpu;
 
idle_threads_init();
cpuhp_threads_init();
 
+   pr_info("Bringing up secondary CPUs ...\n");
+
/* FIXME: This should be done in userspace --RR */
for_each_present_cpu(cpu) {
if (num_online_cpus() >= setup_max_cpus)
@@ -564,8 +565,13 @@ void __init smp_init(void)
cpu_up(cpu);
}
 
+   num_nodes = num_online_nodes();
+   num_cpus  = num_online_cpus();
+   pr_info("Brought up %d node%s, %d CPU%s\n",
+   num_nodes, (num_nodes > 1 ? "s" : ""),
+   num_cpus,  (num_cpus  > 1 ? "s" : ""));
+
/* Any cleanup work */
-   smp_announce();
smp_cpus_done(setup_max_cpus);
 }
 


Re: [PATCH v6 0/3] spi-nor: Add support for Intel SPI serial flash controller

2016-12-11 Thread Lee Jones
On Fri, 09 Dec 2016, Marek Vasut wrote:

> On 12/09/2016 01:25 PM, Mika Westerberg wrote:
> > On Fri, Dec 09, 2016 at 08:57:53AM +0000, Lee Jones wrote:
> >> On Wed, 07 Dec 2016, Marek Vasut wrote:
> >>
> >>> On 12/07/2016 09:53 AM, Mika Westerberg wrote:
>  On Tue, Dec 06, 2016 at 09:45:25AM +0000, Lee Jones wrote:
> > I'm happy either way.  However if you take them, I will require a
> > pull-request to an immutable branch containing only these patches.
> >
> > If I take them, it won't be until v4.11, since we are halfway through
> > -rc8 already and I would like them to soak in -next for at least a
> > couple of weeks.
> 
>  This series already missed v4.8 and v4.9 so I don't think there is a
>  rush to get it for v4.10 either ;-) I'm fine if it goes for v4.11.
> >>>
> >>> Hmmm, that kinda sucks. Lee, are you positive this isn't 4.10
> >>> material ?
> >>
> >> The merge-window opens in 2 days.
> >>
> >> As I mentioned, I like patches to reside in -next for at least 1 maybe
> >> 2 RC cycles before merging.  It's far too easy to accept code, then get
> >> bitten if/when it breaks after being merged by Linus.
> > 
> > I agree. Better to give it some time in linux-next.
> > 
> > Can you queue this series for v4.11?
> 
> OK, I'm fine with this.

Yes, no problem.

-- 
Lee Jones
Linaro STMicroelectronics Landing Team Lead
Linaro.org │ Open source software for ARM SoCs
Follow Linaro: Facebook | Twitter | Blog


Re: [PATCH v6 0/3] spi-nor: Add support for Intel SPI serial flash controller

2016-12-11 Thread Lee Jones
On Fri, 09 Dec 2016, Marek Vasut wrote:

> On 12/09/2016 01:25 PM, Mika Westerberg wrote:
> > On Fri, Dec 09, 2016 at 08:57:53AM +0000, Lee Jones wrote:
> >> On Wed, 07 Dec 2016, Marek Vasut wrote:
> >>
> >>> On 12/07/2016 09:53 AM, Mika Westerberg wrote:
>  On Tue, Dec 06, 2016 at 09:45:25AM +0000, Lee Jones wrote:
> > I'm happy either way.  However if you take them, I will require a
> > pull-request to an immutable branch containing only these patches.
> >
> > If I take them, it won't be until v4.11, since we are halfway through
> > -rc8 already and I would like them to soak in -next for at least a
> > couple of weeks.
> 
>  This series already missed v4.8 and v4.9 so I don't think there is a
>  rush to get it for v4.10 either ;-) I'm fine if it goes for v4.11.
> >>>
> >>> Hmmm, that kinda sucks. Lee, are you positive this isn't 4.10
> >>> material ?
> >>
> >> The merge-window opens in 2 days.
> >>
> >> As I mentioned, I like patches to reside in -next for at least 1 maybe
> >> 2 RC cycles before merging.  It's far too easy to accept code, then get
> >> bitten if/when it breaks after being merged by Linus.
> > 
> > I agree. Better to give it some time in linux-next.
> > 
> > Can you queue this series for v4.11?
> 
> OK, I'm fine with this.

Yes, no problem.

-- 
Lee Jones
Linaro STMicroelectronics Landing Team Lead
Linaro.org │ Open source software for ARM SoCs
Follow Linaro: Facebook | Twitter | Blog


[PATCH] drivers: media: i2c: constify v4l2_subdev_* structures

2016-12-11 Thread Bhumika Goyal
v4l2_subdev_{core/pad/video}_ops structures are stored in the
fields of the v4l2_subdev_ops structure which are of type const.
Also, v4l2_subdev_ops structure is passed to a function
having its argument of type const. As these structures are never
modified, declare them as const.
Done using Coccinelle: (One of the scripts used)

@r1 disable optional_qualifier @
identifier i;
position p;
@@
static struct v4l2_subdev_video_ops i@p = {...};

@ok1@
identifier r1.i;
position p;
struct v4l2_subdev_ops obj;
@@
obj.video=@p;

@bad@
position p!={r1.p,ok1.p};
identifier r1.i;
@@
i@p

@depends on !bad disable optional_qualifier@
identifier r1.i;
@@
+const
struct v4l2_subdev_video_ops i;

File sizes before:
   text    data     bss     dec     hex filename
   7810     736      16    8562    2172 drivers/media/i2c/mt9p031.o
   9652     736      24   10412    28ac drivers/media/i2c/mt9v032.o
   4613     552      20    5185    1441 drivers/media/i2c/noon010pc30.o
   2615     552       8    3175     c67 drivers/media/i2c/s5k6a3.o

File sizes after:
   text    data     bss     dec     hex filename
   8322     232      16    8570    217a drivers/media/i2c/mt9p031.o
  10164     232      24   10420    28b4 drivers/media/i2c/mt9v032.o
   4933     232      20    5185    1441 drivers/media/i2c/noon010pc30.o
   2935     232       8    3175     c67 drivers/media/i2c/s5k6a3.o

Signed-off-by: Bhumika Goyal 
---
 drivers/media/i2c/mt9p031.c | 8 
 drivers/media/i2c/mt9v032.c | 8 
 drivers/media/i2c/noon010pc30.c | 4 ++--
 drivers/media/i2c/s5k6a3.c  | 6 +++---
 4 files changed, 13 insertions(+), 13 deletions(-)

diff --git a/drivers/media/i2c/mt9p031.c b/drivers/media/i2c/mt9p031.c
index 237737f..91d822f 100644
--- a/drivers/media/i2c/mt9p031.c
+++ b/drivers/media/i2c/mt9p031.c
@@ -972,15 +972,15 @@ static int mt9p031_close(struct v4l2_subdev *subdev, 
struct v4l2_subdev_fh *fh)
return mt9p031_set_power(subdev, 0);
 }
 
-static struct v4l2_subdev_core_ops mt9p031_subdev_core_ops = {
+static const struct v4l2_subdev_core_ops mt9p031_subdev_core_ops = {
.s_power= mt9p031_set_power,
 };
 
-static struct v4l2_subdev_video_ops mt9p031_subdev_video_ops = {
+static const struct v4l2_subdev_video_ops mt9p031_subdev_video_ops = {
.s_stream   = mt9p031_s_stream,
 };
 
-static struct v4l2_subdev_pad_ops mt9p031_subdev_pad_ops = {
+static const struct v4l2_subdev_pad_ops mt9p031_subdev_pad_ops = {
.enum_mbus_code = mt9p031_enum_mbus_code,
.enum_frame_size = mt9p031_enum_frame_size,
.get_fmt = mt9p031_get_format,
@@ -989,7 +989,7 @@ static int mt9p031_close(struct v4l2_subdev *subdev, struct 
v4l2_subdev_fh *fh)
.set_selection = mt9p031_set_selection,
 };
 
-static struct v4l2_subdev_ops mt9p031_subdev_ops = {
+static const struct v4l2_subdev_ops mt9p031_subdev_ops = {
.core   = &mt9p031_subdev_core_ops,
.video  = &mt9p031_subdev_video_ops,
.pad= &mt9p031_subdev_pad_ops,
diff --git a/drivers/media/i2c/mt9v032.c b/drivers/media/i2c/mt9v032.c
index 58eb62f..88b7890 100644
--- a/drivers/media/i2c/mt9v032.c
+++ b/drivers/media/i2c/mt9v032.c
@@ -936,15 +936,15 @@ static int mt9v032_close(struct v4l2_subdev *subdev, 
struct v4l2_subdev_fh *fh)
return mt9v032_set_power(subdev, 0);
 }
 
-static struct v4l2_subdev_core_ops mt9v032_subdev_core_ops = {
+static const struct v4l2_subdev_core_ops mt9v032_subdev_core_ops = {
.s_power= mt9v032_set_power,
 };
 
-static struct v4l2_subdev_video_ops mt9v032_subdev_video_ops = {
+static const struct v4l2_subdev_video_ops mt9v032_subdev_video_ops = {
.s_stream   = mt9v032_s_stream,
 };
 
-static struct v4l2_subdev_pad_ops mt9v032_subdev_pad_ops = {
+static const struct v4l2_subdev_pad_ops mt9v032_subdev_pad_ops = {
.enum_mbus_code = mt9v032_enum_mbus_code,
.enum_frame_size = mt9v032_enum_frame_size,
.get_fmt = mt9v032_get_format,
@@ -953,7 +953,7 @@ static int mt9v032_close(struct v4l2_subdev *subdev, struct 
v4l2_subdev_fh *fh)
.set_selection = mt9v032_set_selection,
 };
 
-static struct v4l2_subdev_ops mt9v032_subdev_ops = {
+static const struct v4l2_subdev_ops mt9v032_subdev_ops = {
.core   = &mt9v032_subdev_core_ops,
.video  = &mt9v032_subdev_video_ops,
.pad= &mt9v032_subdev_pad_ops,
diff --git a/drivers/media/i2c/noon010pc30.c b/drivers/media/i2c/noon010pc30.c
index 30cb90b..88c498a 100644
--- a/drivers/media/i2c/noon010pc30.c
+++ b/drivers/media/i2c/noon010pc30.c
@@ -664,13 +664,13 @@ static int noon010_open(struct v4l2_subdev *sd, struct 
v4l2_subdev_fh *fh)
.log_status = noon010_log_status,
 };
 
-static struct v4l2_subdev_pad_ops noon010_pad_ops = {
+static const struct v4l2_subdev_pad_ops noon010_pad_ops = {
.enum_mbus_code = noon010_enum_mbus_code,
.get_fmt= noon010_get_fmt,
.set_fmt= noon010_set_fmt,
 };
 
-static struct v4l2_subdev_video_ops noon010_video_ops = {

[PATCH] drivers: media: i2c: constify v4l2_subdev_* structures

2016-12-11 Thread Bhumika Goyal
v4l2_subdev_{core/pad/video}_ops structures are stored in the
fields of the v4l2_subdev_ops structure which are of type const.
Also, v4l2_subdev_ops structure is passed to a function
having its argument of type const. As these structures are never
modified, declare them as const.
Done using Coccinelle: (One of the scripts used)

@r1 disable optional_qualifier @
identifier i;
position p;
@@
static struct v4l2_subdev_video_ops i@p = {...};

@ok1@
identifier r1.i;
position p;
struct v4l2_subdev_ops obj;
@@
obj.video=@p;

@bad@
position p!={r1.p,ok1.p};
identifier r1.i;
@@
i@p

@depends on !bad disable optional_qualifier@
identifier r1.i;
@@
+const
struct v4l2_subdev_video_ops i;

File sizes before:
   text    data     bss     dec     hex filename
   7810     736      16    8562    2172 drivers/media/i2c/mt9p031.o
   9652     736      24   10412    28ac drivers/media/i2c/mt9v032.o
   4613     552      20    5185    1441 drivers/media/i2c/noon010pc30.o
   2615     552       8    3175     c67 drivers/media/i2c/s5k6a3.o

File sizes after:
   text    data     bss     dec     hex filename
   8322     232      16    8570    217a drivers/media/i2c/mt9p031.o
  10164     232      24   10420    28b4 drivers/media/i2c/mt9v032.o
   4933     232      20    5185    1441 drivers/media/i2c/noon010pc30.o
   2935     232       8    3175     c67 drivers/media/i2c/s5k6a3.o

Signed-off-by: Bhumika Goyal 
---
 drivers/media/i2c/mt9p031.c | 8 
 drivers/media/i2c/mt9v032.c | 8 
 drivers/media/i2c/noon010pc30.c | 4 ++--
 drivers/media/i2c/s5k6a3.c  | 6 +++---
 4 files changed, 13 insertions(+), 13 deletions(-)

diff --git a/drivers/media/i2c/mt9p031.c b/drivers/media/i2c/mt9p031.c
index 237737f..91d822f 100644
--- a/drivers/media/i2c/mt9p031.c
+++ b/drivers/media/i2c/mt9p031.c
@@ -972,15 +972,15 @@ static int mt9p031_close(struct v4l2_subdev *subdev, 
struct v4l2_subdev_fh *fh)
return mt9p031_set_power(subdev, 0);
 }
 
-static struct v4l2_subdev_core_ops mt9p031_subdev_core_ops = {
+static const struct v4l2_subdev_core_ops mt9p031_subdev_core_ops = {
.s_power= mt9p031_set_power,
 };
 
-static struct v4l2_subdev_video_ops mt9p031_subdev_video_ops = {
+static const struct v4l2_subdev_video_ops mt9p031_subdev_video_ops = {
.s_stream   = mt9p031_s_stream,
 };
 
-static struct v4l2_subdev_pad_ops mt9p031_subdev_pad_ops = {
+static const struct v4l2_subdev_pad_ops mt9p031_subdev_pad_ops = {
.enum_mbus_code = mt9p031_enum_mbus_code,
.enum_frame_size = mt9p031_enum_frame_size,
.get_fmt = mt9p031_get_format,
@@ -989,7 +989,7 @@ static int mt9p031_close(struct v4l2_subdev *subdev, struct 
v4l2_subdev_fh *fh)
.set_selection = mt9p031_set_selection,
 };
 
-static struct v4l2_subdev_ops mt9p031_subdev_ops = {
+static const struct v4l2_subdev_ops mt9p031_subdev_ops = {
.core   = &mt9p031_subdev_core_ops,
.video  = &mt9p031_subdev_video_ops,
.pad= &mt9p031_subdev_pad_ops,
diff --git a/drivers/media/i2c/mt9v032.c b/drivers/media/i2c/mt9v032.c
index 58eb62f..88b7890 100644
--- a/drivers/media/i2c/mt9v032.c
+++ b/drivers/media/i2c/mt9v032.c
@@ -936,15 +936,15 @@ static int mt9v032_close(struct v4l2_subdev *subdev, 
struct v4l2_subdev_fh *fh)
return mt9v032_set_power(subdev, 0);
 }
 
-static struct v4l2_subdev_core_ops mt9v032_subdev_core_ops = {
+static const struct v4l2_subdev_core_ops mt9v032_subdev_core_ops = {
.s_power= mt9v032_set_power,
 };
 
-static struct v4l2_subdev_video_ops mt9v032_subdev_video_ops = {
+static const struct v4l2_subdev_video_ops mt9v032_subdev_video_ops = {
.s_stream   = mt9v032_s_stream,
 };
 
-static struct v4l2_subdev_pad_ops mt9v032_subdev_pad_ops = {
+static const struct v4l2_subdev_pad_ops mt9v032_subdev_pad_ops = {
.enum_mbus_code = mt9v032_enum_mbus_code,
.enum_frame_size = mt9v032_enum_frame_size,
.get_fmt = mt9v032_get_format,
@@ -953,7 +953,7 @@ static int mt9v032_close(struct v4l2_subdev *subdev, struct 
v4l2_subdev_fh *fh)
.set_selection = mt9v032_set_selection,
 };
 
-static struct v4l2_subdev_ops mt9v032_subdev_ops = {
+static const struct v4l2_subdev_ops mt9v032_subdev_ops = {
.core   = &mt9v032_subdev_core_ops,
.video  = &mt9v032_subdev_video_ops,
.pad= &mt9v032_subdev_pad_ops,
diff --git a/drivers/media/i2c/noon010pc30.c b/drivers/media/i2c/noon010pc30.c
index 30cb90b..88c498a 100644
--- a/drivers/media/i2c/noon010pc30.c
+++ b/drivers/media/i2c/noon010pc30.c
@@ -664,13 +664,13 @@ static int noon010_open(struct v4l2_subdev *sd, struct 
v4l2_subdev_fh *fh)
.log_status = noon010_log_status,
 };
 
-static struct v4l2_subdev_pad_ops noon010_pad_ops = {
+static const struct v4l2_subdev_pad_ops noon010_pad_ops = {
.enum_mbus_code = noon010_enum_mbus_code,
.get_fmt= noon010_get_fmt,
.set_fmt= noon010_set_fmt,
 };
 
-static struct v4l2_subdev_video_ops noon010_video_ops = {
+static const 

Re: [PATCH v6 0/8] Add PWM and IIO timer drivers for STM32

2016-12-11 Thread Lee Jones
On Fri, 09 Dec 2016, Benjamin Gaignard wrote:

> version 6:
> - rename stm32-gptimer in stm32-timers.
> - change "st,stm32-gptimer" compatible to "st,stm32-timers".
> - modify "st,breakinput" parameter in pwm part.
> - split DT patch in 2
> 
> version 5:
> - fix comments done on version 4
> - rebased on kernel 4.9-rc8
> - change nodes names and re-order then by addresses
> 
> version 4:
> - fix comments done on version 3
> - don't use interrupts anymore in IIO timer
> - detect hardware capabilities at probe time to simplify binding
> 
> version 3:
> - no change on mfd and pwm drivers patches
> - add cross reference between bindings
> - change compatible to "st,stm32-timer-trigger"
> - fix attributes access rights
> - use string instead of int for master_mode and slave_mode
> - document device attributes in sysfs-bus-iio-timer-stm32
> - update DT with the new compatible
> 
> version 2:
> - keep only one compatible per driver
> - use DT parameters to describe hardware block configuration:
>   - pwm channels, complementary output, counter size, break input
>   - triggers accepted and create by IIO timers
> - change DT to limit use of reference to the node
> - interrupt is now in IIO timer driver
> - rename stm32-mfd-timer to stm32-timers (for general purpose timer)
> 
> The following patches enable PWM and IIO Timer features for STM32 platforms.
> 
> Those two features are mixed into the registers of the same hardware block
> (named general purpose timer) which lead to introduce a multifunctions driver 
> on the top of them to be able to share the registers.
> 
> In STM32f4 14 instances of timer hardware block exist, even if they all have
> the same register mapping they could have a different number of pwm channels
> and/or different triggers capabilities. We use various parameters in DT to 
> describe the differences between hardware blocks
> 
> The MFD (stm32-timers.c) takes care of clock and register mapping
> by using regmap. stm32_timers structure is provided to its sub-node to
> share those information.
> 
> PWM driver is implemented into pwm-stm32.c. Depending of the instance we may
> have up to 4 channels, sometime with complementary outputs or 32 bits counter
> instead of 16 bits. Some hardware blocks may also have a break input function
> which allows to stop pwm depending of a level, defined in devicetree, on an
> external pin.
> 
> IIO timer driver (stm32-timer-trigger.c and stm32-timer-trigger.h) define a 
> list
> of hardware triggers usable by hardware blocks like ADC, DAC or other timers. 
> 
> The matrix of possible connections between blocks is quite complex so we use 
> trigger names and is_stm32_iio_timer_trigger() function to be sure that
> triggers are valid and configure the IPs.
> 
> At run time IIO timer hardware blocks can configure (through "master_mode" 
> IIO device attribute) which internal signal (counter enable, reset,
> comparison block, etc...) is used to generate the trigger.
> 
> By using "slave_mode" IIO device attribute timer can also configure on which
> event (level, rising edge) of the block is enabled.
> 
> Since we can use trigger from one hardware to control an other block, we can
> use a pwm to control an other one. The following example shows how to 
> configure
> pwm1 and pwm3 to make pwm3 generate pulse only when pwm1 pulse level is high.
> 
> /sys/bus/iio/devices # ls
> iio:device0  iio:device1  trigger0 trigger1
> 
> configure timer1 to use pwm1 channel 0 as output trigger
> /sys/bus/iio/devices # echo 'OC1REF' > iio\:device0/master_mode
> configure timer3 to enable only when input is high
> /sys/bus/iio/devices # echo 'gated' > iio\:device1/slave_mode
> /sys/bus/iio/devices # cat trigger0/name
> tim1_trgo
> configure timer2 to use timer1 trigger is input
> /sys/bus/iio/devices # echo "tim1_trgo" > iio\:device1/trigger/current_trigger
> 
> configure pwm3 channel 0 to generate a signal with a period of 100ms and a
> duty cycle of 50%
> /sys/devices/platform/soc/40000400.timers/40000400.timers:pwm/pwm/pwmchip4 # 
> echo 0 > export
> /sys/devices/platform/soc/40000400.timers/40000400.timers:pwm/pwm/pwmchip4 # 
> echo 100000000 > pwm0/period
> /sys/devices/platform/soc/40000400.timers/40000400.timers:pwm/pwm/pwmchip4 # 
> echo 50000000 > pwm0/duty_cycle
> /sys/devices/platform/soc/40000400.timers/40000400.timers:pwm/pwm/pwmchip4 # 
> echo 1 > pwm0/enable
> here pwm3 channel 0, as expected, doesn't start because has to be triggered by
> pwm1 channel 0
> 
> configure pwm1 channel 0 to generate a signal with a period of 1s and a
> duty cycle of 50%
> /sys/devices/platform/soc/40010000.timers/40010000.timers:pwm/pwm/pwmchip0 # 
> echo 0 > export
> /sys/devices/platform/soc/40010000.timers/40010000.timers:pwm/pwm/pwmchip0 # 
> echo 1000000000 > pwm0/period
> /sys/devices/platform/soc/40010000.timers/40010000.timers:pwm/pwm/pwmchip0 # 
> echo 500000000 > pwm0/duty_cycle
> /sys/devices/platform/soc/40010000.timers/40010000.timers:pwm/pwm/pwmchip0 # 
> echo 1 > 

Re: [PATCH v6 0/8] Add PWM and IIO timer drivers for STM32

2016-12-11 Thread Lee Jones
On Fri, 09 Dec 2016, Benjamin Gaignard wrote:

> version 6:
> - rename stm32-gptimer in stm32-timers.
> - change "st,stm32-gptimer" compatible to "st,stm32-timers".
> - modify "st,breakinput" parameter in pwm part.
> - split DT patch in 2
> 
> version 5:
> - fix comments done on version 4
> - rebased on kernel 4.9-rc8
> - change nodes names and re-order then by addresses
> 
> version 4:
> - fix comments done on version 3
> - don't use interrupts anymore in IIO timer
> - detect hardware capabilities at probe time to simplify binding
> 
> version 3:
> - no change on mfd and pwm drivers patches
> - add cross reference between bindings
> - change compatible to "st,stm32-timer-trigger"
> - fix attributes access rights
> - use string instead of int for master_mode and slave_mode
> - document device attributes in sysfs-bus-iio-timer-stm32
> - update DT with the new compatible
> 
> version 2:
> - keep only one compatible per driver
> - use DT parameters to describe hardware block configuration:
>   - pwm channels, complementary output, counter size, break input
>   - triggers accepted and create by IIO timers
> - change DT to limit use of reference to the node
> - interrupt is now in IIO timer driver
> - rename stm32-mfd-timer to stm32-timers (for general purpose timer)
> 
> The following patches enable PWM and IIO Timer features for STM32 platforms.
> 
> Those two features are mixed into the registers of the same hardware block
> (named general purpose timer) which lead to introduce a multifunctions driver 
> on the top of them to be able to share the registers.
> 
> In STM32f4 14 instances of timer hardware block exist, even if they all have
> the same register mapping they could have a different number of pwm channels
> and/or different triggers capabilities. We use various parameters in DT to 
> describe the differences between hardware blocks
> 
> The MFD (stm32-timers.c) takes care of clock and register mapping
> by using regmap. stm32_timers structure is provided to its sub-node to
> share those information.
> 
> PWM driver is implemented into pwm-stm32.c. Depending of the instance we may
> have up to 4 channels, sometime with complementary outputs or 32 bits counter
> instead of 16 bits. Some hardware blocks may also have a break input function
> which allows to stop pwm depending of a level, defined in devicetree, on an
> external pin.
> 
> IIO timer driver (stm32-timer-trigger.c and stm32-timer-trigger.h) define a 
> list
> of hardware triggers usable by hardware blocks like ADC, DAC or other timers. 
> 
> The matrix of possible connections between blocks is quite complex so we use 
> trigger names and is_stm32_iio_timer_trigger() function to be sure that
> triggers are valid and configure the IPs.
> 
> At run time IIO timer hardware blocks can configure (through "master_mode" 
> IIO device attribute) which internal signal (counter enable, reset,
> comparison block, etc...) is used to generate the trigger.
> 
> By using "slave_mode" IIO device attribute timer can also configure on which
> event (level, rising edge) of the block is enabled.
> 
> Since we can use trigger from one hardware to control an other block, we can
> use a pwm to control an other one. The following example shows how to 
> configure
> pwm1 and pwm3 to make pwm3 generate pulse only when pwm1 pulse level is high.
> 
> /sys/bus/iio/devices # ls
> iio:device0  iio:device1  trigger0 trigger1
> 
> configure timer1 to use pwm1 channel 0 as output trigger
> /sys/bus/iio/devices # echo 'OC1REF' > iio\:device0/master_mode
> configure timer3 to enable only when input is high
> /sys/bus/iio/devices # echo 'gated' > iio\:device1/slave_mode
> /sys/bus/iio/devices # cat trigger0/name
> tim1_trgo
> configure timer2 to use timer1 trigger is input
> /sys/bus/iio/devices # echo "tim1_trgo" > iio\:device1/trigger/current_trigger
> 
> configure pwm3 channel 0 to generate a signal with a period of 100ms and a
> duty cycle of 50%
> /sys/devices/platform/soc/40000400.timers/40000400.timers:pwm/pwm/pwmchip4 # 
> echo 0 > export
> /sys/devices/platform/soc/40000400.timers/40000400.timers:pwm/pwm/pwmchip4 # 
> echo 100000000 > pwm0/period
> /sys/devices/platform/soc/40000400.timers/40000400.timers:pwm/pwm/pwmchip4 # 
> echo 50000000 > pwm0/duty_cycle
> /sys/devices/platform/soc/40000400.timers/40000400.timers:pwm/pwm/pwmchip4 # 
> echo 1 > pwm0/enable
> here pwm3 channel 0, as expected, doesn't start because has to be triggered by
> pwm1 channel 0
> 
> configure pwm1 channel 0 to generate a signal with a period of 1s and a
> duty cycle of 50%
> /sys/devices/platform/soc/40010000.timers/40010000.timers:pwm/pwm/pwmchip0 # 
> echo 0 > export
> /sys/devices/platform/soc/40010000.timers/40010000.timers:pwm/pwm/pwmchip0 # 
> echo 1000000000 > pwm0/period
> /sys/devices/platform/soc/40010000.timers/40010000.timers:pwm/pwm/pwmchip0 # 
> echo 500000000 > pwm0/duty_cycle
> /sys/devices/platform/soc/40010000.timers/40010000.timers:pwm/pwm/pwmchip0 # 
> echo 1 > 

Re: [PATCH v6 1/8] MFD: add bindings for STM32 Timers driver

2016-12-11 Thread Lee Jones
On Fri, 09 Dec 2016, Benjamin Gaignard wrote:

> Add bindings information for STM32 Timers
> 
> version 6:
> - rename stm32-gtimer to stm32-timers
> - change compatible
> - add description about the IPs
> 
> version 2:
> - rename stm32-mfd-timer to stm32-gptimer
> - only keep one compatible string
> 
> Signed-off-by: Benjamin Gaignard 
> ---
>  .../devicetree/bindings/mfd/stm32-timers.txt   | 46 
> ++
>  1 file changed, 46 insertions(+)
>  create mode 100644 Documentation/devicetree/bindings/mfd/stm32-timers.txt

For my own reference:
  Acked-for-MFD-by: Lee Jones 

> diff --git a/Documentation/devicetree/bindings/mfd/stm32-timers.txt 
> b/Documentation/devicetree/bindings/mfd/stm32-timers.txt
> new file mode 100644
> index 000..b30868e
> --- /dev/null
> +++ b/Documentation/devicetree/bindings/mfd/stm32-timers.txt
> @@ -0,0 +1,46 @@
> +STM32 Timers driver bindings
> +
> +This IP provides 3 types of timer along with PWM functionality:
> +- advanced-control timers consist of a 16-bit auto-reload counter driven by 
> a programmable
> +  prescaler, break input feature, PWM outputs and complementary PWM ouputs 
> channels.
> +- general-purpose timers consist of a 16-bit or 32-bit auto-reload counter 
> driven by a
> +  programmable prescaler and PWM outputs.
> +- basic timers consist of a 16-bit auto-reload counter driven by a 
> programmable prescaler.
> +
> +Required parameters:
> +- compatible: must be "st,stm32-timers"
> +
> +- reg:   Physical base address and length of the 
> controller's
> + registers.
> +- clock-names:   Set to "clk_int".
> +- clocks:Phandle to the clock used by the timer module.
> + For Clk properties, please refer to 
> ../clock/clock-bindings.txt
> +
> +Optional parameters:
> +- resets:Phandle to the parent reset controller.
> + See ../reset/st,stm32-rcc.txt
> +
> +Optional subnodes:
> +- pwm:   See ../pwm/pwm-stm32.txt
> +- timer: See ../iio/timer/stm32-timer-trigger.txt
> +
> +Example:
> + timers@40010000 {
> + #address-cells = <1>;
> + #size-cells = <0>;
> + compatible = "st,stm32-timers";
> + reg = <0x40010000 0x400>;
> + clocks = <&rcc 0 160>;
> + clock-names = "clk_int";
> +
> + pwm {
> + compatible = "st,stm32-pwm";
> + pinctrl-0   = <&pwm1_pins>;
> + pinctrl-names   = "default";
> + };
> +
> + timer {
> + compatible = "st,stm32-timer-trigger";
> + reg = <0>;
> + };
> + };

-- 
Lee Jones
Linaro STMicroelectronics Landing Team Lead
Linaro.org │ Open source software for ARM SoCs
Follow Linaro: Facebook | Twitter | Blog


Re: [PATCH v6 1/8] MFD: add bindings for STM32 Timers driver

2016-12-11 Thread Lee Jones
On Fri, 09 Dec 2016, Benjamin Gaignard wrote:

> Add bindings information for STM32 Timers
> 
> version 6:
> - rename stm32-gtimer to stm32-timers
> - change compatible
> - add description about the IPs
> 
> version 2:
> - rename stm32-mfd-timer to stm32-gptimer
> - only keep one compatible string
> 
> Signed-off-by: Benjamin Gaignard 
> ---
>  .../devicetree/bindings/mfd/stm32-timers.txt   | 46 
> ++
>  1 file changed, 46 insertions(+)
>  create mode 100644 Documentation/devicetree/bindings/mfd/stm32-timers.txt

For my own reference:
  Acked-for-MFD-by: Lee Jones 

> diff --git a/Documentation/devicetree/bindings/mfd/stm32-timers.txt 
> b/Documentation/devicetree/bindings/mfd/stm32-timers.txt
> new file mode 100644
> index 000..b30868e
> --- /dev/null
> +++ b/Documentation/devicetree/bindings/mfd/stm32-timers.txt
> @@ -0,0 +1,46 @@
> +STM32 Timers driver bindings
> +
> +This IP provides 3 types of timer along with PWM functionality:
> +- advanced-control timers consist of a 16-bit auto-reload counter driven by 
> a programmable
> +  prescaler, break input feature, PWM outputs and complementary PWM ouputs 
> channels.
> +- general-purpose timers consist of a 16-bit or 32-bit auto-reload counter 
> driven by a
> +  programmable prescaler and PWM outputs.
> +- basic timers consist of a 16-bit auto-reload counter driven by a 
> programmable prescaler.
> +
> +Required parameters:
> +- compatible: must be "st,stm32-timers"
> +
> +- reg:   Physical base address and length of the 
> controller's
> + registers.
> +- clock-names:   Set to "clk_int".
> +- clocks:Phandle to the clock used by the timer module.
> + For Clk properties, please refer to 
> ../clock/clock-bindings.txt
> +
> +Optional parameters:
> +- resets:Phandle to the parent reset controller.
> + See ../reset/st,stm32-rcc.txt
> +
> +Optional subnodes:
> +- pwm:   See ../pwm/pwm-stm32.txt
> +- timer: See ../iio/timer/stm32-timer-trigger.txt
> +
> +Example:
> + timers@40010000 {
> + #address-cells = <1>;
> + #size-cells = <0>;
> + compatible = "st,stm32-timers";
> + reg = <0x40010000 0x400>;
> + clocks = <&rcc 0 160>;
> + clock-names = "clk_int";
> +
> + pwm {
> + compatible = "st,stm32-pwm";
> + pinctrl-0   = <&pwm1_pins>;
> + pinctrl-names   = "default";
> + };
> +
> + timer {
> + compatible = "st,stm32-timer-trigger";
> + reg = <0>;
> + };
> + };

-- 
Lee Jones
Linaro STMicroelectronics Landing Team Lead
Linaro.org │ Open source software for ARM SoCs
Follow Linaro: Facebook | Twitter | Blog


Re: [PATCH v6 2/8] MFD: add STM32 Timers driver

2016-12-11 Thread Lee Jones
On Fri, 09 Dec 2016, Benjamin Gaignard wrote:

> This hardware block could be used at the same time for PWM generation
> and IIO timers.
> PWM and IIO timer configuration are mixed in the same registers
> so we need a multi-function driver to be able to share those registers.
> 
> version 6:
> - rename files to stm32-timers
> - rename functions to stm32_timers_xxx
> 
> version 5:
> - fix Lee comments about detect function
> - add missing dependency on REGMAP_MMIO
> 
> version 4:
> - add a function to detect Auto Reload Register (ARR) size
> - rename the structure shared with other drivers
> 
> version 2:
> - rename driver "stm32-gptimer" to be align with SoC documentation
> - only keep one compatible
> - use of_platform_populate() instead of devm_mfd_add_devices()
> 
> Signed-off-by: Benjamin Gaignard 
> ---
>  drivers/mfd/Kconfig  | 11 ++
>  drivers/mfd/Makefile |  2 +
>  drivers/mfd/stm32-timers.c   | 80 
> 
>  include/linux/mfd/stm32-timers.h | 71 +++
>  4 files changed, 164 insertions(+)
>  create mode 100644 drivers/mfd/stm32-timers.c
>  create mode 100644 include/linux/mfd/stm32-timers.h

For my own reference:
  Acked-for-MFD-by: Lee Jones 

> diff --git a/drivers/mfd/Kconfig b/drivers/mfd/Kconfig
> index c6df644..4ec1906 100644
> --- a/drivers/mfd/Kconfig
> +++ b/drivers/mfd/Kconfig
> @@ -1607,6 +1607,17 @@ config MFD_STW481X
> in various ST Microelectronics and ST-Ericsson embedded
> Nomadik series.
>  
> +config MFD_STM32_TIMERS
> + tristate "Support for STM32 Timers"
> + depends on (ARCH_STM32 && OF) || COMPILE_TEST
> + select MFD_CORE
> + select REGMAP
> + select REGMAP_MMIO
> + help
> +   Select this option to enable the STM32 timers driver used
> +   for PWM and IIO Timer. This driver allows the registers to be
> +   shared between the other drivers.
> +
>  menu "Multimedia Capabilities Port drivers"
>   depends on ARCH_SA1100
>  
> diff --git a/drivers/mfd/Makefile b/drivers/mfd/Makefile
> index 9834e66..11a52f8 100644
> --- a/drivers/mfd/Makefile
> +++ b/drivers/mfd/Makefile
> @@ -211,3 +211,5 @@ obj-$(CONFIG_INTEL_SOC_PMIC)  += intel-soc-pmic.o
>  obj-$(CONFIG_MFD_MT6397) += mt6397-core.o
>  
>  obj-$(CONFIG_MFD_ALTERA_A10SR)   += altera-a10sr.o
> +
> +obj-$(CONFIG_MFD_STM32_TIMERS)   += stm32-timers.o
> diff --git a/drivers/mfd/stm32-timers.c b/drivers/mfd/stm32-timers.c
> new file mode 100644
> index 000..68d115e
> --- /dev/null
> +++ b/drivers/mfd/stm32-timers.c
> @@ -0,0 +1,80 @@
> +/*
> + * Copyright (C) STMicroelectronics 2016
> + *
> + * Author: Benjamin Gaignard 
> + *
> + * License terms:  GNU General Public License (GPL), version 2
> + */
> +
> +#include 
> +#include 
> +#include 
> +#include 
> +
> +static const struct regmap_config stm32_timers_regmap_cfg = {
> + .reg_bits = 32,
> + .val_bits = 32,
> + .reg_stride = sizeof(u32),
> + .max_register = 0x400,
> +};
> +
> +static void stm32_timers_get_arr_size(struct stm32_timers *ddata)
> +{
> + /*
> +  * Only the available bits will be written so when readback
> +  * we get the maximum value of auto reload register
> +  */
> + regmap_write(ddata->regmap, TIM_ARR, ~0L);
> + regmap_read(ddata->regmap, TIM_ARR, &ddata->max_arr);
> + regmap_write(ddata->regmap, TIM_ARR, 0x0);
> +}
> +
> +static int stm32_timers_probe(struct platform_device *pdev)
> +{
> + struct device *dev = &pdev->dev;
> + struct stm32_timers *ddata;
> + struct resource *res;
> + void __iomem *mmio;
> +
> + ddata = devm_kzalloc(dev, sizeof(*ddata), GFP_KERNEL);
> + if (!ddata)
> + return -ENOMEM;
> +
> + res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
> + mmio = devm_ioremap_resource(dev, res);
> + if (IS_ERR(mmio))
> + return PTR_ERR(mmio);
> +
> + ddata->regmap = devm_regmap_init_mmio_clk(dev, "clk_int", mmio,
> +   &stm32_timers_regmap_cfg);
> + if (IS_ERR(ddata->regmap))
> + return PTR_ERR(ddata->regmap);
> +
> + ddata->clk = devm_clk_get(dev, NULL);
> + if (IS_ERR(ddata->clk))
> + return PTR_ERR(ddata->clk);
> +
> + stm32_timers_get_arr_size(ddata);
> +
> + platform_set_drvdata(pdev, ddata);
> +
> + return of_platform_populate(pdev->dev.of_node, NULL, NULL, &pdev->dev);
> +}
> +
> +static const struct of_device_id stm32_timers_of_match[] = {
> + { .compatible = "st,stm32-timers", },
> + { /* end node */ },
> +};
> +MODULE_DEVICE_TABLE(of, stm32_timers_of_match);
> +
> +static struct platform_driver stm32_timers_driver = {
> + .probe = stm32_timers_probe,
> + .driver = {
> + .name = "stm32-timers",
> + .of_match_table = stm32_timers_of_match,
> + },
> +};
> +module_platform_driver(stm32_timers_driver);
> 

Re: [PATCH v6 2/8] MFD: add STM32 Timers driver

2016-12-11 Thread Lee Jones
On Fri, 09 Dec 2016, Benjamin Gaignard wrote:

> This hardware block could be used at the same time for PWM generation
> and IIO timers.
> PWM and IIO timer configuration are mixed in the same registers
> so we need a multi-function driver to be able to share those registers.
> 
> version 6:
> - rename files to stm32-timers
> - rename functions to stm32_timers_xxx
> 
> version 5:
> - fix Lee comments about detect function
> - add missing dependency on REGMAP_MMIO
> 
> version 4:
> - add a function to detect Auto Reload Register (ARR) size
> - rename the structure shared with other drivers
> 
> version 2:
> - rename driver "stm32-gptimer" to be align with SoC documentation
> - only keep one compatible
> - use of_platform_populate() instead of devm_mfd_add_devices()
> 
> Signed-off-by: Benjamin Gaignard 
> ---
>  drivers/mfd/Kconfig  | 11 ++
>  drivers/mfd/Makefile |  2 +
>  drivers/mfd/stm32-timers.c   | 80 
> 
>  include/linux/mfd/stm32-timers.h | 71 +++
>  4 files changed, 164 insertions(+)
>  create mode 100644 drivers/mfd/stm32-timers.c
>  create mode 100644 include/linux/mfd/stm32-timers.h

For my own reference:
  Acked-for-MFD-by: Lee Jones 

> diff --git a/drivers/mfd/Kconfig b/drivers/mfd/Kconfig
> index c6df644..4ec1906 100644
> --- a/drivers/mfd/Kconfig
> +++ b/drivers/mfd/Kconfig
> @@ -1607,6 +1607,17 @@ config MFD_STW481X
> in various ST Microelectronics and ST-Ericsson embedded
> Nomadik series.
>  
> +config MFD_STM32_TIMERS
> + tristate "Support for STM32 Timers"
> + depends on (ARCH_STM32 && OF) || COMPILE_TEST
> + select MFD_CORE
> + select REGMAP
> + select REGMAP_MMIO
> + help
> +   Select this option to enable the STM32 timers driver used
> +   for PWM and IIO Timer. This driver allows the registers to be
> +   shared between the other drivers.
> +
>  menu "Multimedia Capabilities Port drivers"
>   depends on ARCH_SA1100
>  
> diff --git a/drivers/mfd/Makefile b/drivers/mfd/Makefile
> index 9834e66..11a52f8 100644
> --- a/drivers/mfd/Makefile
> +++ b/drivers/mfd/Makefile
> @@ -211,3 +211,5 @@ obj-$(CONFIG_INTEL_SOC_PMIC)  += intel-soc-pmic.o
>  obj-$(CONFIG_MFD_MT6397) += mt6397-core.o
>  
>  obj-$(CONFIG_MFD_ALTERA_A10SR)   += altera-a10sr.o
> +
> +obj-$(CONFIG_MFD_STM32_TIMERS)   += stm32-timers.o
> diff --git a/drivers/mfd/stm32-timers.c b/drivers/mfd/stm32-timers.c
> new file mode 100644
> index 000..68d115e
> --- /dev/null
> +++ b/drivers/mfd/stm32-timers.c
> @@ -0,0 +1,80 @@
> +/*
> + * Copyright (C) STMicroelectronics 2016
> + *
> + * Author: Benjamin Gaignard 
> + *
> + * License terms:  GNU General Public License (GPL), version 2
> + */
> +
> +#include 
> +#include 
> +#include 
> +#include 
> +
> +static const struct regmap_config stm32_timers_regmap_cfg = {
> + .reg_bits = 32,
> + .val_bits = 32,
> + .reg_stride = sizeof(u32),
> + .max_register = 0x400,
> +};
> +
> +static void stm32_timers_get_arr_size(struct stm32_timers *ddata)
> +{
> + /*
> +  * Only the available bits will be written so when readback
> +  * we get the maximum value of auto reload register
> +  */
> + regmap_write(ddata->regmap, TIM_ARR, ~0L);
> + regmap_read(ddata->regmap, TIM_ARR, &ddata->max_arr);
> + regmap_write(ddata->regmap, TIM_ARR, 0x0);
> +}
> +
> +static int stm32_timers_probe(struct platform_device *pdev)
> +{
> + struct device *dev = &pdev->dev;
> + struct stm32_timers *ddata;
> + struct resource *res;
> + void __iomem *mmio;
> +
> + ddata = devm_kzalloc(dev, sizeof(*ddata), GFP_KERNEL);
> + if (!ddata)
> + return -ENOMEM;
> +
> + res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
> + mmio = devm_ioremap_resource(dev, res);
> + if (IS_ERR(mmio))
> + return PTR_ERR(mmio);
> +
> + ddata->regmap = devm_regmap_init_mmio_clk(dev, "clk_int", mmio,
> +   &stm32_timers_regmap_cfg);
> + if (IS_ERR(ddata->regmap))
> + return PTR_ERR(ddata->regmap);
> +
> + ddata->clk = devm_clk_get(dev, NULL);
> + if (IS_ERR(ddata->clk))
> + return PTR_ERR(ddata->clk);
> +
> + stm32_timers_get_arr_size(ddata);
> +
> + platform_set_drvdata(pdev, ddata);
> +
> + return of_platform_populate(pdev->dev.of_node, NULL, NULL, &pdev->dev);
> +}
> +
> +static const struct of_device_id stm32_timers_of_match[] = {
> + { .compatible = "st,stm32-timers", },
> + { /* end node */ },
> +};
> +MODULE_DEVICE_TABLE(of, stm32_timers_of_match);
> +
> +static struct platform_driver stm32_timers_driver = {
> + .probe = stm32_timers_probe,
> + .driver = {
> + .name = "stm32-timers",
> + .of_match_table = stm32_timers_of_match,
> + },
> +};
> +module_platform_driver(stm32_timers_driver);
> +
> +MODULE_DESCRIPTION("STMicroelectronics STM32 Timers");
> 

Re: [PATCH v8 3/3] iio: adc: add support for Allwinner SoCs ADC

2016-12-11 Thread Quentin Schulz
Hi Maxime,

On 10/12/2016 10:44, Maxime Ripard wrote:
> Hi,
> 
> Just some minor comments.
> 
> On Fri, Dec 09, 2016 at 11:22:36AM +0100, Quentin Schulz wrote:
>> +/*
>> + * Since the thermal sensor needs the IP to be in touchscreen mode and
>> + * there is no register to know if the IP has finished its transition
>> + * between the two modes, a delay is required when switching modes. This
>> + * slows down ADC readings while the latter are critical data to the
> 
> The latter between what and what?
> 
>> + * user. Disabling CONFIG_THERMAL_OF in kernel configuration allows the
>> + * user to avoid registering the thermal sensor (thus unavailable) and
> 
> Isn't it obvious that it's not going to be available if you do not
> register it?
> 
>> + * does not switch between modes thus "quicken" the ADC readings.
>> + * The thermal sensor should be enabled by default since the SoC
>> + * temperature is usually more critical than ADC readings.
> 
> This last sentence should be in the Kconfig help. You cannot expect
> that all your users will read all the source code they want to compile
> :)
> 
> Overall, I think this comment is kind of missing the point, maybe
> something like:
> 
> /*
>  * Since the controller needs to be in touchscreen mode for its
>  * thermal sensor to operate properly, and that switching between the
>  * two modes needs a delay, always registering in the thermal
>  * framework will significantly slow down the conversion rate of the
>  * ADCs.
>  *
>  * Therefore, instead of depending on THERMAL_OF in Kconfig, we only
>  * register the sensor if that option is enabled, eventually leaving
>  * that choice to the user.
>  */
> 
> Would be much clearer.
> 
>> + */
>> +
>> +if (IS_ENABLED(CONFIG_THERMAL_OF)) {
>> +/*
>> + * This driver is a child of an MFD which has a node in the DT but not
>> + * its children. Therefore, the resulting devices of this driver do not
> 
> Wrong indentation for the comment, and saying why the MFD children
> don't have a node in the DT (backward compatibility) would be nice.
> 

Thanks for the comments, that's indeed much clearer.

>> + * have an of_node variable.
>> + * However, its parent (the MFD driver) has an of_node variable and
>> + * since devm_thermal_zone_of_sensor_register uses its first argument to
>> + * match the phandle defined in the node of the thermal driver with the
>> + * of_node of the device passed as first argument and the third argument
>> + * to call ops from thermal_zone_of_device_ops, the solution is to use
>> + * the parent device as first argument to match the phandle with its
>> + * of_node, and the device from this driver as third argument to return
>> + * the temperature.
>> + */
>> +tzd = devm_thermal_zone_of_sensor_register(pdev->dev.parent, 0,
>> +   info,
>> +   &sun4i_ts_tz_ops);
> 
> I don't think tzd is used anywhere else in your function, it can be
> made local to this block.
> 

ACK.

Thanks,
Quentin

-- 
Quentin Schulz, Free Electrons
Embedded Linux and Kernel engineering
http://free-electrons.com



signature.asc
Description: OpenPGP digital signature


Re: [PATCH v8 3/3] iio: adc: add support for Allwinner SoCs ADC

2016-12-11 Thread Quentin Schulz
Hi Maxime,

On 10/12/2016 10:44, Maxime Ripard wrote:
> Hi,
> 
> Just some minor comments.
> 
> On Fri, Dec 09, 2016 at 11:22:36AM +0100, Quentin Schulz wrote:
>> +/*
>> + * Since the thermal sensor needs the IP to be in touchscreen mode and
>> + * there is no register to know if the IP has finished its transition
>> + * between the two modes, a delay is required when switching modes. This
>> + * slows down ADC readings while the latter are critical data to the
> 
> The latter between what and what?
> 
>> + * user. Disabling CONFIG_THERMAL_OF in kernel configuration allows the
>> + * user to avoid registering the thermal sensor (thus unavailable) and
> 
> Isn't it obvious that it's not going to be available if you do not
> register it?
> 
>> + * does not switch between modes thus "quicken" the ADC readings.
>> + * The thermal sensor should be enabled by default since the SoC
>> + * temperature is usually more critical than ADC readings.
> 
> This last sentence should be in the Kconfig help. You cannot expect
> that all your users will read all the source code they want to compile
> :)
> 
> Overall, I think this comment is kind of missing the point, maybe
> something like:
> 
> /*
>  * Since the controller needs to be in touchscreen mode for its
>  * thermal sensor to operate properly, and that switching between the
>  * two modes needs a delay, always registering in the thermal
>  * framework will significantly slow down the conversion rate of the
>  * ADCs.
>  *
>  * Therefore, instead of depending on THERMAL_OF in Kconfig, we only
>  * register the sensor if that option is enabled, eventually leaving
>  * that choice to the user.
>  */
> 
> Would be much clearer.
> 
>> + */
>> +
>> +if (IS_ENABLED(CONFIG_THERMAL_OF)) {
>> +/*
>> + * This driver is a child of an MFD which has a node in the DT but not
>> + * its children. Therefore, the resulting devices of this driver do not
> 
> Wrong indentation for the comment, and saying why the MFD children
> don't have a node in the DT (backward compatibility) would be nice.
> 

Thanks for the comments, that's indeed much clearer.

>> + * have an of_node variable.
>> + * However, its parent (the MFD driver) has an of_node variable and
>> + * since devm_thermal_zone_of_sensor_register uses its first argument to
>> + * match the phandle defined in the node of the thermal driver with the
>> + * of_node of the device passed as first argument and the third argument
>> + * to call ops from thermal_zone_of_device_ops, the solution is to use
>> + * the parent device as first argument to match the phandle with its
>> + * of_node, and the device from this driver as third argument to return
>> + * the temperature.
>> + */
>> +tzd = devm_thermal_zone_of_sensor_register(pdev->dev.parent, 0,
>> +   info,
>> +   &sun4i_ts_tz_ops);
> 
> I don't think tzd is used anywhere else in your function, it can be
> made local to this block.
> 

ACK.

Thanks,
Quentin

-- 
Quentin Schulz, Free Electrons
Embedded Linux and Kernel engineering
http://free-electrons.com



signature.asc
Description: OpenPGP digital signature


Re: [media] bt8xx: One function call less in bttv_input_init() after error detection

2016-12-11 Thread Daniele Nicolodi
On 12/12/16 00:33, SF Markus Elfring wrote:
>>> I would prefer a safer coding style for the corresponding
>>> exception handling.
>>
>> Can you please point out what is wrong in the current code
> 
> Is it useful to reconsider the software situation that another memory
> allocation is attempted when it could be determined that a previous one
> failed already?

No.

> Are two successful allocations finally needed to achieve the desired task?

Yes.

>> and how the changes you propose fix the problem?
> 
> I suggest to check return values immediately after each function call.
> An error situation can be detected earlier then and only the required
> clean-up functionality will be executed at the end.

Which improvement does this bring?

>> No one has expressed acceptance for the kind of change you propose with
>> this patch, or to previous patches you proposed changing similar constructs.
> 
> I got a mixed impression from the acceptance statistics about my
> published patches.

Have you proposed a similar patch that was accepted? I don't find record
of it, but I may be wrong.

>> The fact that you propose over and over again a class of changes that
>> has been already vocally rejected would suggest otherwise.
> 
> I dare to propose another look at results from source code search patterns.

Why?

Cheers,
Daniele



Re: [media] bt8xx: One function call less in bttv_input_init() after error detection

2016-12-11 Thread Daniele Nicolodi
On 12/12/16 00:33, SF Markus Elfring wrote:
>>> I would prefer a safer coding style for the corresponding
>>> exception handling.
>>
>> Can you please point out what is wrong in the current code
> 
> Is it useful to reconsider the software situation that another memory
> allocation is attempted when it could be determined that a previous one
> failed already?

No.

> Are two successful allocations finally needed to achieve the desired task?

Yes.

>> and how the changes you propose fix the problem?
> 
> I suggest to check return values immediately after each function call.
> An error situation can be detected earlier then and only the required
> clean-up functionality will be executed at the end.

Which improvement does this bring?

>> No one has expressed acceptance for the kind of change you propose with
>> this patch, or to previous patches you proposed changing similar constructs.
> 
> I got a mixed impression from the acceptance statistics about my
> published patches.

Have you proposed a similar patch that was accepted? I don't find record
of it, but I may be wrong.

>> The fact that you propose over and over again a class of changes that
>> has been already vocally rejected would suggest otherwise.
> 
> I dare to propose another look at results from source code search patterns.

Why?

Cheers,
Daniele



[PATCH RFC] clk: wm831x: fix usleep_range with bad range

2016-12-11 Thread Nicholas Mc Guire
The delay here is not in atomic context and does not seem critical with
respect to precision, but usleep_range(min,max) with min==max results in 
giving the timer subsystem no room to optimize uncritical delays. Fix 
this by setting the range to 2000,3000 us.

Fixes: commit f05259a6ffa4 ("clk: wm831x: Add initial WM831x clock driver")
Signed-off-by: Nicholas Mc Guire 
---

problem was located by coccinelle spatch

The problem is that usleep_range is calculating the delay by
 exp = ktime_add_us(ktime_get(), min)
 delta = (u64)(max - min) * NSEC_PER_USEC
so delta is set to 0
and then calls 
  schedule_hrtimeout_range(exp, 0,...)
effectively this means that the clock subsystem has no room to
optimize and the behavior is no better than using usleep().
As this is not a critical delay it is set to a range of 2 to 3 
milliseconds - this change needs a review by someone that knows
the details of the device though.

Q:It might actually be possible to just use msleep(2) here rather
  than using a hrtimer, at least I do not see what a hrtimer would be
  needed here for - a longer delay e.g. on a HZ100 box should not hurt ?

Patch was only compile tested with: i386_defconfig + CONFIG_X86_INTEL_QUARK=y
CONFIG_MFD_WM831X_I2C=y, COMMON_CLK=y, CONFIG_COMMON_CLK_WM831X=m

Patch is against: 4.9.0-rc8 (localversion-next is -next-20161209)

 drivers/clk/clk-wm831x.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/clk/clk-wm831x.c b/drivers/clk/clk-wm831x.c
index f4fdac5..fe42d46 100644
--- a/drivers/clk/clk-wm831x.c
+++ b/drivers/clk/clk-wm831x.c
@@ -97,7 +97,8 @@ static int wm831x_fll_prepare(struct clk_hw *hw)
if (ret != 0)
dev_crit(wm831x->dev, "Failed to enable FLL: %d\n", ret);
 
-   usleep_range(2000, 2000);
+   /* wait 2-3 ms for new frequency taking effect */
+   usleep_range(2000, 3000);
 
return ret;
 }
-- 
2.1.4



[PATCH RFC] clk: wm831x: fix usleep_range with bad range

2016-12-11 Thread Nicholas Mc Guire
The delay here is not in atomic context and does not seem critical with
respect to precision, but usleep_range(min,max) with min==max results in 
giving the timer subsystem no room to optimize uncritical delays. Fix 
this by setting the range to 2000,3000 us.

Fixes: commit f05259a6ffa4 ("clk: wm831x: Add initial WM831x clock driver")
Signed-off-by: Nicholas Mc Guire 
---

problem was located by coccinelle spatch

The problem is that usleep_range is calculating the delay by
 exp = ktime_add_us(ktime_get(), min)
 delta = (u64)(max - min) * NSEC_PER_USEC
so delta is set to 0
and then calls 
  schedule_hrtimeout_range(exp, 0,...)
effectively this means that the clock subsystem has no room to
optimize and the behavior is no better than using usleep().
As this is not a critical delay it is set to a range of 2 to 3 
milliseconds - this change needs a review by someone that knows
the details of the device though.

Q:It might actually be possible to just use msleep(2) here rather
  than using a hrtimer, at least I do not see what a hrtimer would be
  needed here for - a longer delay e.g. on a HZ100 box should not hurt ?

Patch was only compile tested with: i386_defconfig + CONFIG_X86_INTEL_QUARK=y
CONFIG_MFD_WM831X_I2C=y, COMMON_CLK=y, CONFIG_COMMON_CLK_WM831X=m

Patch is against: 4.9.0-rc8 (localversion-next is -next-20161209)

 drivers/clk/clk-wm831x.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/clk/clk-wm831x.c b/drivers/clk/clk-wm831x.c
index f4fdac5..fe42d46 100644
--- a/drivers/clk/clk-wm831x.c
+++ b/drivers/clk/clk-wm831x.c
@@ -97,7 +97,8 @@ static int wm831x_fll_prepare(struct clk_hw *hw)
if (ret != 0)
dev_crit(wm831x->dev, "Failed to enable FLL: %d\n", ret);
 
-   usleep_range(2000, 2000);
+   /* wait 2-3 ms for new frequency taking effect */
+   usleep_range(2000, 3000);
 
return ret;
 }
-- 
2.1.4



Re: [media] bt8xx: One function call less in bttv_input_init() after error detection

2016-12-11 Thread SF Markus Elfring
>> I would prefer a safer coding style for the corresponding
>> exception handling.
> 
> Can you please point out what is wrong in the current code

Is it useful to reconsider the software situation that another memory
allocation is attempted when it could be determined that a previous one
failed already?
Are two successful allocations finally needed to achieve the desired task?


> and how the changes you propose fix the problem?

I suggest to check return values immediately after each function call.
An error situation can be detected earlier then and only the required
clean-up functionality will be executed at the end.


> No one has expressed acceptance for the kind of change you propose with
> this patch, or to previous patches you proposed changing similar constructs.

I got a mixed impression from the acceptance statistics about my
published patches.


> The fact that you propose over and over again a class of changes that
> has been already vocally rejected would suggest otherwise.

I dare to propose another look at results from source code search patterns.


> The major achievement you obtained so far is that one of the maintainers
> of a large fraction of the kernel refuses to look at your patch submissions.

It can happen that some patterns are occasionally "too special"
to grow the popularity for such change possibilities and desired software
improvements quickly.
There are also different views about affected implementation details
by the software development community, aren't there?

Regards,
Markus


Re: [media] bt8xx: One function call less in bttv_input_init() after error detection

2016-12-11 Thread SF Markus Elfring
>> I would prefer a safer coding style for the corresponding
>> exception handling.
> 
> Can you please point out what is wrong in the current code

Is it useful to reconsider the software situation that another memory
allocation is attempted when it could be determined that a previous one
failed already?
Are two successful allocations finally needed to achieve the desired task?


> and how the changes you propose fix the problem?

I suggest to check return values immediately after each function call.
An error situation can be detected earlier then and only the required
clean-up functionality will be executed at the end.


> No one has expressed acceptance for the kind of change you propose with
> this patch, or to previous patches you proposed changing similar constructs.

I got a mixed impression from the acceptance statistics about my
published patches.


> The fact that you propose over and over again a class of changes that
> has been already vocally rejected would suggest otherwise.

I dare to propose another look at results from source code search patterns.


> The major achievement you obtained so far is that one of the maintainers
> of a large fraction of the kernel refuses to look at your patch submissions.

It can happen that some patterns are occasionally "too special"
to grow the popularity for such change possibilities and desired software
improvements quickly.
There are also different views about affected implementation details
by the software development community, aren't there?

Regards,
Markus


[GIT PULL] RCU changes for v4.10

2016-12-11 Thread Ingo Molnar
Linus,

Please pull the latest core-rcu-for-linus git tree from:

   git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git core-rcu-for-linus

   # HEAD: af91a81131aee3e233a977632a23b839857a327b Merge branch 'for-mingo' of 
git://git.kernel.org/pub/scm/linux/kernel/git/paulmck/linux-rcu into core/rcu

The main RCU changes in this development cycle were:

  - Miscellaneous fixes, including a change to call_rcu()'s
rcu_head alignment check.

  - Security-motivated list consistency checks, which are
disabled by default behind DEBUG_LIST.

  - Torture-test updates.

  - Documentation updates, yet again just simple changes.

 Thanks,

Ingo

-->
Arnd Bergmann (1):
  bug: Avoid Kconfig warning for BUG_ON_DATA_CORRUPTION

Kees Cook (5):
  list: Split list_add() debug checking into separate function
  rculist: Consolidate DEBUG_LIST for list_add_rcu()
  list: Split list_del() debug checking into separate function
  bug: Provide toggle for BUG on data corruption
  lkdtm: Add tests for struct list corruption

Nikolay Borisov (1):
  rcu: RCU_TRACE enables event tracing as well as debugfs

Paul E. McKenney (9):
  documentation: Present updated RCU guarantee
  rcu: Tighten up __call_rcu() rcu_head alignment check
  rcu: Remove obsolete rcu_check_callbacks() header comment
  rcu: Remove obsolete comment from __call_rcu()
  torture: Trace long read-side delays
  rcu: Make expedited grace periods recheck dyntick idle state
  rcu: Don't kick unless grace period or request
  torture: Remove obsolete files from rcutorture .gitignore
  torture: Prevent jitter from delaying build-only runs

Pranith Kumar (1):
  Documentation/RCU: Fix minor typo

Valentin Rothberg (1):
  lib/Kconfig.debug: Fix typo in select statement


 .../RCU/Design/Requirements/Requirements.html  | 25 +-
 Documentation/RCU/whatisRCU.txt|  2 +-
 drivers/misc/lkdtm.h   |  2 +
 drivers/misc/lkdtm_bugs.c  | 68 +++
 drivers/misc/lkdtm_core.c  |  2 +
 include/linux/bug.h| 17 
 include/linux/list.h   | 37 +---
 include/linux/rculist.h|  8 +-
 include/trace/events/rcu.h |  5 +-
 kernel/rcu/rcutorture.c| 11 ++-
 kernel/rcu/tree.c  | 17 ++--
 kernel/rcu/tree.h  |  1 +
 kernel/rcu/tree_exp.h  | 12 ++-
 lib/Kconfig.debug  | 15 +++-
 lib/list_debug.c   | 99 +++---
 tools/testing/selftests/rcutorture/.gitignore  |  2 -
 tools/testing/selftests/rcutorture/bin/kvm.sh  |  5 ++
 17 files changed, 221 insertions(+), 107 deletions(-)

diff --git a/Documentation/RCU/Design/Requirements/Requirements.html 
b/Documentation/RCU/Design/Requirements/Requirements.html
index a4d3838130e4..39bcb74ea733 100644
--- a/Documentation/RCU/Design/Requirements/Requirements.html
+++ b/Documentation/RCU/Design/Requirements/Requirements.html
@@ -547,7 +547,7 @@ The rcu_access_pointer() on line6 is similar 
to
It could reuse a value formerly fetched from this same pointer.
It could also fetch the pointer from gp in a byte-at-a-time
manner, resulting in load tearing, in turn resulting a bytewise
-   mash-up of two distince pointer values.
+   mash-up of two distinct pointer values.
It might even use value-speculation optimizations, where it makes
a wrong guess, but by the time it gets around to checking the
value, an update has changed the pointer to match the wrong guess.
@@ -659,6 +659,29 @@ demonstrates the need for RCU's stringent memory-ordering 
guarantees on
In other words, a given instance of synchronize_rcu()
can avoid waiting on a given RCU read-side critical section only
if it can prove that synchronize_rcu() started first.
+
+   
+   A related question is When rcu_read_lock()
+   doesn't generate any code, why does it matter how it relates
+   to a grace period?
+   The answer is that it is not the relationship of
+   rcu_read_lock() itself that is important, but rather
+   the relationship of the code within the enclosed RCU read-side
+   critical section to the code preceding and following the
+   grace period.
+   If we take this viewpoint, then a given RCU read-side critical
+   section begins before a given grace period when some access
+   preceding the grace period observes the effect of some access
+   within the critical section, in which case none of the accesses
+   within the critical section may observe the effects of any
+   access following the grace period.
+
+   
+   As of 

[GIT PULL] RCU changes for v4.10

2016-12-11 Thread Ingo Molnar
Linus,

Please pull the latest core-rcu-for-linus git tree from:

   git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git core-rcu-for-linus

   # HEAD: af91a81131aee3e233a977632a23b839857a327b Merge branch 'for-mingo' of 
git://git.kernel.org/pub/scm/linux/kernel/git/paulmck/linux-rcu into core/rcu

The main RCU changes in this development cycle were:

  - Miscellaneous fixes, including a change to call_rcu()'s
rcu_head alignment check.

  - Security-motivated list consistency checks, which are
disabled by default behind DEBUG_LIST.

  - Torture-test updates.

  - Documentation updates, yet again just simple changes.

 Thanks,

Ingo

-->
Arnd Bergmann (1):
  bug: Avoid Kconfig warning for BUG_ON_DATA_CORRUPTION

Kees Cook (5):
  list: Split list_add() debug checking into separate function
  rculist: Consolidate DEBUG_LIST for list_add_rcu()
  list: Split list_del() debug checking into separate function
  bug: Provide toggle for BUG on data corruption
  lkdtm: Add tests for struct list corruption

Nikolay Borisov (1):
  rcu: RCU_TRACE enables event tracing as well as debugfs

Paul E. McKenney (9):
  documentation: Present updated RCU guarantee
  rcu: Tighten up __call_rcu() rcu_head alignment check
  rcu: Remove obsolete rcu_check_callbacks() header comment
  rcu: Remove obsolete comment from __call_rcu()
  torture: Trace long read-side delays
  rcu: Make expedited grace periods recheck dyntick idle state
  rcu: Don't kick unless grace period or request
  torture: Remove obsolete files from rcutorture .gitignore
  torture: Prevent jitter from delaying build-only runs

Pranith Kumar (1):
  Documentation/RCU: Fix minor typo

Valentin Rothberg (1):
  lib/Kconfig.debug: Fix typo in select statement


 .../RCU/Design/Requirements/Requirements.html  | 25 +-
 Documentation/RCU/whatisRCU.txt|  2 +-
 drivers/misc/lkdtm.h   |  2 +
 drivers/misc/lkdtm_bugs.c  | 68 +++
 drivers/misc/lkdtm_core.c  |  2 +
 include/linux/bug.h| 17 
 include/linux/list.h   | 37 +---
 include/linux/rculist.h|  8 +-
 include/trace/events/rcu.h |  5 +-
 kernel/rcu/rcutorture.c| 11 ++-
 kernel/rcu/tree.c  | 17 ++--
 kernel/rcu/tree.h  |  1 +
 kernel/rcu/tree_exp.h  | 12 ++-
 lib/Kconfig.debug  | 15 +++-
 lib/list_debug.c   | 99 +++---
 tools/testing/selftests/rcutorture/.gitignore  |  2 -
 tools/testing/selftests/rcutorture/bin/kvm.sh  |  5 ++
 17 files changed, 221 insertions(+), 107 deletions(-)

diff --git a/Documentation/RCU/Design/Requirements/Requirements.html 
b/Documentation/RCU/Design/Requirements/Requirements.html
index a4d3838130e4..39bcb74ea733 100644
--- a/Documentation/RCU/Design/Requirements/Requirements.html
+++ b/Documentation/RCU/Design/Requirements/Requirements.html
@@ -547,7 +547,7 @@ The rcu_access_pointer() on line6 is similar 
to
It could reuse a value formerly fetched from this same pointer.
It could also fetch the pointer from gp in a byte-at-a-time
manner, resulting in load tearing, in turn resulting a bytewise
-   mash-up of two distince pointer values.
+   mash-up of two distinct pointer values.
It might even use value-speculation optimizations, where it makes
a wrong guess, but by the time it gets around to checking the
value, an update has changed the pointer to match the wrong guess.
@@ -659,6 +659,29 @@ demonstrates the need for RCU's stringent memory-ordering 
guarantees on
In other words, a given instance of synchronize_rcu()
can avoid waiting on a given RCU read-side critical section only
if it can prove that synchronize_rcu() started first.
+
+   
+   A related question is "When rcu_read_lock()
+   doesn't generate any code, why does it matter how it relates
+   to a grace period?"
+   The answer is that it is not the relationship of
+   rcu_read_lock() itself that is important, but rather
+   the relationship of the code within the enclosed RCU read-side
+   critical section to the code preceding and following the
+   grace period.
+   If we take this viewpoint, then a given RCU read-side critical
+   section begins before a given grace period when some access
+   preceding the grace period observes the effect of some access
+   within the critical section, in which case none of the accesses
+   within the critical section may observe the effects of any
+   access following the grace period.
+
+   
+   As of 

[PATCH] ACPI: small formatting fixes

2016-12-11 Thread Nick Desaulniers
A quick cleanup that passes scripts/checkpatch.pl -f .

Signed-off-by: Nick Desaulniers 
---
 arch/x86/kernel/acpi/cstate.c | 20 ++--
 1 file changed, 10 insertions(+), 10 deletions(-)

diff --git a/arch/x86/kernel/acpi/cstate.c b/arch/x86/kernel/acpi/cstate.c
index af15f44..ed52aec 100644
--- a/arch/x86/kernel/acpi/cstate.c
+++ b/arch/x86/kernel/acpi/cstate.c
@@ -1,7 +1,7 @@
 /*
  * Copyright (C) 2005 Intel Corporation
- * Venkatesh Pallipadi 
- * - Added _PDC for SMP C-states on Intel CPUs
+ * Venkatesh Pallipadi 
+ * - Added _PDC for SMP C-states on Intel CPUs
  */
 
 #include 
@@ -12,7 +12,6 @@
 #include 
 
 #include 
-#include 
 #include 
 #include 
 
@@ -50,8 +49,8 @@ void acpi_processor_power_init_bm_check(struct 
acpi_processor_flags *flags,
 * P4, Core and beyond CPUs
 */
if (c->x86_vendor == X86_VENDOR_INTEL &&
-   (c->x86 > 0xf || (c->x86 == 6 && c->x86_model >= 0x0f)))
-   flags->bm_control = 0;
+   (c->x86 > 0xf || (c->x86 == 6 && c->x86_model >= 0x0f)))
+   flags->bm_control = 0;
 }
 EXPORT_SYMBOL(acpi_processor_power_init_bm_check);
 
@@ -89,7 +88,8 @@ static long acpi_processor_ffh_cstate_probe_cpu(void *_cx)
retval = 0;
/* If the HW does not support any sub-states in this C-state */
if (num_cstate_subtype == 0) {
-   pr_warn(FW_BUG "ACPI MWAIT C-state 0x%x not supported by HW 
(0x%x)\n", cx->address, edx_part);
+   pr_warn(FW_BUG "ACPI MWAIT C-state 0x%x not supported by HW 
(0x%x)\n",
+   cx->address, edx_part);
retval = -1;
goto out;
}
@@ -103,9 +103,8 @@ static long acpi_processor_ffh_cstate_probe_cpu(void *_cx)
 
if (!mwait_supported[cstate_type]) {
mwait_supported[cstate_type] = 1;
-   printk(KERN_DEBUG
-   "Monitor-Mwait will be used to enter C-%d "
-   "state\n", cx->type);
+   pr_debug("Monitor-Mwait will be used to enter C-%d state\n",
+   cx->type);
}
snprintf(cx->desc,
ACPI_CX_DESC_LEN, "ACPI FFH INTEL MWAIT 0x%x",
@@ -159,13 +158,14 @@ void __cpuidle acpi_processor_ffh_cstate_enter(struct 
acpi_processor_cx *cx)
 
percpu_entry = per_cpu_ptr(cpu_cstate_entry, cpu);
mwait_idle_with_hints(percpu_entry->states[cx->index].eax,
- percpu_entry->states[cx->index].ecx);
+   percpu_entry->states[cx->index].ecx);
 }
 EXPORT_SYMBOL_GPL(acpi_processor_ffh_cstate_enter);
 
 static int __init ffh_cstate_init(void)
 {
struct cpuinfo_x86 *c = &boot_cpu_data;
+
if (c->x86_vendor != X86_VENDOR_INTEL)
return -1;
 
-- 
2.9.3



[PATCH] ACPI: small formatting fixes

2016-12-11 Thread Nick Desaulniers
A quick cleanup that passes scripts/checkpatch.pl -f .

Signed-off-by: Nick Desaulniers 
---
 arch/x86/kernel/acpi/cstate.c | 20 ++--
 1 file changed, 10 insertions(+), 10 deletions(-)

diff --git a/arch/x86/kernel/acpi/cstate.c b/arch/x86/kernel/acpi/cstate.c
index af15f44..ed52aec 100644
--- a/arch/x86/kernel/acpi/cstate.c
+++ b/arch/x86/kernel/acpi/cstate.c
@@ -1,7 +1,7 @@
 /*
  * Copyright (C) 2005 Intel Corporation
- * Venkatesh Pallipadi 
- * - Added _PDC for SMP C-states on Intel CPUs
+ * Venkatesh Pallipadi 
+ * - Added _PDC for SMP C-states on Intel CPUs
  */
 
 #include 
@@ -12,7 +12,6 @@
 #include 
 
 #include 
-#include 
 #include 
 #include 
 
@@ -50,8 +49,8 @@ void acpi_processor_power_init_bm_check(struct 
acpi_processor_flags *flags,
 * P4, Core and beyond CPUs
 */
if (c->x86_vendor == X86_VENDOR_INTEL &&
-   (c->x86 > 0xf || (c->x86 == 6 && c->x86_model >= 0x0f)))
-   flags->bm_control = 0;
+   (c->x86 > 0xf || (c->x86 == 6 && c->x86_model >= 0x0f)))
+   flags->bm_control = 0;
 }
 EXPORT_SYMBOL(acpi_processor_power_init_bm_check);
 
@@ -89,7 +88,8 @@ static long acpi_processor_ffh_cstate_probe_cpu(void *_cx)
retval = 0;
/* If the HW does not support any sub-states in this C-state */
if (num_cstate_subtype == 0) {
-   pr_warn(FW_BUG "ACPI MWAIT C-state 0x%x not supported by HW 
(0x%x)\n", cx->address, edx_part);
+   pr_warn(FW_BUG "ACPI MWAIT C-state 0x%x not supported by HW 
(0x%x)\n",
+   cx->address, edx_part);
retval = -1;
goto out;
}
@@ -103,9 +103,8 @@ static long acpi_processor_ffh_cstate_probe_cpu(void *_cx)
 
if (!mwait_supported[cstate_type]) {
mwait_supported[cstate_type] = 1;
-   printk(KERN_DEBUG
-   "Monitor-Mwait will be used to enter C-%d "
-   "state\n", cx->type);
+   pr_debug("Monitor-Mwait will be used to enter C-%d state\n",
+   cx->type);
}
snprintf(cx->desc,
ACPI_CX_DESC_LEN, "ACPI FFH INTEL MWAIT 0x%x",
@@ -159,13 +158,14 @@ void __cpuidle acpi_processor_ffh_cstate_enter(struct 
acpi_processor_cx *cx)
 
percpu_entry = per_cpu_ptr(cpu_cstate_entry, cpu);
mwait_idle_with_hints(percpu_entry->states[cx->index].eax,
- percpu_entry->states[cx->index].ecx);
+   percpu_entry->states[cx->index].ecx);
 }
 EXPORT_SYMBOL_GPL(acpi_processor_ffh_cstate_enter);
 
 static int __init ffh_cstate_init(void)
 {
struct cpuinfo_x86 *c = &boot_cpu_data;
+
if (c->x86_vendor != X86_VENDOR_INTEL)
return -1;
 
-- 
2.9.3



[PATCH] serial: mxs-auart: support CMSPAR termios cflag

2016-12-11 Thread Wolfgang Ocker
If CMSPAR is set in the c_cflag of termios, "stick" parity is enabled.

Tested on an i.MX28 system

Signed-off-by: Wolfgang Ocker 
---
v2: require PARENB to be also set in termios' c_cflag for CMSPAR
---
 drivers/tty/serial/mxs-auart.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/tty/serial/mxs-auart.c b/drivers/tty/serial/mxs-auart.c
index 770454e0dfa3..fd819ea26762 100644
--- a/drivers/tty/serial/mxs-auart.c
+++ b/drivers/tty/serial/mxs-auart.c
@@ -95,6 +95,7 @@
 #define AUART_LINECTRL_BAUD_DIVFRAC_SHIFT  8
 #define AUART_LINECTRL_BAUD_DIVFRAC_MASK   0x3f00
 #define AUART_LINECTRL_BAUD_DIVFRAC(v) (((v) & 0x3f) << 8)
+#define AUART_LINECTRL_SPS (1 << 7)
 #define AUART_LINECTRL_WLEN_MASK   0x0060
 #define AUART_LINECTRL_WLEN(v) (((v) & 0x3) << 5)
 #define AUART_LINECTRL_FEN (1 << 4)
@@ -1014,6 +1015,8 @@ static void mxs_auart_settermios(struct uart_port *u,
ctrl |= AUART_LINECTRL_PEN;
if ((cflag & PARODD) == 0)
ctrl |= AUART_LINECTRL_EPS;
+   if (cflag & CMSPAR)
+   ctrl |= AUART_LINECTRL_SPS;
}
 
u->read_status_mask = 0;
-- 
2.10.0



[PATCH] serial: mxs-auart: support CMSPAR termios cflag

2016-12-11 Thread Wolfgang Ocker
If CMSPAR is set in the c_cflag of termios, "stick" parity is enabled.

Tested on an i.MX28 system

Signed-off-by: Wolfgang Ocker 
---
v2: require PARENB to be also set in termios' c_cflag for CMSPAR
---
 drivers/tty/serial/mxs-auart.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/tty/serial/mxs-auart.c b/drivers/tty/serial/mxs-auart.c
index 770454e0dfa3..fd819ea26762 100644
--- a/drivers/tty/serial/mxs-auart.c
+++ b/drivers/tty/serial/mxs-auart.c
@@ -95,6 +95,7 @@
 #define AUART_LINECTRL_BAUD_DIVFRAC_SHIFT  8
 #define AUART_LINECTRL_BAUD_DIVFRAC_MASK   0x3f00
 #define AUART_LINECTRL_BAUD_DIVFRAC(v) (((v) & 0x3f) << 8)
+#define AUART_LINECTRL_SPS (1 << 7)
 #define AUART_LINECTRL_WLEN_MASK   0x0060
 #define AUART_LINECTRL_WLEN(v) (((v) & 0x3) << 5)
 #define AUART_LINECTRL_FEN (1 << 4)
@@ -1014,6 +1015,8 @@ static void mxs_auart_settermios(struct uart_port *u,
ctrl |= AUART_LINECTRL_PEN;
if ((cflag & PARODD) == 0)
ctrl |= AUART_LINECTRL_EPS;
+   if (cflag & CMSPAR)
+   ctrl |= AUART_LINECTRL_SPS;
}
 
u->read_status_mask = 0;
-- 
2.10.0



Re: [PATCH 1/3] arm: hisi: add ARCH_MULTI_V5 support

2016-12-11 Thread Jiancheng Xue


On 2016/12/9 23:07, Marty Plummer wrote:
> On 12/04/2016 08:03 PM, Jiancheng Xue wrote:
>> Hi Arnd,
>>
>> On 2016/10/17 21:48, Arnd Bergmann wrote:
>>> On Monday, October 17, 2016 8:07:03 PM CEST Pan Wen wrote:
>>>> Add support for some HiSilicon SoCs which depend on ARCH_MULTI_V5.
>>>>
>>>> Signed-off-by: Pan Wen 
>>>>
>>>
>>> Looks ok. I've added Marty Plummer to Cc, he was recently proposing
>>> patches for Hi3520, which I think is closely related to this one.
>>> Please try to work together so the patches don't conflict. It should
>>> be fairly straightforward since you are basically doing the same
>>> change here.
>>>
>> Marty hasn't give any replies about this thread until now. I reviewed
>> the patch for Hi3520. And I think this patch won't conflict with Hi3520.
>> Could you help us to ack this patch?
>>
>> Thanks,
>> Jiancheng
>>
>>
> Hello all
> 
> Sorry for my lack of activity, I've just been very busy lately with real
> world considerations (well, real world but related to this; I have
> another board based on hi3521a I've been tinkering with, trying to get
> the manuf. to release gpl source via the sfconfservancy). I've not given
> up on the project, however, since devices like this really need updates
> in light of the recent botnets targeting devices of this sort as
> manpower.

Do you have any objections to this patch?  If not, I hope this patch can
be merged in 4.10.  Thank you.

Regards,
Jiancheng





Re: [PATCH 1/3] arm: hisi: add ARCH_MULTI_V5 support

2016-12-11 Thread Jiancheng Xue


On 2016/12/9 23:07, Marty Plummer wrote:
> On 12/04/2016 08:03 PM, Jiancheng Xue wrote:
>> Hi Arnd,
>>
>> On 2016/10/17 21:48, Arnd Bergmann wrote:
>>> On Monday, October 17, 2016 8:07:03 PM CEST Pan Wen wrote:
>>>> Add support for some HiSilicon SoCs which depend on ARCH_MULTI_V5.
>>>>
>>>> Signed-off-by: Pan Wen 
>>>>
>>>
>>> Looks ok. I've added Marty Plummer to Cc, he was recently proposing
>>> patches for Hi3520, which I think is closely related to this one.
>>> Please try to work together so the patches don't conflict. It should
>>> be fairly straightforward since you are basically doing the same
>>> change here.
>>>
>> Marty hasn't give any replies about this thread until now. I reviewed
>> the patch for Hi3520. And I think this patch won't conflict with Hi3520.
>> Could you help us to ack this patch?
>>
>> Thanks,
>> Jiancheng
>>
>>
> Hello all
> 
> Sorry for my lack of activity, I've just been very busy lately with real
> world considerations (well, real world but related to this; I have
> another board based on hi3521a I've been tinkering with, trying to get
> the manuf. to release gpl source via the sfconfservancy). I've not given
> up on the project, however, since devices like this really need updates
> in light of the recent botnets targeting devices of this sort as
> manpower.

Do you have any objections to this patch?  If not, I hope this patch can
be merged in 4.10.  Thank you.

Regards,
Jiancheng





[PATCH] Input: elantech - force a module ignore ABS mode

2016-12-11 Thread KT Liao
One Elan module whose sample version is 0x74 and hw_version is 0x03 has a bug 
in absolute mode, so let it run in the default mode.
Signed-off-by: KT Liao 
---
 drivers/input/mouse/elantech.c | 7 +++
 1 file changed, 7 insertions(+)

diff --git a/drivers/input/mouse/elantech.c b/drivers/input/mouse/elantech.c
index db7d1d6..f1c2d10 100644
--- a/drivers/input/mouse/elantech.c
+++ b/drivers/input/mouse/elantech.c
@@ -1687,6 +1687,13 @@ int elantech_init(struct psmouse *psmouse)
 etd->samples[0], etd->samples[1], etd->samples[2]);
}
 
+   if (etd->samples[1] == 0x74 && etd->hw_version == 0x03) {
+   /*This module has a bug in absolute mode, let it be default 
mode  */
+   psmouse_info(psmouse,
+"This module only active in default mode\n");
+   goto init_fail;
+   }
+
if (elantech_set_absolute_mode(psmouse)) {
psmouse_err(psmouse,
"failed to put touchpad into absolute mode.\n");
-- 
2.7.4



[PATCH] Input: elantech - force a module ignore ABS mode

2016-12-11 Thread KT Liao
One Elan module whose sample version is 0x74 and hw_version is 0x03 has a bug 
in absolute mode, so let it run in the default mode.
Signed-off-by: KT Liao 
---
 drivers/input/mouse/elantech.c | 7 +++
 1 file changed, 7 insertions(+)

diff --git a/drivers/input/mouse/elantech.c b/drivers/input/mouse/elantech.c
index db7d1d6..f1c2d10 100644
--- a/drivers/input/mouse/elantech.c
+++ b/drivers/input/mouse/elantech.c
@@ -1687,6 +1687,13 @@ int elantech_init(struct psmouse *psmouse)
 etd->samples[0], etd->samples[1], etd->samples[2]);
}
 
+   if (etd->samples[1] == 0x74 && etd->hw_version == 0x03) {
+   /*This module has a bug in absolute mode, let it be default 
mode  */
+   psmouse_info(psmouse,
+"This module only active in default mode\n");
+   goto init_fail;
+   }
+
if (elantech_set_absolute_mode(psmouse)) {
psmouse_err(psmouse,
"failed to put touchpad into absolute mode.\n");
-- 
2.7.4



Re: [PATCH 2/2] kcov: make kcov work properly with KASLR enabled

2016-12-11 Thread Dmitry Vyukov
On Sun, Dec 11, 2016 at 10:37 PM, Alexander Popov  wrote:
> On 11.12.2016 12:32, Dmitry Vyukov wrote:
>> On Sun, Dec 11, 2016 at 1:50 AM, Alexander Popov  
>> wrote:
>>> Subtract KASLR offset from the kernel addresses reported by kcov.
>>> Tested on x86_64 and AArch64 (Hikey LeMaker).
>>>
>>> Signed-off-by: Alexander Popov 
>>> ---
>>>  kernel/kcov.c | 8 +++-
>>>  1 file changed, 7 insertions(+), 1 deletion(-)
>>
>> I think generally this is the right thing to do.
>>
>>  There are 2 pending patches for kcov by +Quentin (hopefully in mm):
>> "kcov: add AFL-style tracing"
>> "kcov: size of arena is now given in bytes"
>> https://groups.google.com/forum/#!topic/syzkaller/gcqbIhKjGcY
>> https://groups.google.com/d/msg/syzkaller/gcqbIhKjGcY/KQFryjBKCAAJ
>>
>> Your patch probably conflicts with them.
>> Should you base them on top of these patches, so that Andrew can merge
>> it without conflicts?
>
> Excuse me, I can't find these patches in:
> git://git.kernel.org/pub/scm/linux/kernel/git/next/linux-next.git
> git://git.kernel.org/pub/scm/linux/kernel/git/mhocko/mm.git
> git://git.cmpxchg.org/linux-mmots.git
>
> Could you point at the tree which I can rebase onto?
> Should I cherry-pick Quentin's patches manually?


Quentin, do you know the fate of your patches? They do not seem to be
in the mm tree.


Re: [PATCH 2/2] kcov: make kcov work properly with KASLR enabled

2016-12-11 Thread Dmitry Vyukov
On Sun, Dec 11, 2016 at 10:37 PM, Alexander Popov  wrote:
> On 11.12.2016 12:32, Dmitry Vyukov wrote:
>> On Sun, Dec 11, 2016 at 1:50 AM, Alexander Popov  
>> wrote:
>>> Subtract KASLR offset from the kernel addresses reported by kcov.
>>> Tested on x86_64 and AArch64 (Hikey LeMaker).
>>>
>>> Signed-off-by: Alexander Popov 
>>> ---
>>>  kernel/kcov.c | 8 +++-
>>>  1 file changed, 7 insertions(+), 1 deletion(-)
>>
>> I think generally this is the right thing to do.
>>
>>  There are 2 pending patches for kcov by +Quentin (hopefully in mm):
>> "kcov: add AFL-style tracing"
>> "kcov: size of arena is now given in bytes"
>> https://groups.google.com/forum/#!topic/syzkaller/gcqbIhKjGcY
>> https://groups.google.com/d/msg/syzkaller/gcqbIhKjGcY/KQFryjBKCAAJ
>>
>> Your patch probably conflicts with them.
>> Should you base them on top of these patches, so that Andrew can merge
>> it without conflicts?
>
> Excuse me, I can't find these patches in:
> git://git.kernel.org/pub/scm/linux/kernel/git/next/linux-next.git
> git://git.kernel.org/pub/scm/linux/kernel/git/mhocko/mm.git
> git://git.cmpxchg.org/linux-mmots.git
>
> Could you point at the tree which I can rebase onto?
> Should I cherry-pick Quentin's patches manually?


Quentin, do you know the fate of your patches? They do not seem to be
in the mm tree.


[tip:sched/core] sched/core: Fix find_idlest_group() for fork

2016-12-11 Thread tip-bot for Vincent Guittot
Commit-ID:  f519a3f1c6b7a990e5aed37a8f853c6ecfdee945
Gitweb: http://git.kernel.org/tip/f519a3f1c6b7a990e5aed37a8f853c6ecfdee945
Author: Vincent Guittot 
AuthorDate: Thu, 8 Dec 2016 17:56:53 +0100
Committer:  Ingo Molnar 
CommitDate: Sun, 11 Dec 2016 13:10:56 +0100

sched/core: Fix find_idlest_group() for fork

During fork, the utilization of a task is initialized once the rq has been
selected because the current utilization level of the rq is used to
set the utilization of the fork task. As the task's utilization is
still 0 at this step of the fork sequence, it doesn't make sense to
look for some spare capacity that can fit the task's utilization.
Furthermore, I can see perf regressions for the test:

   hackbench -P -g 1

because the least loaded policy is always bypassed and tasks are not
spread during fork.

With this patch and the fix below, we are back to the same performance as
for v4.8. The fix below is only a temporary one used for the test
until a smarter solution is found because we can't simply remove the
test which is useful for other benchmarks.

| @@ -5708,13 +5708,6 @@ static int select_idle_cpu(struct task_struct *p, 
struct sched_domain *sd, int t
|
|   avg_cost = this_sd->avg_scan_cost;
|
| - /*
| -  * Due to large variance we need a large fuzz factor; hackbench in
| -  * particularly is sensitive here.
| -  */
| - if ((avg_idle / 512) < avg_cost)
| - return -1;
| -
|   time = local_clock();
|
|   for_each_cpu_wrap(cpu, sched_domain_span(sd), target, wrap) {

Tested-by: Matt Fleming 
Signed-off-by: Vincent Guittot 
Signed-off-by: Peter Zijlstra (Intel) 
Reviewed-by: Matt Fleming 
Acked-by: Morten Rasmussen 
Cc: Linus Torvalds 
Cc: Peter Zijlstra 
Cc: Thomas Gleixner 
Cc: dietmar.eggem...@arm.com
Cc: kernel...@gmail.com
Cc: umgwanakikb...@gmail.com
Cc: yuyang...@intel.com
Link: 
http://lkml.kernel.org/r/1481216215-24651-2-git-send-email-vincent.guit...@linaro.org
Signed-off-by: Ingo Molnar 
---
 kernel/sched/fair.c | 8 
 1 file changed, 8 insertions(+)

diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 18d9e75..ebb815f 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -5473,13 +5473,21 @@ find_idlest_group(struct sched_domain *sd, struct 
task_struct *p,
 * utilized systems if we require spare_capacity > task_util(p),
 * so we allow for some task stuffing by using
 * spare_capacity > task_util(p)/2.
+*
+* Spare capacity can't be used for fork because the utilization has
+* not been set yet, we must first select a rq to compute the initial
+* utilization.
 */
+   if (sd_flag & SD_BALANCE_FORK)
+   goto skip_spare;
+
if (this_spare > task_util(p) / 2 &&
imbalance*this_spare > 100*most_spare)
return NULL;
else if (most_spare > task_util(p) / 2)
return most_spare_sg;
 
+skip_spare:
if (!idlest || 100*this_load < imbalance*min_load)
return NULL;
return idlest;


[tip:sched/core] sched/core: Fix find_idlest_group() for fork

2016-12-11 Thread tip-bot for Vincent Guittot
Commit-ID:  f519a3f1c6b7a990e5aed37a8f853c6ecfdee945
Gitweb: http://git.kernel.org/tip/f519a3f1c6b7a990e5aed37a8f853c6ecfdee945
Author: Vincent Guittot 
AuthorDate: Thu, 8 Dec 2016 17:56:53 +0100
Committer:  Ingo Molnar 
CommitDate: Sun, 11 Dec 2016 13:10:56 +0100

sched/core: Fix find_idlest_group() for fork

During fork, the utilization of a task is initialized once the rq has been
selected because the current utilization level of the rq is used to
set the utilization of the fork task. As the task's utilization is
still 0 at this step of the fork sequence, it doesn't make sense to
look for some spare capacity that can fit the task's utilization.
Furthermore, I can see perf regressions for the test:

   hackbench -P -g 1

because the least loaded policy is always bypassed and tasks are not
spread during fork.

With this patch and the fix below, we are back to the same performance as
for v4.8. The fix below is only a temporary one used for the test
until a smarter solution is found because we can't simply remove the
test which is useful for other benchmarks.

| @@ -5708,13 +5708,6 @@ static int select_idle_cpu(struct task_struct *p, 
struct sched_domain *sd, int t
|
|   avg_cost = this_sd->avg_scan_cost;
|
| - /*
| -  * Due to large variance we need a large fuzz factor; hackbench in
| -  * particularly is sensitive here.
| -  */
| - if ((avg_idle / 512) < avg_cost)
| - return -1;
| -
|   time = local_clock();
|
|   for_each_cpu_wrap(cpu, sched_domain_span(sd), target, wrap) {

Tested-by: Matt Fleming 
Signed-off-by: Vincent Guittot 
Signed-off-by: Peter Zijlstra (Intel) 
Reviewed-by: Matt Fleming 
Acked-by: Morten Rasmussen 
Cc: Linus Torvalds 
Cc: Peter Zijlstra 
Cc: Thomas Gleixner 
Cc: dietmar.eggem...@arm.com
Cc: kernel...@gmail.com
Cc: umgwanakikb...@gmail.com
Cc: yuyang...@intel.com
Link: 
http://lkml.kernel.org/r/1481216215-24651-2-git-send-email-vincent.guit...@linaro.org
Signed-off-by: Ingo Molnar 
---
 kernel/sched/fair.c | 8 
 1 file changed, 8 insertions(+)

diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 18d9e75..ebb815f 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -5473,13 +5473,21 @@ find_idlest_group(struct sched_domain *sd, struct 
task_struct *p,
 * utilized systems if we require spare_capacity > task_util(p),
 * so we allow for some task stuffing by using
 * spare_capacity > task_util(p)/2.
+*
+* Spare capacity can't be used for fork because the utilization has
+* not been set yet, we must first select a rq to compute the initial
+* utilization.
 */
+   if (sd_flag & SD_BALANCE_FORK)
+   goto skip_spare;
+
if (this_spare > task_util(p) / 2 &&
imbalance*this_spare > 100*most_spare)
return NULL;
else if (most_spare > task_util(p) / 2)
return most_spare_sg;
 
+skip_spare:
if (!idlest || 100*this_load < imbalance*min_load)
return NULL;
return idlest;


[tip:perf/core] perf/x86: Fix exclusion of BTS and LBR for Goldmont

2016-12-11 Thread tip-bot for Andi Kleen
Commit-ID:  b0c1ef52959582144bbea9a2b37db7f4c9e399f7
Gitweb: http://git.kernel.org/tip/b0c1ef52959582144bbea9a2b37db7f4c9e399f7
Author: Andi Kleen 
AuthorDate: Thu, 8 Dec 2016 16:14:17 -0800
Committer:  Ingo Molnar 
CommitDate: Sun, 11 Dec 2016 13:06:09 +0100

perf/x86: Fix exclusion of BTS and LBR for Goldmont

An earlier patch allowed enabling PT and LBR at the same
time on Goldmont. However it also allowed enabling BTS and LBR
at the same time, which is still not supported. Fix this by
bypassing the check only for PT.

Signed-off-by: Andi Kleen 
Signed-off-by: Peter Zijlstra (Intel) 
Cc: Linus Torvalds 
Cc: Peter Zijlstra 
Cc: Thomas Gleixner 
Cc: alexander.shish...@intel.com
Cc: kan.li...@intel.com
Cc: 
Fixes: ccbebba4c6bf ("perf/x86/intel/pt: Bypass PT vs. LBR exclusivity if the 
core supports it")
Link: http://lkml.kernel.org/r/20161209001417.4713-1-a...@firstfloor.org
Signed-off-by: Ingo Molnar 
---
 arch/x86/events/core.c   | 8 ++--
 arch/x86/events/perf_event.h | 2 +-
 2 files changed, 7 insertions(+), 3 deletions(-)

diff --git a/arch/x86/events/core.c b/arch/x86/events/core.c
index 6e395c9..7fe88bb 100644
--- a/arch/x86/events/core.c
+++ b/arch/x86/events/core.c
@@ -365,7 +365,11 @@ int x86_add_exclusive(unsigned int what)
 {
int i;
 
-   if (x86_pmu.lbr_pt_coexist)
+   /*
+* When lbr_pt_coexist we allow PT to coexist with either LBR or BTS.
+* LBR and BTS are still mutually exclusive.
+*/
+   if (x86_pmu.lbr_pt_coexist && what == x86_lbr_exclusive_pt)
return 0;
 
if (!atomic_inc_not_zero(&x86_pmu.lbr_exclusive[what])) {
@@ -388,7 +392,7 @@ fail_unlock:
 
 void x86_del_exclusive(unsigned int what)
 {
-   if (x86_pmu.lbr_pt_coexist)
+   if (x86_pmu.lbr_pt_coexist && what == x86_lbr_exclusive_pt)
return;
 
atomic_dec(&x86_pmu.lbr_exclusive[what]);
diff --git a/arch/x86/events/perf_event.h b/arch/x86/events/perf_event.h
index a77ee02..bcbb1d2 100644
--- a/arch/x86/events/perf_event.h
+++ b/arch/x86/events/perf_event.h
@@ -604,7 +604,7 @@ struct x86_pmu {
u64 lbr_sel_mask;  /* LBR_SELECT valid bits */
const int   *lbr_sel_map;  /* lbr_select mappings */
boollbr_double_abort;  /* duplicated lbr aborts */
-   boollbr_pt_coexist;/* LBR may coexist with PT */
+   boollbr_pt_coexist;/* (LBR|BTS) may coexist 
with PT */
 
/*
 * Intel PT/LBR/BTS are exclusive


[tip:perf/core] perf/x86: Fix exclusion of BTS and LBR for Goldmont

2016-12-11 Thread tip-bot for Andi Kleen
Commit-ID:  b0c1ef52959582144bbea9a2b37db7f4c9e399f7
Gitweb: http://git.kernel.org/tip/b0c1ef52959582144bbea9a2b37db7f4c9e399f7
Author: Andi Kleen 
AuthorDate: Thu, 8 Dec 2016 16:14:17 -0800
Committer:  Ingo Molnar 
CommitDate: Sun, 11 Dec 2016 13:06:09 +0100

perf/x86: Fix exclusion of BTS and LBR for Goldmont

An earlier patch allowed enabling PT and LBR at the same
time on Goldmont. However it also allowed enabling BTS and LBR
at the same time, which is still not supported. Fix this by
bypassing the check only for PT.

Signed-off-by: Andi Kleen 
Signed-off-by: Peter Zijlstra (Intel) 
Cc: Linus Torvalds 
Cc: Peter Zijlstra 
Cc: Thomas Gleixner 
Cc: alexander.shish...@intel.com
Cc: kan.li...@intel.com
Cc: 
Fixes: ccbebba4c6bf ("perf/x86/intel/pt: Bypass PT vs. LBR exclusivity if the 
core supports it")
Link: http://lkml.kernel.org/r/20161209001417.4713-1-a...@firstfloor.org
Signed-off-by: Ingo Molnar 
---
 arch/x86/events/core.c   | 8 ++--
 arch/x86/events/perf_event.h | 2 +-
 2 files changed, 7 insertions(+), 3 deletions(-)

diff --git a/arch/x86/events/core.c b/arch/x86/events/core.c
index 6e395c9..7fe88bb 100644
--- a/arch/x86/events/core.c
+++ b/arch/x86/events/core.c
@@ -365,7 +365,11 @@ int x86_add_exclusive(unsigned int what)
 {
int i;
 
-   if (x86_pmu.lbr_pt_coexist)
+   /*
+* When lbr_pt_coexist we allow PT to coexist with either LBR or BTS.
+* LBR and BTS are still mutually exclusive.
+*/
+   if (x86_pmu.lbr_pt_coexist && what == x86_lbr_exclusive_pt)
return 0;
 
if (!atomic_inc_not_zero(&x86_pmu.lbr_exclusive[what])) {
@@ -388,7 +392,7 @@ fail_unlock:
 
 void x86_del_exclusive(unsigned int what)
 {
-   if (x86_pmu.lbr_pt_coexist)
+   if (x86_pmu.lbr_pt_coexist && what == x86_lbr_exclusive_pt)
return;
 
atomic_dec(&x86_pmu.lbr_exclusive[what]);
diff --git a/arch/x86/events/perf_event.h b/arch/x86/events/perf_event.h
index a77ee02..bcbb1d2 100644
--- a/arch/x86/events/perf_event.h
+++ b/arch/x86/events/perf_event.h
@@ -604,7 +604,7 @@ struct x86_pmu {
u64 lbr_sel_mask;  /* LBR_SELECT valid bits */
const int   *lbr_sel_map;  /* lbr_select mappings */
boollbr_double_abort;  /* duplicated lbr aborts */
-   boollbr_pt_coexist;/* LBR may coexist with PT */
+   boollbr_pt_coexist;/* (LBR|BTS) may coexist 
with PT */
 
/*
 * Intel PT/LBR/BTS are exclusive


[tip:locking/core] x86/paravirt: Fix bool return type for PVOP_CALL()

2016-12-11 Thread tip-bot for Peter Zijlstra
Commit-ID:  11f254dbb3a2e3f0d8552d0dd37f4faa432b6b16
Gitweb: http://git.kernel.org/tip/11f254dbb3a2e3f0d8552d0dd37f4faa432b6b16
Author: Peter Zijlstra 
AuthorDate: Thu, 8 Dec 2016 16:42:15 +0100
Committer:  Ingo Molnar 
CommitDate: Sun, 11 Dec 2016 13:09:20 +0100

x86/paravirt: Fix bool return type for PVOP_CALL()

Commit:

  3cded4179481 ("x86/paravirt: Optimize native pv_lock_ops.vcpu_is_preempted()")

introduced a paravirt op with bool return type [*]

It turns out that the PVOP_CALL*() macros miscompile when rettype is
bool. Code that looked like:

   83 ef 01                sub    $0x1,%edi
   ff 15 32 a0 d8 00       callq  *0xd8a032(%rip)        # ffffffff81e28120
   84 c0                   test   %al,%al

ended up looking like so after PVOP_CALL1() was applied:

   83 ef 01                sub    $0x1,%edi
   48 63 ff                movslq %edi,%rdi
   ff 14 25 20 81 e2 81    callq  *0xffffffff81e28120
   48 85 c0                test   %rax,%rax

Note how it tests the whole of %rax, even though a typical bool return
function only sets %al, like:

  0f 95 c0                setne  %al
  c3                      retq

This is because PVOP_CALL() does:

__ret = (rettype)__eax;

and while regular integer type casts truncate the result, a cast to
bool tests for any !0 value. Fix this by explicitly truncating to
sizeof(rettype) before casting.

[*] The actual bug should've been exposed in commit:
  446f3dc8cc0a ("locking/core, x86/paravirt: Implement 
vcpu_is_preempted(cpu) for KVM and Xen guests")
but that didn't properly implement the paravirt call.

Reported-by: kernel test robot 
Signed-off-by: Peter Zijlstra (Intel) 
Cc: Alok Kataria 
Cc: Borislav Petkov 
Cc: Chris Wright 
Cc: Jeremy Fitzhardinge 
Cc: Linus Torvalds 
Cc: Pan Xinhui 
Cc: Paolo Bonzini 
Cc: Peter Anvin 
Cc: Peter Zijlstra 
Cc: Rusty Russell 
Cc: Thomas Gleixner 
Fixes: 3cded4179481 ("x86/paravirt: Optimize native 
pv_lock_ops.vcpu_is_preempted()")
Link: http://lkml.kernel.org/r/20161208154349.346057...@infradead.org
Signed-off-by: Ingo Molnar 
---
 arch/x86/include/asm/paravirt_types.h | 14 +-
 1 file changed, 13 insertions(+), 1 deletion(-)

diff --git a/arch/x86/include/asm/paravirt_types.h 
b/arch/x86/include/asm/paravirt_types.h
index 2614bd7..3f2bc0f 100644
--- a/arch/x86/include/asm/paravirt_types.h
+++ b/arch/x86/include/asm/paravirt_types.h
@@ -510,6 +510,18 @@ int paravirt_disable_iospace(void);
 #define PVOP_TEST_NULL(op) ((void)op)
 #endif
 
+#define PVOP_RETMASK(rettype)  \
+   ({  unsigned long __mask = ~0UL;\
+   switch (sizeof(rettype)) {  \
+   case 1: __mask =   0xffUL; break;   \
+   case 2: __mask = 0xffffUL; break;   \
+   case 4: __mask = 0xffffffffUL; break;   \
+   default: break; \
+   }   \
+   __mask; \
+   })
+
+
 #define PVOP_CALL(rettype, op, clbr, call_clbr, extra_clbr,
\
  pre, post, ...)   \
({  \
@@ -537,7 +549,7 @@ int paravirt_disable_iospace(void);
   paravirt_clobber(clbr),  \
   ##__VA_ARGS__\
 : "memory", "cc" extra_clbr);  \
-   __ret = (rettype)__eax; \
+   __ret = (rettype)(__eax & PVOP_RETMASK(rettype));   
\
}   \
__ret;  \
})


[tip:locking/core] x86/paravirt: Fix bool return type for PVOP_CALL()

2016-12-11 Thread tip-bot for Peter Zijlstra
Commit-ID:  11f254dbb3a2e3f0d8552d0dd37f4faa432b6b16
Gitweb: http://git.kernel.org/tip/11f254dbb3a2e3f0d8552d0dd37f4faa432b6b16
Author: Peter Zijlstra 
AuthorDate: Thu, 8 Dec 2016 16:42:15 +0100
Committer:  Ingo Molnar 
CommitDate: Sun, 11 Dec 2016 13:09:20 +0100

x86/paravirt: Fix bool return type for PVOP_CALL()

Commit:

  3cded4179481 ("x86/paravirt: Optimize native pv_lock_ops.vcpu_is_preempted()")

introduced a paravirt op with bool return type [*]

It turns out that the PVOP_CALL*() macros miscompile when rettype is
bool. Code that looked like:

   83 ef 01                sub    $0x1,%edi
   ff 15 32 a0 d8 00       callq  *0xd8a032(%rip)        # ffffffff81e28120
   84 c0                   test   %al,%al

ended up looking like so after PVOP_CALL1() was applied:

   83 ef 01                sub    $0x1,%edi
   48 63 ff                movslq %edi,%rdi
   ff 14 25 20 81 e2 81    callq  *0xffffffff81e28120
   48 85 c0                test   %rax,%rax

Note how it tests the whole of %rax, even though a typical bool return
function only sets %al, like:

  0f 95 c0                setne  %al
  c3                      retq

This is because PVOP_CALL() does:

__ret = (rettype)__eax;

and while regular integer type casts truncate the result, a cast to
bool tests for any !0 value. Fix this by explicitly truncating to
sizeof(rettype) before casting.

[*] The actual bug should've been exposed in commit:
  446f3dc8cc0a ("locking/core, x86/paravirt: Implement 
vcpu_is_preempted(cpu) for KVM and Xen guests")
but that didn't properly implement the paravirt call.

Reported-by: kernel test robot 
Signed-off-by: Peter Zijlstra (Intel) 
Cc: Alok Kataria 
Cc: Borislav Petkov 
Cc: Chris Wright 
Cc: Jeremy Fitzhardinge 
Cc: Linus Torvalds 
Cc: Pan Xinhui 
Cc: Paolo Bonzini 
Cc: Peter Anvin 
Cc: Peter Zijlstra 
Cc: Rusty Russell 
Cc: Thomas Gleixner 
Fixes: 3cded4179481 ("x86/paravirt: Optimize native 
pv_lock_ops.vcpu_is_preempted()")
Link: http://lkml.kernel.org/r/20161208154349.346057...@infradead.org
Signed-off-by: Ingo Molnar 
---
 arch/x86/include/asm/paravirt_types.h | 14 +-
 1 file changed, 13 insertions(+), 1 deletion(-)

diff --git a/arch/x86/include/asm/paravirt_types.h 
b/arch/x86/include/asm/paravirt_types.h
index 2614bd7..3f2bc0f 100644
--- a/arch/x86/include/asm/paravirt_types.h
+++ b/arch/x86/include/asm/paravirt_types.h
@@ -510,6 +510,18 @@ int paravirt_disable_iospace(void);
 #define PVOP_TEST_NULL(op) ((void)op)
 #endif
 
+#define PVOP_RETMASK(rettype)  \
+   ({  unsigned long __mask = ~0UL;\
+   switch (sizeof(rettype)) {  \
+   case 1: __mask =   0xffUL; break;   \
+   case 2: __mask = 0xffffUL; break;   \
+   case 4: __mask = 0xffffffffUL; break;   \
+   default: break; \
+   }   \
+   __mask; \
+   })
+
+
 #define PVOP_CALL(rettype, op, clbr, call_clbr, extra_clbr,
\
  pre, post, ...)   \
({  \
@@ -537,7 +549,7 @@ int paravirt_disable_iospace(void);
   paravirt_clobber(clbr),  \
   ##__VA_ARGS__\
 : "memory", "cc" extra_clbr);  \
-   __ret = (rettype)__eax; \
+   __ret = (rettype)(__eax & PVOP_RETMASK(rettype));   
\
}   \
__ret;  \
})


[tip:sched/core] sched/core: Use load_avg for selecting idlest group

2016-12-11 Thread tip-bot for Vincent Guittot
Commit-ID:  6b94780e45c17b83e3e75f8aaca5a328db583c74
Gitweb: http://git.kernel.org/tip/6b94780e45c17b83e3e75f8aaca5a328db583c74
Author: Vincent Guittot 
AuthorDate: Thu, 8 Dec 2016 17:56:54 +0100
Committer:  Ingo Molnar 
CommitDate: Sun, 11 Dec 2016 13:10:57 +0100

sched/core: Use load_avg for selecting idlest group

find_idlest_group() only compares the runnable_load_avg when looking
for the least loaded group. But on fork intensive use case like
hackbench where tasks blocked quickly after the fork, this can lead to
selecting the same CPU instead of other CPUs, which have similar
runnable load but a lower load_avg.

When the runnable_load_avg of 2 CPUs are close, we now take into
account the amount of blocked load as a 2nd selection factor. There are
now 3 zones for the runnable_load of the rq:

 - [0 .. (runnable_load - imbalance)]:
Select the new rq which has significantly less runnable_load

 - [(runnable_load - imbalance) .. (runnable_load + imbalance)]:
The runnable loads are close so we use load_avg to choose
between the 2 rq

 - [(runnable_load + imbalance) .. ULONG_MAX]:
Keep the current rq which has significantly less runnable_load

The scale factor that is currently used for comparing runnable_load,
doesn't work well with small value. As an example, the use of a
scaling factor fails as soon as this_runnable_load == 0 because we
always select local rq even if min_runnable_load is only 1, which
doesn't really make sense because they are just the same. So instead
of scaling factor, we use an absolute margin for runnable_load to
detect CPUs with similar runnable_load and we keep using scaling
factor for blocked load.

For use cases like hackbench, this enables the scheduler to select
different CPUs during the fork sequence and to spread tasks across the
system.

Tests have been done on a Hikey board (ARM based octo cores) for
several kernels. The result below gives min, max, avg and stdev values
of 18 runs with each configuration.

The patches depend on the "no missing update_rq_clock()" work.

hackbench -P -g 1

 ea86cb4b7621  7dc603c9028e  v4.8v4.8+patches
  min0.049 0.050 0.051   0,048
  avg0.057 0.057(0%) 0.057(0%)   0,055(+5%)
  max0.066 0.068 0.070   0,063
  stdev  +/-9% +/-9% +/-8%   +/-9%

More performance numbers here:

  https://lkml.kernel.org/r/20161203214707.gi20...@codeblueprint.co.uk

Tested-by: Matt Fleming 
Signed-off-by: Vincent Guittot 
Signed-off-by: Peter Zijlstra (Intel) 
Reviewed-by: Matt Fleming 
Cc: Linus Torvalds 
Cc: morten.rasmus...@arm.com
Cc: Peter Zijlstra 
Cc: Thomas Gleixner 
Cc: dietmar.eggem...@arm.com
Cc: kernel...@gmail.com
Cc: umgwanakikb...@gmail.com
Cc: yuyang...@intel.comc
Link: 
http://lkml.kernel.org/r/1481216215-24651-3-git-send-email-vincent.guit...@linaro.org
Signed-off-by: Ingo Molnar 
---
 kernel/sched/fair.c | 55 ++---
 1 file changed, 44 insertions(+), 11 deletions(-)

diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index ebb815f..6559d19 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -5405,16 +5405,20 @@ find_idlest_group(struct sched_domain *sd, struct 
task_struct *p,
 {
struct sched_group *idlest = NULL, *group = sd->groups;
struct sched_group *most_spare_sg = NULL;
-   unsigned long min_load = ULONG_MAX, this_load = 0;
+   unsigned long min_runnable_load = ULONG_MAX, this_runnable_load = 0;
+   unsigned long min_avg_load = ULONG_MAX, this_avg_load = 0;
unsigned long most_spare = 0, this_spare = 0;
int load_idx = sd->forkexec_idx;
-   int imbalance = 100 + (sd->imbalance_pct-100)/2;
+   int imbalance_scale = 100 + (sd->imbalance_pct-100)/2;
+   unsigned long imbalance = scale_load_down(NICE_0_LOAD) *
+   (sd->imbalance_pct-100) / 100;
 
if (sd_flag & SD_BALANCE_WAKE)
load_idx = sd->wake_idx;
 
do {
-   unsigned long load, avg_load, spare_cap, max_spare_cap;
+   unsigned long load, avg_load, runnable_load;
+   unsigned long spare_cap, max_spare_cap;
int local_group;
int i;
 
@@ -5431,6 +5435,7 @@ find_idlest_group(struct sched_domain *sd, struct 
task_struct *p,
 * the group containing the CPU with most spare capacity.
 */
avg_load = 0;
+   runnable_load = 0;
max_spare_cap = 0;
 
for_each_cpu(i, sched_group_cpus(group)) {
@@ -5440,7 +5445,9 @@ find_idlest_group(struct sched_domain *sd, struct 
task_struct *p,
else
   

[tip:sched/core] sched/core: Use load_avg for selecting idlest group

2016-12-11 Thread tip-bot for Vincent Guittot
Commit-ID:  6b94780e45c17b83e3e75f8aaca5a328db583c74
Gitweb: http://git.kernel.org/tip/6b94780e45c17b83e3e75f8aaca5a328db583c74
Author: Vincent Guittot 
AuthorDate: Thu, 8 Dec 2016 17:56:54 +0100
Committer:  Ingo Molnar 
CommitDate: Sun, 11 Dec 2016 13:10:57 +0100

sched/core: Use load_avg for selecting idlest group

find_idlest_group() only compares the runnable_load_avg when looking
for the least loaded group. But on fork intensive use case like
hackbench where tasks blocked quickly after the fork, this can lead to
selecting the same CPU instead of other CPUs, which have similar
runnable load but a lower load_avg.

When the runnable_load_avg of 2 CPUs are close, we now take into
account the amount of blocked load as a 2nd selection factor. There are
now 3 zones for the runnable_load of the rq:

 - [0 .. (runnable_load - imbalance)]:
Select the new rq which has significantly less runnable_load

 - [(runnable_load - imbalance) .. (runnable_load + imbalance)]:
The runnable loads are close so we use load_avg to choose
between the 2 rq

 - [(runnable_load + imbalance) .. ULONG_MAX]:
Keep the current rq which has significantly less runnable_load

The scale factor that is currently used for comparing runnable_load,
doesn't work well with small value. As an example, the use of a
scaling factor fails as soon as this_runnable_load == 0 because we
always select local rq even if min_runnable_load is only 1, which
doesn't really make sense because they are just the same. So instead
of scaling factor, we use an absolute margin for runnable_load to
detect CPUs with similar runnable_load and we keep using scaling
factor for blocked load.

For use cases like hackbench, this enables the scheduler to select
different CPUs during the fork sequence and to spread tasks across the
system.

Tests have been done on a Hikey board (ARM based octo cores) for
several kernels. The result below gives min, max, avg and stdev values
of 18 runs with each configuration.

The patches depend on the "no missing update_rq_clock()" work.

hackbench -P -g 1

 ea86cb4b7621  7dc603c9028e  v4.8v4.8+patches
  min0.049 0.050 0.051   0,048
  avg0.057 0.057(0%) 0.057(0%)   0,055(+5%)
  max0.066 0.068 0.070   0,063
  stdev  +/-9% +/-9% +/-8%   +/-9%

More performance numbers here:

  https://lkml.kernel.org/r/20161203214707.gi20...@codeblueprint.co.uk

Tested-by: Matt Fleming 
Signed-off-by: Vincent Guittot 
Signed-off-by: Peter Zijlstra (Intel) 
Reviewed-by: Matt Fleming 
Cc: Linus Torvalds 
Cc: morten.rasmus...@arm.com
Cc: Peter Zijlstra 
Cc: Thomas Gleixner 
Cc: dietmar.eggem...@arm.com
Cc: kernel...@gmail.com
Cc: umgwanakikb...@gmail.com
Cc: yuyang...@intel.comc
Link: 
http://lkml.kernel.org/r/1481216215-24651-3-git-send-email-vincent.guit...@linaro.org
Signed-off-by: Ingo Molnar 
---
 kernel/sched/fair.c | 55 ++---
 1 file changed, 44 insertions(+), 11 deletions(-)

diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index ebb815f..6559d19 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -5405,16 +5405,20 @@ find_idlest_group(struct sched_domain *sd, struct 
task_struct *p,
 {
struct sched_group *idlest = NULL, *group = sd->groups;
struct sched_group *most_spare_sg = NULL;
-   unsigned long min_load = ULONG_MAX, this_load = 0;
+   unsigned long min_runnable_load = ULONG_MAX, this_runnable_load = 0;
+   unsigned long min_avg_load = ULONG_MAX, this_avg_load = 0;
unsigned long most_spare = 0, this_spare = 0;
int load_idx = sd->forkexec_idx;
-   int imbalance = 100 + (sd->imbalance_pct-100)/2;
+   int imbalance_scale = 100 + (sd->imbalance_pct-100)/2;
+   unsigned long imbalance = scale_load_down(NICE_0_LOAD) *
+   (sd->imbalance_pct-100) / 100;
 
if (sd_flag & SD_BALANCE_WAKE)
load_idx = sd->wake_idx;
 
do {
-   unsigned long load, avg_load, spare_cap, max_spare_cap;
+   unsigned long load, avg_load, runnable_load;
+   unsigned long spare_cap, max_spare_cap;
int local_group;
int i;
 
@@ -5431,6 +5435,7 @@ find_idlest_group(struct sched_domain *sd, struct 
task_struct *p,
 * the group containing the CPU with most spare capacity.
 */
avg_load = 0;
+   runnable_load = 0;
max_spare_cap = 0;
 
for_each_cpu(i, sched_group_cpus(group)) {
@@ -5440,7 +5445,9 @@ find_idlest_group(struct sched_domain *sd, struct 
task_struct *p,
else
load = target_load(i, load_idx);
 
-   avg_load += load;
+   runnable_load += load;
+
+   avg_load += cfs_rq_load_avg(_rq(i)->cfs);
 

[tip:locking/core] x86/paravirt: Fix native_patch()

2016-12-11 Thread tip-bot for Peter Zijlstra
Commit-ID:  45dbea5f55c05980cbb4c30047c71a820cd3f282
Gitweb: http://git.kernel.org/tip/45dbea5f55c05980cbb4c30047c71a820cd3f282
Author: Peter Zijlstra 
AuthorDate: Thu, 8 Dec 2016 16:42:14 +0100
Committer:  Ingo Molnar 
CommitDate: Sun, 11 Dec 2016 13:09:19 +0100

x86/paravirt: Fix native_patch()

While chasing a regression I noticed we potentially patch the wrong
code in native_patch().

If we do not select the native code sequence, we must use the default
patcher, not fall-through the switch case.

Signed-off-by: Peter Zijlstra (Intel) 
Cc: Alok Kataria 
Cc: Borislav Petkov 
Cc: Chris Wright 
Cc: Jeremy Fitzhardinge 
Cc: Linus Torvalds 
Cc: Pan Xinhui 
Cc: Paolo Bonzini 
Cc: Peter Anvin 
Cc: Peter Zijlstra 
Cc: Rusty Russell 
Cc: Thomas Gleixner 
Cc: kernel test robot 
Fixes: 3cded4179481 ("x86/paravirt: Optimize native 
pv_lock_ops.vcpu_is_preempted()")
Link: http://lkml.kernel.org/r/20161208154349.270616...@infradead.org
Signed-off-by: Ingo Molnar 
---
 arch/x86/kernel/paravirt_patch_32.c | 4 
 arch/x86/kernel/paravirt_patch_64.c | 4 
 2 files changed, 8 insertions(+)

diff --git a/arch/x86/kernel/paravirt_patch_32.c 
b/arch/x86/kernel/paravirt_patch_32.c
index ff03dbd..33cdec2 100644
--- a/arch/x86/kernel/paravirt_patch_32.c
+++ b/arch/x86/kernel/paravirt_patch_32.c
@@ -58,15 +58,19 @@ unsigned native_patch(u8 type, u16 clobbers, void *ibuf,
end   = end_pv_lock_ops_queued_spin_unlock;
goto patch_site;
}
+   goto patch_default;
+
case PARAVIRT_PATCH(pv_lock_ops.vcpu_is_preempted):
if (pv_is_native_vcpu_is_preempted()) {
start = start_pv_lock_ops_vcpu_is_preempted;
end   = end_pv_lock_ops_vcpu_is_preempted;
goto patch_site;
}
+   goto patch_default;
 #endif
 
default:
+patch_default:
ret = paravirt_patch_default(type, clobbers, ibuf, addr, len);
break;
 
diff --git a/arch/x86/kernel/paravirt_patch_64.c 
b/arch/x86/kernel/paravirt_patch_64.c
index e61dd97..b0fceff 100644
--- a/arch/x86/kernel/paravirt_patch_64.c
+++ b/arch/x86/kernel/paravirt_patch_64.c
@@ -70,15 +70,19 @@ unsigned native_patch(u8 type, u16 clobbers, void *ibuf,
end   = end_pv_lock_ops_queued_spin_unlock;
goto patch_site;
}
+   goto patch_default;
+
case PARAVIRT_PATCH(pv_lock_ops.vcpu_is_preempted):
if (pv_is_native_vcpu_is_preempted()) {
start = start_pv_lock_ops_vcpu_is_preempted;
end   = end_pv_lock_ops_vcpu_is_preempted;
goto patch_site;
}
+   goto patch_default;
 #endif
 
default:
+patch_default:
ret = paravirt_patch_default(type, clobbers, ibuf, addr, len);
break;
 


[tip:locking/core] x86/paravirt: Fix native_patch()

2016-12-11 Thread tip-bot for Peter Zijlstra
Commit-ID:  45dbea5f55c05980cbb4c30047c71a820cd3f282
Gitweb: http://git.kernel.org/tip/45dbea5f55c05980cbb4c30047c71a820cd3f282
Author: Peter Zijlstra 
AuthorDate: Thu, 8 Dec 2016 16:42:14 +0100
Committer:  Ingo Molnar 
CommitDate: Sun, 11 Dec 2016 13:09:19 +0100

x86/paravirt: Fix native_patch()

While chasing a regression I noticed we potentially patch the wrong
code in native_patch().

If we do not select the native code sequence, we must use the default
patcher, not fall-through the switch case.

Signed-off-by: Peter Zijlstra (Intel) 
Cc: Alok Kataria 
Cc: Borislav Petkov 
Cc: Chris Wright 
Cc: Jeremy Fitzhardinge 
Cc: Linus Torvalds 
Cc: Pan Xinhui 
Cc: Paolo Bonzini 
Cc: Peter Anvin 
Cc: Peter Zijlstra 
Cc: Rusty Russell 
Cc: Thomas Gleixner 
Cc: kernel test robot 
Fixes: 3cded4179481 ("x86/paravirt: Optimize native 
pv_lock_ops.vcpu_is_preempted()")
Link: http://lkml.kernel.org/r/20161208154349.270616...@infradead.org
Signed-off-by: Ingo Molnar 
---
 arch/x86/kernel/paravirt_patch_32.c | 4 
 arch/x86/kernel/paravirt_patch_64.c | 4 
 2 files changed, 8 insertions(+)

diff --git a/arch/x86/kernel/paravirt_patch_32.c 
b/arch/x86/kernel/paravirt_patch_32.c
index ff03dbd..33cdec2 100644
--- a/arch/x86/kernel/paravirt_patch_32.c
+++ b/arch/x86/kernel/paravirt_patch_32.c
@@ -58,15 +58,19 @@ unsigned native_patch(u8 type, u16 clobbers, void *ibuf,
end   = end_pv_lock_ops_queued_spin_unlock;
goto patch_site;
}
+   goto patch_default;
+
case PARAVIRT_PATCH(pv_lock_ops.vcpu_is_preempted):
if (pv_is_native_vcpu_is_preempted()) {
start = start_pv_lock_ops_vcpu_is_preempted;
end   = end_pv_lock_ops_vcpu_is_preempted;
goto patch_site;
}
+   goto patch_default;
 #endif
 
default:
+patch_default:
ret = paravirt_patch_default(type, clobbers, ibuf, addr, len);
break;
 
diff --git a/arch/x86/kernel/paravirt_patch_64.c 
b/arch/x86/kernel/paravirt_patch_64.c
index e61dd97..b0fceff 100644
--- a/arch/x86/kernel/paravirt_patch_64.c
+++ b/arch/x86/kernel/paravirt_patch_64.c
@@ -70,15 +70,19 @@ unsigned native_patch(u8 type, u16 clobbers, void *ibuf,
end   = end_pv_lock_ops_queued_spin_unlock;
goto patch_site;
}
+   goto patch_default;
+
case PARAVIRT_PATCH(pv_lock_ops.vcpu_is_preempted):
if (pv_is_native_vcpu_is_preempted()) {
start = start_pv_lock_ops_vcpu_is_preempted;
end   = end_pv_lock_ops_vcpu_is_preempted;
goto patch_site;
}
+   goto patch_default;
 #endif
 
default:
+patch_default:
ret = paravirt_patch_default(type, clobbers, ibuf, addr, len);
break;
 


linux-next: Tree for Dec 12

2016-12-11 Thread Stephen Rothwell
Hi all,

Please do not add any material for v4.11 to your linux-next included
branches until after v4.10-rc1 has been released.

Changes since 20161209:

The vfs tree gained conflicts against the overlayfs and xfs trees.

The vfs-miklos tree gained a conflict against the vfs tree.

The hid tree gained a build failure so I used the version from
next-20161209.

The drm tree gained a conflict against the jc_docs tree.

The block tree gained a conflict against the vfs tree.

The devicetree tree gained a conflict against the drm-panel tree.

The spi tree still had its build failure so I used the version from
next-20161208.

The tip tree gained a conflict against the net-next tree.

The staging tree gained a conflict against the vfs tree.

The akpm tree gained conflicts against Linus' and the vfs trees.

Non-merge commits (relative to Linus' tree): 10624
 9761 files changed, 666573 insertions(+), 236340 deletions(-)



I have created today's linux-next tree at
git://git.kernel.org/pub/scm/linux/kernel/git/next/linux-next.git
(patches at http://www.kernel.org/pub/linux/kernel/next/ ).  If you
are tracking the linux-next tree using git, you should not use "git pull"
to do so as that will try to merge the new linux-next release with the
old one.  You should use "git fetch" and checkout or reset to the new
master.

You can see which trees have been included by looking in the Next/Trees
file in the source.  There are also quilt-import.log and merge.log
files in the Next directory.  Between each merge, the tree was built
with a ppc64_defconfig for powerpc and an allmodconfig (with
CONFIG_BUILD_DOCSRC=n) for x86_64, a multi_v7_defconfig for arm and a
native build of tools/perf. After the final fixups (if any), I do an
x86_64 modules_install followed by builds for x86_64 allnoconfig,
powerpc allnoconfig (32 and 64 bit), ppc44x_defconfig, allyesconfig
(with KALLSYMS_EXTRA_PASS=1) and pseries_le_defconfig and i386, sparc
and sparc64 defconfig.

Below is a summary of the state of the merge.

I am currently merging 245 trees (counting Linus' and 35 trees of bug
fix patches pending for the current merge release).

Stats about the size of the tree over time can be seen at
http://neuling.org/linux-next-size.html .

Status of my local build tests will be at
http://kisskb.ellerman.id.au/linux-next .  If maintainers want to give
advice about cross compilers/configs that work, we are always open to add
more builds.

Thanks to Randy Dunlap for doing many randconfig builds.  And to Paul
Gortmaker for triage and bug fixes.

-- 
Cheers,
Stephen Rothwell

$ git checkout master
$ git reset --hard stable
Merging origin/master (69973b830859 Linux 4.9)
Merging fixes/master (30066ce675d3 Merge branch 'linus' of 
git://git.kernel.org/pub/scm/linux/kernel/git/herbert/crypto-2.6)
Merging kbuild-current/rc-fixes (152b695d7437 builddeb: fix cross-building to 
arm64 producing host-arch debs)
Merging arc-current/for-curr (7badf6fefca8 ARC: axs10x: really enable ARC PGU)
Merging arm-current/fixes (8478132a8784 Revert "arm: move exports to 
definitions")
Merging m68k-current/for-linus (7e251bb21ae0 m68k: Fix ndelay() macro)
Merging metag-fixes/fixes (35d04077ad96 metag: Only define 
atomic_dec_if_positive conditionally)
Merging powerpc-fixes/fixes (dadc4a1bb9f0 powerpc/64: Fix placement of .text to 
be immediately following .head.text)
Merging sparc/master (bc3913a5378c Merge 
git://git.kernel.org/pub/scm/linux/kernel/git/davem/sparc)
Merging net/master (045169816b31 Merge branch 'linus' of 
git://git.kernel.org/pub/scm/linux/kernel/git/herbert/crypto-2.6)
Merging ipsec/master (bc3913a5378c Merge 
git://git.kernel.org/pub/scm/linux/kernel/git/davem/sparc)
Merging netfilter/master (045169816b31 Merge branch 'linus' of 
git://git.kernel.org/pub/scm/linux/kernel/git/herbert/crypto-2.6)
Merging ipvs/master (9b6c14d51bd2 net: tcp response should set oif only if it 
is L3 master)
Merging wireless-drivers/master (fcd2042e8d36 mwifiex: printk() overflow with 
32-byte SSIDs)
Merging mac80211/master (9590112241ba tipc: fix link statistics counter errors)
Merging sound-current/for-linus (82ffb6fc6371 ALSA: usb-audio: Add QuickCam 
Communicate Deluxe/S7500 to volume_control_quirks)
Merging pci-current/for-linus (e42010d8207f PCI: Set Read Completion Boundary 
to 128 iff Root Port supports it (_HPX))
Merging driver-core.current/driver-core-linus (a25f0944ba9b Linux 4.9-rc5)
Merging tty.current/tty-linus (a909d3e63699 Linux 4.9-rc3)
Merging usb.current/usb-linus (e5517c2a5a49 Linux 4.9-rc7)
Merging usb-gadget-fixes/fixes (05e78c6933d6 usb: gadget: f_fs: fix wrong 
parenthesis in ffs_func_req_match())
Merging usb-serial-fixes/usb-linus (46490c347df4 USB: serial: option: add dlink 
dwm-158)
Merging usb-chipidea-fixes/ci-for-usb-stable (c7fbb09b2ea1 usb: chipidea: move 
the lock initialization to core file)
Merging phy/fixes (4320f9d4c183 phy: sun4i: check PMU presence when poking 
unknown 

linux-next: Tree for Dec 12

2016-12-11 Thread Stephen Rothwell
Hi all,

Please do not add any material for v4.11 to your linux-next included
branches until after v4.10-rc1 has been released.

Changes since 20161209:

The vfs tree gained conflicts against the overlayfs and xfs trees.

The vfs-miklos tree gained a conflict against the vfs tree.

The hid tree gained a build failure so I used the version from
next-20161209.

The drm tree gained a conflict against the jc_docs tree.

The block tree gained a conflict against the vfs tree.

The devicetree tree gained a conflict against the drm-panel tree.

The spi tree still had its build failure so I used the version from
next-20161208.

The tip tree gained a conflict against the net-next tree.

The staging tree gained a conflict against the vfs tree.

The akpm tree gained conflicts against Linus' and the vfs trees.

Non-merge commits (relative to Linus' tree): 10624
 9761 files changed, 666573 insertions(+), 236340 deletions(-)



I have created today's linux-next tree at
git://git.kernel.org/pub/scm/linux/kernel/git/next/linux-next.git
(patches at http://www.kernel.org/pub/linux/kernel/next/ ).  If you
are tracking the linux-next tree using git, you should not use "git pull"
to do so as that will try to merge the new linux-next release with the
old one.  You should use "git fetch" and checkout or reset to the new
master.

You can see which trees have been included by looking in the Next/Trees
file in the source.  There are also quilt-import.log and merge.log
files in the Next directory.  Between each merge, the tree was built
with a ppc64_defconfig for powerpc and an allmodconfig (with
CONFIG_BUILD_DOCSRC=n) for x86_64, a multi_v7_defconfig for arm and a
native build of tools/perf. After the final fixups (if any), I do an
x86_64 modules_install followed by builds for x86_64 allnoconfig,
powerpc allnoconfig (32 and 64 bit), ppc44x_defconfig, allyesconfig
(with KALLSYMS_EXTRA_PASS=1) and pseries_le_defconfig and i386, sparc
and sparc64 defconfig.

Below is a summary of the state of the merge.

I am currently merging 245 trees (counting Linus' and 35 trees of bug
fix patches pending for the current merge release).

Stats about the size of the tree over time can be seen at
http://neuling.org/linux-next-size.html .

Status of my local build tests will be at
http://kisskb.ellerman.id.au/linux-next .  If maintainers want to give
advice about cross compilers/configs that work, we are always open to add
more builds.

Thanks to Randy Dunlap for doing many randconfig builds.  And to Paul
Gortmaker for triage and bug fixes.

-- 
Cheers,
Stephen Rothwell

$ git checkout master
$ git reset --hard stable
Merging origin/master (69973b830859 Linux 4.9)
Merging fixes/master (30066ce675d3 Merge branch 'linus' of 
git://git.kernel.org/pub/scm/linux/kernel/git/herbert/crypto-2.6)
Merging kbuild-current/rc-fixes (152b695d7437 builddeb: fix cross-building to 
arm64 producing host-arch debs)
Merging arc-current/for-curr (7badf6fefca8 ARC: axs10x: really enable ARC PGU)
Merging arm-current/fixes (8478132a8784 Revert "arm: move exports to 
definitions")
Merging m68k-current/for-linus (7e251bb21ae0 m68k: Fix ndelay() macro)
Merging metag-fixes/fixes (35d04077ad96 metag: Only define 
atomic_dec_if_positive conditionally)
Merging powerpc-fixes/fixes (dadc4a1bb9f0 powerpc/64: Fix placement of .text to 
be immediately following .head.text)
Merging sparc/master (bc3913a5378c Merge 
git://git.kernel.org/pub/scm/linux/kernel/git/davem/sparc)
Merging net/master (045169816b31 Merge branch 'linus' of 
git://git.kernel.org/pub/scm/linux/kernel/git/herbert/crypto-2.6)
Merging ipsec/master (bc3913a5378c Merge 
git://git.kernel.org/pub/scm/linux/kernel/git/davem/sparc)
Merging netfilter/master (045169816b31 Merge branch 'linus' of 
git://git.kernel.org/pub/scm/linux/kernel/git/herbert/crypto-2.6)
Merging ipvs/master (9b6c14d51bd2 net: tcp response should set oif only if it 
is L3 master)
Merging wireless-drivers/master (fcd2042e8d36 mwifiex: printk() overflow with 
32-byte SSIDs)
Merging mac80211/master (9590112241ba tipc: fix link statistics counter errors)
Merging sound-current/for-linus (82ffb6fc6371 ALSA: usb-audio: Add QuickCam 
Communicate Deluxe/S7500 to volume_control_quirks)
Merging pci-current/for-linus (e42010d8207f PCI: Set Read Completion Boundary 
to 128 iff Root Port supports it (_HPX))
Merging driver-core.current/driver-core-linus (a25f0944ba9b Linux 4.9-rc5)
Merging tty.current/tty-linus (a909d3e63699 Linux 4.9-rc3)
Merging usb.current/usb-linus (e5517c2a5a49 Linux 4.9-rc7)
Merging usb-gadget-fixes/fixes (05e78c6933d6 usb: gadget: f_fs: fix wrong 
parenthesis in ffs_func_req_match())
Merging usb-serial-fixes/usb-linus (46490c347df4 USB: serial: option: add dlink 
dwm-158)
Merging usb-chipidea-fixes/ci-for-usb-stable (c7fbb09b2ea1 usb: chipidea: move 
the lock initialization to core file)
Merging phy/fixes (4320f9d4c183 phy: sun4i: check PMU presence when poking 
unknown 

RE: [PATCH v6 2/2] crypto: add virtio-crypto driver

2016-12-11 Thread Gonglei (Arei)
Hi, Michael & Herbert

Because the virtio-crypto device emulation had been in QEMU 2.8,
would you please merge the virtio-crypto driver for 4.10 if no other
comments? If so, Michael, please ack and/or review the patch, then
Herbert will take it (I asked him last week). Thank you!

Ps: Note on 4.10 merge window timing from Linus
 https://lkml.org/lkml/2016/12/7/506

Dec 23rd is the deadline for 4.10 merge window.

Regards,
-Gonglei


> -Original Message-
> From: Gonglei (Arei)
> Sent: Thursday, December 08, 2016 12:37 PM
> Subject: [PATCH v6 2/2] crypto: add virtio-crypto driver
> 
> This patch introduces virtio-crypto driver for Linux Kernel.
> 
> The virtio crypto device is a virtual cryptography device
> as well as a kind of virtual hardware accelerator for
> virtual machines. The encryption and decryption requests
> are placed in the data queue and are ultimately handled by
> the backend crypto accelerators. The second queue is the
> control queue used to create or destroy sessions for
> symmetric algorithms and will control some advanced features
> in the future. The virtio crypto device provides the following
> crypto services: CIPHER, MAC, HASH, and AEAD.
> 
> For more information about virtio-crypto device, please see:
>   http://qemu-project.org/Features/VirtioCrypto
> 
> CC: Michael S. Tsirkin 
> CC: Cornelia Huck 
> CC: Stefan Hajnoczi 
> CC: Herbert Xu 
> CC: Halil Pasic 
> CC: David S. Miller 
> CC: Zeng Xin 
> Signed-off-by: Gonglei 
> ---
>  MAINTAINERS  |   9 +
>  drivers/crypto/Kconfig   |   2 +
>  drivers/crypto/Makefile  |   1 +
>  drivers/crypto/virtio/Kconfig|  10 +
>  drivers/crypto/virtio/Makefile   |   5 +
>  drivers/crypto/virtio/virtio_crypto_algs.c   | 541
> +++
>  drivers/crypto/virtio/virtio_crypto_common.h | 122 ++
>  drivers/crypto/virtio/virtio_crypto_core.c   | 464
> +++
>  drivers/crypto/virtio/virtio_crypto_mgr.c| 264 +
>  include/uapi/linux/Kbuild|   1 +
>  include/uapi/linux/virtio_crypto.h   | 450
> ++
>  include/uapi/linux/virtio_ids.h  |   1 +
>  12 files changed, 1870 insertions(+)
>  create mode 100644 drivers/crypto/virtio/Kconfig
>  create mode 100644 drivers/crypto/virtio/Makefile
>  create mode 100644 drivers/crypto/virtio/virtio_crypto_algs.c
>  create mode 100644 drivers/crypto/virtio/virtio_crypto_common.h
>  create mode 100644 drivers/crypto/virtio/virtio_crypto_core.c
>  create mode 100644 drivers/crypto/virtio/virtio_crypto_mgr.c
>  create mode 100644 include/uapi/linux/virtio_crypto.h
> 
> diff --git a/MAINTAINERS b/MAINTAINERS
> index ad9b965..cccaaf0 100644
> --- a/MAINTAINERS
> +++ b/MAINTAINERS
> @@ -12810,6 +12810,7 @@ F:drivers/net/virtio_net.c
>  F:   drivers/block/virtio_blk.c
>  F:   include/linux/virtio_*.h
>  F:   include/uapi/linux/virtio_*.h
> +F:   drivers/crypto/virtio/
> 
>  VIRTIO DRIVERS FOR S390
>  M:   Christian Borntraeger 
> @@ -12846,6 +12847,14 @@ S:   Maintained
>  F:   drivers/virtio/virtio_input.c
>  F:   include/uapi/linux/virtio_input.h
> 
> +VIRTIO CRYPTO DRIVER
> +M:  Gonglei 
> +L:  virtualizat...@lists.linux-foundation.org
> +L:  linux-cry...@vger.kernel.org
> +S:  Maintained
> +F:  drivers/crypto/virtio/
> +F:  include/uapi/linux/virtio_crypto.h
> +
>  VIA RHINE NETWORK DRIVER
>  S:   Orphan
>  F:   drivers/net/ethernet/via/via-rhine.c
> diff --git a/drivers/crypto/Kconfig b/drivers/crypto/Kconfig
> index 4d2b81f..7956478 100644
> --- a/drivers/crypto/Kconfig
> +++ b/drivers/crypto/Kconfig
> @@ -555,4 +555,6 @@ config CRYPTO_DEV_ROCKCHIP
> 
>  source "drivers/crypto/chelsio/Kconfig"
> 
> +source "drivers/crypto/virtio/Kconfig"
> +
>  endif # CRYPTO_HW
> diff --git a/drivers/crypto/Makefile b/drivers/crypto/Makefile
> index ad7250f..bc53cb8 100644
> --- a/drivers/crypto/Makefile
> +++ b/drivers/crypto/Makefile
> @@ -32,3 +32,4 @@ obj-$(CONFIG_CRYPTO_DEV_VMX) += vmx/
>  obj-$(CONFIG_CRYPTO_DEV_SUN4I_SS) += sunxi-ss/
>  obj-$(CONFIG_CRYPTO_DEV_ROCKCHIP) += rockchip/
>  obj-$(CONFIG_CRYPTO_DEV_CHELSIO) += chelsio/
> +obj-$(CONFIG_CRYPTO_DEV_VIRTIO) += virtio/
> diff --git a/drivers/crypto/virtio/Kconfig b/drivers/crypto/virtio/Kconfig
> new file mode 100644
> index 000..d80f733
> --- /dev/null
> +++ b/drivers/crypto/virtio/Kconfig
> @@ -0,0 +1,10 @@
> +config CRYPTO_DEV_VIRTIO
> + tristate "VirtIO crypto driver"
> + depends on VIRTIO
> + select CRYPTO_AEAD
> + select CRYPTO_AUTHENC
> + select CRYPTO_BLKCIPHER
> + default m
> + help
> +   This driver provides support for virtio crypto device. If you
> +   choose 'M' here, this 

RE: [PATCH v6 2/2] crypto: add virtio-crypto driver

2016-12-11 Thread Gonglei (Arei)
Hi, Michael & Herbert

Because the virtio-crypto device emulation had been in QEMU 2.8,
would you please merge the virtio-crypto driver for 4.10 if no other
comments? If so, Michael, please ack and/or review the patch, then
Herbert will take it (I asked him last week). Thank you!

Ps: Note on 4.10 merge window timing from Linus
 https://lkml.org/lkml/2016/12/7/506

Dec 23rd is the deadline for 4.10 merge window.

Regards,
-Gonglei


> -Original Message-
> From: Gonglei (Arei)
> Sent: Thursday, December 08, 2016 12:37 PM
> Subject: [PATCH v6 2/2] crypto: add virtio-crypto driver
> 
> This patch introduces virtio-crypto driver for Linux Kernel.
> 
> The virtio crypto device is a virtual cryptography device
> as well as a kind of virtual hardware accelerator for
> virtual machines. The encryption and decryption requests
> are placed in the data queue and are ultimately handled by
> the backend crypto accelerators. The second queue is the
> control queue used to create or destroy sessions for
> symmetric algorithms and will control some advanced features
> in the future. The virtio crypto device provides the following
> crypto services: CIPHER, MAC, HASH, and AEAD.
> 
> For more information about virtio-crypto device, please see:
>   http://qemu-project.org/Features/VirtioCrypto
> 
> CC: Michael S. Tsirkin 
> CC: Cornelia Huck 
> CC: Stefan Hajnoczi 
> CC: Herbert Xu 
> CC: Halil Pasic 
> CC: David S. Miller 
> CC: Zeng Xin 
> Signed-off-by: Gonglei 
> ---
>  MAINTAINERS  |   9 +
>  drivers/crypto/Kconfig   |   2 +
>  drivers/crypto/Makefile  |   1 +
>  drivers/crypto/virtio/Kconfig|  10 +
>  drivers/crypto/virtio/Makefile   |   5 +
>  drivers/crypto/virtio/virtio_crypto_algs.c   | 541
> +++
>  drivers/crypto/virtio/virtio_crypto_common.h | 122 ++
>  drivers/crypto/virtio/virtio_crypto_core.c   | 464
> +++
>  drivers/crypto/virtio/virtio_crypto_mgr.c| 264 +
>  include/uapi/linux/Kbuild|   1 +
>  include/uapi/linux/virtio_crypto.h   | 450
> ++
>  include/uapi/linux/virtio_ids.h  |   1 +
>  12 files changed, 1870 insertions(+)
>  create mode 100644 drivers/crypto/virtio/Kconfig
>  create mode 100644 drivers/crypto/virtio/Makefile
>  create mode 100644 drivers/crypto/virtio/virtio_crypto_algs.c
>  create mode 100644 drivers/crypto/virtio/virtio_crypto_common.h
>  create mode 100644 drivers/crypto/virtio/virtio_crypto_core.c
>  create mode 100644 drivers/crypto/virtio/virtio_crypto_mgr.c
>  create mode 100644 include/uapi/linux/virtio_crypto.h
> 
> diff --git a/MAINTAINERS b/MAINTAINERS
> index ad9b965..cccaaf0 100644
> --- a/MAINTAINERS
> +++ b/MAINTAINERS
> @@ -12810,6 +12810,7 @@ F:drivers/net/virtio_net.c
>  F:   drivers/block/virtio_blk.c
>  F:   include/linux/virtio_*.h
>  F:   include/uapi/linux/virtio_*.h
> +F:   drivers/crypto/virtio/
> 
>  VIRTIO DRIVERS FOR S390
>  M:   Christian Borntraeger 
> @@ -12846,6 +12847,14 @@ S:   Maintained
>  F:   drivers/virtio/virtio_input.c
>  F:   include/uapi/linux/virtio_input.h
> 
> +VIRTIO CRYPTO DRIVER
> +M:  Gonglei 
> +L:  virtualizat...@lists.linux-foundation.org
> +L:  linux-cry...@vger.kernel.org
> +S:  Maintained
> +F:  drivers/crypto/virtio/
> +F:  include/uapi/linux/virtio_crypto.h
> +
>  VIA RHINE NETWORK DRIVER
>  S:   Orphan
>  F:   drivers/net/ethernet/via/via-rhine.c
> diff --git a/drivers/crypto/Kconfig b/drivers/crypto/Kconfig
> index 4d2b81f..7956478 100644
> --- a/drivers/crypto/Kconfig
> +++ b/drivers/crypto/Kconfig
> @@ -555,4 +555,6 @@ config CRYPTO_DEV_ROCKCHIP
> 
>  source "drivers/crypto/chelsio/Kconfig"
> 
> +source "drivers/crypto/virtio/Kconfig"
> +
>  endif # CRYPTO_HW
> diff --git a/drivers/crypto/Makefile b/drivers/crypto/Makefile
> index ad7250f..bc53cb8 100644
> --- a/drivers/crypto/Makefile
> +++ b/drivers/crypto/Makefile
> @@ -32,3 +32,4 @@ obj-$(CONFIG_CRYPTO_DEV_VMX) += vmx/
>  obj-$(CONFIG_CRYPTO_DEV_SUN4I_SS) += sunxi-ss/
>  obj-$(CONFIG_CRYPTO_DEV_ROCKCHIP) += rockchip/
>  obj-$(CONFIG_CRYPTO_DEV_CHELSIO) += chelsio/
> +obj-$(CONFIG_CRYPTO_DEV_VIRTIO) += virtio/
> diff --git a/drivers/crypto/virtio/Kconfig b/drivers/crypto/virtio/Kconfig
> new file mode 100644
> index 000..d80f733
> --- /dev/null
> +++ b/drivers/crypto/virtio/Kconfig
> @@ -0,0 +1,10 @@
> +config CRYPTO_DEV_VIRTIO
> + tristate "VirtIO crypto driver"
> + depends on VIRTIO
> + select CRYPTO_AEAD
> + select CRYPTO_AUTHENC
> + select CRYPTO_BLKCIPHER
> + default m
> + help
> +   This driver provides support for virtio crypto device. If you
> +   choose 'M' here, this module will be called virtio_crypto.
> diff --git a/drivers/crypto/virtio/Makefile b/drivers/crypto/virtio/Makefile
> new file mode 100644
> index 000..dd342c9
> --- /dev/null
> +++ b/drivers/crypto/virtio/Makefile
> @@ -0,0 +1,5 @@
> 

Re: Documenting the ioctl interfaces to discover relationships between namespaces

2016-12-11 Thread Michael Kerrisk (man-pages)
[Fixing Serge's address in my original CC]

On 12/11/2016 11:30 PM, Eric W. Biederman wrote:
> "Michael Kerrisk (man-pages)"  writes:
> 
>> [was: [PATCH 0/4 v3] Add an interface to discover relationships
>> between namespaces]
> 
> One small comment below.
> 
>>
>>Introspecting namespace relationships
>>Since Linux 4.9, two ioctl(2) operations  are  provided  to  allow
>>introspection  of  namespace relationships (see user_namespaces(7)
>>and pid_namespaces(7)).  The form of the calls is:
>>
>>ioctl(fd, request);
>>
>>In each case, fd refers to a /proc/[pid]/ns/* file.
>>
>>NS_GET_USERNS
>>   Returns a file descriptor that refers to  the  owning  user
>>   namespace for the namespace referred to by fd.
>>
>>NS_GET_PARENT
>>   Returns  a file descriptor that refers to the parent names‐
>>   pace of the namespace referred to by fd.  This operation is
>>   valid  only for hierarchical namespaces (i.e., PID and user
>>   namespaces).  For user namespaces, NS_GET_PARENT is synony‐
>>   mous with NS_GET_USERNS.
>>
>>In each case, the returned file descriptor is opened with O_RDONLY
>>and O_CLOEXEC (close-on-exec).
>>
>>By applying fstat(2) to the returned file descriptor, one  obtains
>>a  stat structure whose st_ino (inode number) field identifies the
>>owning/parent namespace.  This inode number can  be  matched  with
>>the  inode  number  of  another  /proc/[pid]/ns/{pid,user} file to
>>determine whether that is the owning/parent namespace.
> 
> Like all fstat inode comparisons to be fully accurate you need to
> compare both the st_ino and st_dev.  I reserve the right for st_dev to
> be significant when comparing namespaces.  Otherwise I might have to
> create a namespace of namespaces someday and that is ugly.

Ah yes. Thanks for catching that. I've adjusted the text,
and the example program.

Cheers,

Michael

>>Either of these ioctl(2) operations can fail  with  the  following
>>error:
>>
>>EPERM  The  requested  namespace is outside of the caller's names‐
>>   pace scope.  This error can occur if, for example, the own‐
>>   ing  user  namespace is an ancestor of the caller's current
>>   user namespace.  It can also occur on  attempts  to  obtain
>>   the parent of the initial user or PID namespace.
>>
>>Additionally,  the  NS_GET_PARENT operation can fail with the fol‐
>>lowing error:
>>
>>EINVAL fd refers to a nonhierarchical namespace.
>>
>>See the EXAMPLE section for an example of the use of these  opera‐
>>tions.
>>
>>[...]
> 
> Eric
> 


-- 
Michael Kerrisk
Linux man-pages maintainer; http://www.kernel.org/doc/man-pages/
Linux/UNIX System Programming Training: http://man7.org/training/


Re: Documenting the ioctl interfaces to discover relationships between namespaces

2016-12-11 Thread Michael Kerrisk (man-pages)
[Fixing Serge's address in my original CC]

On 12/11/2016 11:30 PM, Eric W. Biederman wrote:
> "Michael Kerrisk (man-pages)"  writes:
> 
>> [was: [PATCH 0/4 v3] Add an interface to discover relationships
>> between namespaces]
> 
> One small comment below.
> 
>>
>>Introspecting namespace relationships
>>Since Linux 4.9, two ioctl(2) operations  are  provided  to  allow
>>introspection  of  namespace relationships (see user_namespaces(7)
>>and pid_namespaces(7)).  The form of the calls is:
>>
>>ioctl(fd, request);
>>
>>In each case, fd refers to a /proc/[pid]/ns/* file.
>>
>>NS_GET_USERNS
>>   Returns a file descriptor that refers to  the  owning  user
>>   namespace for the namespace referred to by fd.
>>
>>NS_GET_PARENT
>>   Returns  a file descriptor that refers to the parent names‐
>>   pace of the namespace referred to by fd.  This operation is
>>   valid  only for hierarchical namespaces (i.e., PID and user
>>   namespaces).  For user namespaces, NS_GET_PARENT is synony‐
>>   mous with NS_GET_USERNS.
>>
>>In each case, the returned file descriptor is opened with O_RDONLY
>>and O_CLOEXEC (close-on-exec).
>>
>>By applying fstat(2) to the returned file descriptor, one  obtains
>>a  stat structure whose st_ino (inode number) field identifies the
>>owning/parent namespace.  This inode number can  be  matched  with
>>the  inode  number  of  another  /proc/[pid]/ns/{pid,user} file to
>>determine whether that is the owning/parent namespace.
> 
> Like all fstat inode comparisons to be fully accurate you need to
> compare both the st_ino and st_dev.  I reserve the right for st_dev to
> be significant when comparing namespaces.  Otherwise I might have to
> create a namespace of namespaces someday and that is ugly.

Ah yes. Thanks for catching that. I've adjusted the text,
and the example program.

Cheers,

Michael

>>Either of these ioctl(2) operations can fail  with  the  following
>>error:
>>
>>EPERM  The  requested  namespace is outside of the caller's names‐
>>   pace scope.  This error can occur if, for example, the own‐
>>   ing  user  namespace is an ancestor of the caller's current
>>   user namespace.  It can also occur on  attempts  to  obtain
>>   the parent of the initial user or PID namespace.
>>
>>Additionally,  the  NS_GET_PARENT operation can fail with the fol‐
>>lowing error:
>>
>>EINVAL fd refers to a nonhierarchical namespace.
>>
>>See the EXAMPLE section for an example of the use of these  opera‐
>>tions.
>>
>>[...]
> 
> Eric
> 


-- 
Michael Kerrisk
Linux man-pages maintainer; http://www.kernel.org/doc/man-pages/
Linux/UNIX System Programming Training: http://man7.org/training/


[PATCH RFC 1/1] mm, page_alloc: fix incorrect zone_statistics data

2016-12-11 Thread Jia He
In commit b9f00e147f27 ("mm, page_alloc: reduce branches in
zone_statistics"), it reconstructed the code to reduce the branch miss rate.
Compared with the original logic, it assumed if !(flag & __GFP_OTHER_NODE)
 z->node would not be equal to preferred_zone->node. That seems to be
incorrect.

Fixes: commit b9f00e147f27 ("mm, page_alloc: reduce branches in
zone_statistics")

Signed-off-by: Jia He 
---
 mm/page_alloc.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 6de9440..474757e 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -2568,6 +2568,9 @@ static inline void zone_statistics(struct zone 
*preferred_zone, struct zone *z,
if (z->node == local_nid) {
__inc_zone_state(z, NUMA_HIT);
__inc_zone_state(z, local_stat);
+   } else if (z->node == preferred_zone->node) {
+   __inc_zone_state(z, NUMA_HIT);
+   __inc_zone_state(z, NUMA_OTHER);
} else {
__inc_zone_state(z, NUMA_MISS);
__inc_zone_state(preferred_zone, NUMA_FOREIGN);
-- 
2.5.5



[PATCH RFC 1/1] mm, page_alloc: fix incorrect zone_statistics data

2016-12-11 Thread Jia He
In commit b9f00e147f27 ("mm, page_alloc: reduce branches in
zone_statistics"), it reconstructed the code to reduce the branch miss rate.
Compared with the original logic, it assumed if !(flag & __GFP_OTHER_NODE)
 z->node would not be equal to preferred_zone->node. That seems to be
incorrect.

Fixes: commit b9f00e147f27 ("mm, page_alloc: reduce branches in
zone_statistics")

Signed-off-by: Jia He 
---
 mm/page_alloc.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 6de9440..474757e 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -2568,6 +2568,9 @@ static inline void zone_statistics(struct zone 
*preferred_zone, struct zone *z,
if (z->node == local_nid) {
__inc_zone_state(z, NUMA_HIT);
__inc_zone_state(z, local_stat);
+   } else if (z->node == preferred_zone->node) {
+   __inc_zone_state(z, NUMA_HIT);
+   __inc_zone_state(z, NUMA_OTHER);
} else {
__inc_zone_state(z, NUMA_MISS);
__inc_zone_state(preferred_zone, NUMA_FOREIGN);
-- 
2.5.5



[PATCH RFC 0/1] mm, page_alloc: fix incorrect zone_statistics data

2016-12-11 Thread Jia He

In commit b9f00e147f27 ("mm, page_alloc: reduce branches in
zone_statistics"), it reconstructed the code to reduce the branch miss rate.
Compared with the original logic, it assumed if !(flag & __GFP_OTHER_NODE)
 z->node would not be equal to preferred_zone->node. That seems to be
incorrect.

Here is what I caught: dump_stack() is triggered when z->node ==
preferred_zone->node and z->node != numa_node_id()

z=5,prefer=5,local=4, flag_NODE=0
[c00cdcaef440] [c02e88cc] cache_grow_begin+0xcc/0x500
[c00cdcaef6f0] [c02ecb44] do_tune_cpucache+0x64/0x100
[c00cdcaef750] [c02ecc7c] enable_cpucache+0x9c/0x180
[c00cdcaef7d0] [c02ed01c] __kmem_cache_create+0x1ec/0x2c0
[c00cdcaef820] [c0291c98] create_cache+0xb8/0x240
[c00cdcaef890] [c0291fa8] kmem_cache_create+0x188/0x290
[c00cdcaef950] [d00011dc5c70] ext4_mb_init+0x3c0/0x5e0 [ext4]
[c00cdcaef9f0] [d00011daaedc] ext4_fill_super+0x266c/0x3390 [ext4]
[c00cdcaefb30] [c0328b8c] mount_bdev+0x22c/0x260
[c00cdcaefbd0] [d00011da1fa8] ext4_mount+0x48/0x60 [ext4]
[c00cdcaefc10] [c032a11c] mount_fs+0x8c/0x230
[c00cdcaefcb0] [c0351f98] vfs_kern_mount+0x78/0x180
[c00cdcaefd00] [c0356d68] do_mount+0x258/0xea0
[c00cdcaefde0] [c0357da0] SyS_mount+0xa0/0x110
[c00cdcaefe30] [c000bd84] system_call+0x38/0xe0

Before this patch, the numa_miss and numa_foreign looked very odd:
linux:~ # numastat
   node0   node1   node2   
node3   node4   node5   node6
numa_hit   42216   0   0   
0   96755   0   0
numa_miss  1 718 711 
726 860 712 719
numa_foreign   1 718 711 
726 860 712 719
interleave_hit   631 638 632 
641 621 633 636
local_node 42216   0   0   
0   96755   0   0
other_node 0   0   0   
0   0   0   0

After this patch
linux:~ # numastat  
   node0   node1   node2   
node3   node4   node5   node6
numa_hit  177891 718 711 
726   60302 712 719
numa_miss  0  196944  237222  
253424   0   36265   0
numa_foreign  723855   0   0   
0   0   0   0
interleave_hit   631 638 632 
641 621 633 636
local_node177891   0   0   
0   59444   0   0
other_node 0 718 711 
726 858 712 719
Jia He (1):
  mm, page_alloc: fix incorrect zone_statistics data

 mm/page_alloc.c | 3 +++
 1 file changed, 3 insertions(+)

-- 
2.5.5



[PATCH RFC 0/1] mm, page_alloc: fix incorrect zone_statistics data

2016-12-11 Thread Jia He

In commit b9f00e147f27 ("mm, page_alloc: reduce branches in
zone_statistics"), it reconstructed the code to reduce the branch miss rate.
Compared with the original logic, it assumed if !(flag & __GFP_OTHER_NODE)
 z->node would not be equal to preferred_zone->node. That seems to be
incorrect.

Here is what I caught: dump_stack() is triggered when z->node ==
preferred_zone->node and z->node != numa_node_id()

z=5,prefer=5,local=4, flag_NODE=0
[c00cdcaef440] [c02e88cc] cache_grow_begin+0xcc/0x500
[c00cdcaef6f0] [c02ecb44] do_tune_cpucache+0x64/0x100
[c00cdcaef750] [c02ecc7c] enable_cpucache+0x9c/0x180
[c00cdcaef7d0] [c02ed01c] __kmem_cache_create+0x1ec/0x2c0
[c00cdcaef820] [c0291c98] create_cache+0xb8/0x240
[c00cdcaef890] [c0291fa8] kmem_cache_create+0x188/0x290
[c00cdcaef950] [d00011dc5c70] ext4_mb_init+0x3c0/0x5e0 [ext4]
[c00cdcaef9f0] [d00011daaedc] ext4_fill_super+0x266c/0x3390 [ext4]
[c00cdcaefb30] [c0328b8c] mount_bdev+0x22c/0x260
[c00cdcaefbd0] [d00011da1fa8] ext4_mount+0x48/0x60 [ext4]
[c00cdcaefc10] [c032a11c] mount_fs+0x8c/0x230
[c00cdcaefcb0] [c0351f98] vfs_kern_mount+0x78/0x180
[c00cdcaefd00] [c0356d68] do_mount+0x258/0xea0
[c00cdcaefde0] [c0357da0] SyS_mount+0xa0/0x110
[c00cdcaefe30] [c000bd84] system_call+0x38/0xe0

Before this patch, the numa_miss and numa_foreign looked very odd:
linux:~ # numastat
   node0   node1   node2   
node3   node4   node5   node6
numa_hit   42216   0   0   
0   96755   0   0
numa_miss  1 718 711 
726 860 712 719
numa_foreign   1 718 711 
726 860 712 719
interleave_hit   631 638 632 
641 621 633 636
local_node 42216   0   0   
0   96755   0   0
other_node 0   0   0   
0   0   0   0

After this patch
linux:~ # numastat  
   node0   node1   node2   
node3   node4   node5   node6
numa_hit  177891 718 711 
726   60302 712 719
numa_miss  0  196944  237222  
253424   0   36265   0
numa_foreign  723855   0   0   
0   0   0   0
interleave_hit   631 638 632 
641 621 633 636
local_node177891   0   0   
0   59444   0   0
other_node 0 718 711 
726 858 712 719
Jia He (1):
  mm, page_alloc: fix incorrect zone_statistics data

 mm/page_alloc.c | 3 +++
 1 file changed, 3 insertions(+)

-- 
2.5.5



linux-next: manual merge of the akpm tree with the vfs tree

2016-12-11 Thread Stephen Rothwell
Hi all,

Al let me know that he had put a newer version of the autofs patches
into his vfs tree, so I have dropped the following patches from the akpm
tree today:

  vfs: change d_manage() to take a struct path
  vfs: add path_is_mountpoint() helper
  vfs: fix boolreturn.cocci warnings
  vfs: add path_has_submounts()
  autofs: change autofs4_expire_wait() to take struct path
  autofs: change autofs4_wait() to take struct path
  autofs: use path_is_mountpoint() to fix unreliable d_mountpoint() checks
  autofs: use path_has_submounts() to fix unreliable have_submount() checks
  vfs: remove unused have_submounts() function
  vfs: merge path_is_mountpoint() and path_is_mountpoint_rcu()
  autofs: make struct path const in autofs4_dir_open()
  autofs: change struct path to const in autofs4_expire_wait() and 
autofs4_wait()
  vfs: change struct path to const in d_manage()
  vfs: constify path parameter of path_has_submounts()
  autofs: don't hold spinlock over direct mount expire
  vfs: make may_umount_tree() mount propagation aware
  vfs-make-may_umount_tree-mount-propogation-aware-checkpatch-fixes

I hope those were the correct ones.
-- 
Cheers,
Stephen Rothwell


linux-next: manual merge of the akpm tree with the vfs tree

2016-12-11 Thread Stephen Rothwell
Hi all,

Al let me know that he had put a newer version of the autofs patches
into his vfs tree, so I have dropped the following patches from the akpm
tree today:

  vfs: change d_manage() to take a struct path
  vfs: add path_is_mountpoint() helper
  vfs: fix boolreturn.cocci warnings
  vfs: add path_has_submounts()
  autofs: change autofs4_expire_wait() to take struct path
  autofs: change autofs4_wait() to take struct path
  autofs: use path_is_mountpoint() to fix unreliable d_mountpoint() checks
  autofs: use path_has_submounts() to fix unreliable have_submount() checks
  vfs: remove unused have_submounts() function
  vfs: merge path_is_mountpoint() and path_is_mountpoint_rcu()
  autofs: make struct path const in autofs4_dir_open()
  autofs: change struct path to const in autofs4_expire_wait() and 
autofs4_wait()
  vfs: change struct path to const in d_manage()
  vfs: constify path parameter of path_has_submounts()
  autofs: don't hold spinlock over direct mount expire
  vfs: make may_umount_tree() mount propagation aware
  vfs-make-may_umount_tree-mount-propogation-aware-checkpatch-fixes

I hope those were the correct ones.
-- 
Cheers,
Stephen Rothwell


linux-next: manual merge of the akpm tree with Linus' tree

2016-12-11 Thread Stephen Rothwell
Hi Andrew,

Today's linux-next merge of the akpm tree got a conflict in:

  lib/radix-tree.c

between commit:

  2b41226b39b6 ("Revert "radix tree test suite: fix compilation"")

from Linus' tree and patch:

  "reimplement IDR and IDA using the radix tree"

from the akpm tree.

I fixed it up (I added back the include of notifier.h) and can carry the
fix as necessary. This is now fixed as far as linux-next is concerned,
but any non trivial conflicts should be mentioned to your upstream
maintainer when your tree is submitted for merging.  You may also want
to consider cooperating with the maintainer of the conflicting tree to
minimise any particularly complex conflicts.

-- 
Cheers,
Stephen Rothwell


linux-next: manual merge of the akpm tree with Linus' tree

2016-12-11 Thread Stephen Rothwell
Hi Andrew,

Today's linux-next merge of the akpm tree got a conflict in:

  lib/radix-tree.c

between commit:

  2b41226b39b6 ("Revert "radix tree test suite: fix compilation"")

from Linus' tree and patch:

  "reimplement IDR and IDA using the radix tree"

from the akpm tree.

I fixed it up (I added back the include of notifier.h) and can carry the
fix as necessary. This is now fixed as far as linux-next is concerned,
but any non trivial conflicts should be mentioned to your upstream
maintainer when your tree is submitted for merging.  You may also want
to consider cooperating with the maintainer of the conflicting tree to
minimise any particularly complex conflicts.

-- 
Cheers,
Stephen Rothwell


Re: [PATCH v2] siphash: add cryptographically secure hashtable function

2016-12-11 Thread Jason A. Donenfeld
Hey Linus,

On Mon, Dec 12, 2016 at 5:01 AM, Linus Torvalds
 wrote:
> The above is extremely inefficient. Considering that most kernel data
> would be expected to be smallish, that matters (ie the usual benchmark
> would not be about hashing megabytes of data, but instead millions of
> hashes of small data).
>
> I think this could be rewritten (at least for 64-bit architectures) as
>
> #ifdef CONFIG_DCACHE_WORD_ACCESS
>
> if (left)
> b |= le64_to_cpu(load_unaligned_zeropad(data) &
> bytemask_from_count(left));
>
> #else
>
> .. do the duff's device thing with the switch() ..
>
> #endif
>
> which should give you basically perfect code generation (ie a single
> 64-bit load and a byte mask).

I modified the test to hash data of size 0 through 7 repeatedly
1 times, and benchmarked that a few times on a Skylake laptop.
The `load_unaligned_zeropad & bytemask_from_count` version was
consistently 7% slower.

I then modified it again to simply hash a 4 byte constant repeatedly
10 times. The `load_unaligned_zeropad & bytemask_from_count`
version was around 6% faster. I tried again with a 7 byte constant and
got more or less a similar result.

Then I tried with a 1 byte constant, and found that the
`load_unaligned_zeropad & bytemask_from_count` version was slower.

So, it would seem that between the `if (left)` and the `switch
(left)`, there's the same number of branches. But for small values of
`left`, the duff's device just has simpler arithmetic, whereas for
large values of `left`, the `load_unaligned_zeropad` prevails. If
micro-optimization is really appealing, one could imagine a hybrid of
the two:

switch (left) {
case 7:
case 6:
case 5:
case 4:
b |= le64_to_cpu(load_unaligned_zeropad(data) &
bytemask_from_count(left));
break;
case 3: b |= ((u64)data[2]) << 16;
case 2: b |= ((u64)data[1]) <<  8;
case 1: b |= ((u64)data[0]); break;
case 0: break;
}

But I'm not sure this complication is worth it, and it might be more
likely that the left-over size is 4 bytes most of the time, so we
should just use your trick on platforms that support it.

Jason


Re: [PATCH v2] siphash: add cryptographically secure hashtable function

2016-12-11 Thread Jason A. Donenfeld
Hey Linus,

On Mon, Dec 12, 2016 at 5:01 AM, Linus Torvalds
 wrote:
> The above is extremely inefficient. Considering that most kernel data
> would be expected to be smallish, that matters (ie the usual benchmark
> would not be about hashing megabytes of data, but instead millions of
> hashes of small data).
>
> I think this could be rewritten (at least for 64-bit architectures) as
>
> #ifdef CONFIG_DCACHE_WORD_ACCESS
>
> if (left)
> b |= le64_to_cpu(load_unaligned_zeropad(data) &
> bytemask_from_count(left));
>
> #else
>
> .. do the duff's device thing with the switch() ..
>
> #endif
>
> which should give you basically perfect code generation (ie a single
> 64-bit load and a byte mask).

I modified the test to hash data of size 0 through 7 repeatedly
1 times, and benchmarked that a few times on a Skylake laptop.
The `load_unaligned_zeropad & bytemask_from_count` version was
consistently 7% slower.

I then modified it again to simply hash a 4 byte constant repeatedly
10 times. The `load_unaligned_zeropad & bytemask_from_count`
version was around 6% faster. I tried again with a 7 byte constant and
got more or less a similar result.

Then I tried with a 1 byte constant, and found that the
`load_unaligned_zeropad & bytemask_from_count` version was slower.

So, it would seem that between the `if (left)` and the `switch
(left)`, there's the same number of branches. But for small values of
`left`, the duff's device just has simpler arithmetic, whereas for
large values of `left`, the `load_unaligned_zeropad` prevails. If
micro-optimization is really appealing, one could imagine a hybrid of
the two:

switch (left) {
case 7:
case 6:
case 5:
case 4:
b |= le64_to_cpu(load_unaligned_zeropad(data) &
bytemask_from_count(left));
break;
case 3: b |= ((u64)data[2]) << 16;
case 2: b |= ((u64)data[1]) <<  8;
case 1: b |= ((u64)data[0]); break;
case 0: break;
}

But I'm not sure this complication is worth it, and it might be more
likely that the left-over size is 4 bytes most of the time, so we
should just use your trick on platforms that support it.

Jason


Re: [PATCH v2] siphash: add cryptographically secure hashtable function

2016-12-11 Thread Eric Biggers
On Mon, Dec 12, 2016 at 04:48:17AM +0100, Jason A. Donenfeld wrote:
>
> diff --git a/lib/Makefile b/lib/Makefile
> index 50144a3aeebd..71d398b04a74 100644
> --- a/lib/Makefile
> +++ b/lib/Makefile
> @@ -22,7 +22,8 @@ lib-y := ctype.o string.o vsprintf.o cmdline.o \
>sha1.o chacha20.o md5.o irq_regs.o argv_split.o \
>flex_proportions.o ratelimit.o show_mem.o \
>is_single_threaded.o plist.o decompress.o kobject_uevent.o \
> -  earlycpio.o seq_buf.o nmi_backtrace.o nodemask.o win_minmax.o
> +  earlycpio.o seq_buf.o siphash.o \
> +  nmi_backtrace.o nodemask.o win_minmax.o
>  
>  lib-$(CONFIG_MMU) += ioremap.o
>  lib-$(CONFIG_SMP) += cpumask.o
> @@ -44,7 +45,7 @@ obj-$(CONFIG_TEST_HEXDUMP) += test_hexdump.o
>  obj-y += kstrtox.o
>  obj-$(CONFIG_TEST_BPF) += test_bpf.o
>  obj-$(CONFIG_TEST_FIRMWARE) += test_firmware.o
> -obj-$(CONFIG_TEST_HASH) += test_hash.o
> +obj-$(CONFIG_TEST_HASH) += test_hash.o test_siphash.o

Maybe add to the help text for CONFIG_TEST_HASH that it now tests siphash too?

> +static inline u64 le64_to_cpuvp(const void *p)
> +{
> + return le64_to_cpup(p);
> +}

This assumes the key and message buffers are aligned to __alignof__(u64).
Unless that's going to be a clearly documented requirement for callers, you
should use get_unaligned_le64() instead.  And you can pass a 'u8 *' directly to
get_unaligned_le64(), no need for a helper function.

> + b = (v0 ^ v1) ^ (v2 ^ v3);
> + return (__force u64)cpu_to_le64(b);
> +}

It makes sense for this to return a u64, but that means the cpu_to_le64() is
wrong, since u64 indicates CPU endianness.  It should just return 'b'.

> +++ b/lib/test_siphash.c
> @@ -0,0 +1,116 @@
> +/* Test cases for siphash.c
> + *
> + * Copyright (C) 2015-2016 Jason A. Donenfeld 
> + *
> + * This file is provided under a dual BSD/GPLv2 license.
> + *
> + * SipHash: a fast short-input PRF
> + * https://131002.net/siphash/
> + */
> +
> +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
> +
> +#include 
> +#include 
> +#include 
> +#include 
> +#include 
> +
> +static const u8 test_vectors[64][8] = {
> + { 0x31, 0x0e, 0x0e, 0xdd, 0x47, 0xdb, 0x6f, 0x72 },

Can you mention in a comment where the test vectors came from?

> + if (memcmp(&out, test_vectors[i], 8)) {
> + pr_info("self-test %u: FAIL\n", i + 1);
> + ret = -EINVAL;
> + }

If you make the output really be CPU-endian like I'm suggesting then this will
need to be something like:

if (out != get_unaligned_le64(test_vectors[i])) {

Or else make the test vectors be an array of u64.

- Eric


Re: [PATCH v2] siphash: add cryptographically secure hashtable function

2016-12-11 Thread Eric Biggers
On Mon, Dec 12, 2016 at 04:48:17AM +0100, Jason A. Donenfeld wrote:
>
> diff --git a/lib/Makefile b/lib/Makefile
> index 50144a3aeebd..71d398b04a74 100644
> --- a/lib/Makefile
> +++ b/lib/Makefile
> @@ -22,7 +22,8 @@ lib-y := ctype.o string.o vsprintf.o cmdline.o \
>sha1.o chacha20.o md5.o irq_regs.o argv_split.o \
>flex_proportions.o ratelimit.o show_mem.o \
>is_single_threaded.o plist.o decompress.o kobject_uevent.o \
> -  earlycpio.o seq_buf.o nmi_backtrace.o nodemask.o win_minmax.o
> +  earlycpio.o seq_buf.o siphash.o \
> +  nmi_backtrace.o nodemask.o win_minmax.o
>  
>  lib-$(CONFIG_MMU) += ioremap.o
>  lib-$(CONFIG_SMP) += cpumask.o
> @@ -44,7 +45,7 @@ obj-$(CONFIG_TEST_HEXDUMP) += test_hexdump.o
>  obj-y += kstrtox.o
>  obj-$(CONFIG_TEST_BPF) += test_bpf.o
>  obj-$(CONFIG_TEST_FIRMWARE) += test_firmware.o
> -obj-$(CONFIG_TEST_HASH) += test_hash.o
> +obj-$(CONFIG_TEST_HASH) += test_hash.o test_siphash.o

Maybe add to the help text for CONFIG_TEST_HASH that it now tests siphash too?

> +static inline u64 le64_to_cpuvp(const void *p)
> +{
> + return le64_to_cpup(p);
> +}

This assumes the key and message buffers are aligned to __alignof__(u64).
Unless that's going to be a clearly documented requirement for callers, you
should use get_unaligned_le64() instead.  And you can pass a 'u8 *' directly to
get_unaligned_le64(), no need for a helper function.

> + b = (v0 ^ v1) ^ (v2 ^ v3);
> + return (__force u64)cpu_to_le64(b);
> +}

It makes sense for this to return a u64, but that means the cpu_to_le64() is
wrong, since u64 indicates CPU endianness.  It should just return 'b'.

> +++ b/lib/test_siphash.c
> @@ -0,0 +1,116 @@
> +/* Test cases for siphash.c
> + *
> + * Copyright (C) 2015-2016 Jason A. Donenfeld 
> + *
> + * This file is provided under a dual BSD/GPLv2 license.
> + *
> + * SipHash: a fast short-input PRF
> + * https://131002.net/siphash/
> + */
> +
> +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
> +
> +#include 
> +#include 
> +#include 
> +#include 
> +#include 
> +
> +static const u8 test_vectors[64][8] = {
> + { 0x31, 0x0e, 0x0e, 0xdd, 0x47, 0xdb, 0x6f, 0x72 },

Can you mention in a comment where the test vectors came from?

> + if (memcmp(&out, test_vectors[i], 8)) {
> + pr_info("self-test %u: FAIL\n", i + 1);
> + ret = -EINVAL;
> + }

If you make the output really be CPU-endian like I'm suggesting then this will
need to be something like:

if (out != get_unaligned_le64(test_vectors[i])) {

Or else make the test vectors be an array of u64.

- Eric


[lkp-developer] [scsi] 8eea81e090: BUG:unable_to_handle_kernel

2016-12-11 Thread kernel test robot
FYI, we noticed the following commit:

commit: 8eea81e0903fcde1c28044ea66acc4c5c578f553 ("scsi: enable IO scheduling 
for scsi-mq")
https://git.kernel.org/pub/scm/linux/kernel/git/axboe/linux-block.git 
blk-mq-legacy-sched.1

in testcase: boot

on test machine: qemu-system-i386 -enable-kvm -cpu Haswell,+smep,+smap -m 360M

caused below changes:


++++
|| 4fecac6753 | 8eea81e090 |
++++
| boot_successes | 0  | 0  |
| boot_failures  | 8  | 8  |
| IP-Config:Auto-configuration_of_network_failed | 8  ||
| BUG:unable_to_handle_kernel| 0  | 8  |
| Oops   | 0  | 8  |
| EIP_is_at_remove_element   | 0  | 8  |
| calltrace:scsi_debug_init  | 0  | 8  |
| Kernel_panic-not_syncing:Fatal_exception   | 0  | 8  |
++++



[4.082237] blk-mq: sq sched init failed
[4.082967] scsi 0:0:0:0: Direct-Access Linuxscsi_debug   0186 
PQ: 0 ANSI: 7
[4.088634] scsi 0:0:0:0: Attached scsi generic sg0 type 0
[4.090968] BUG: unable to handle kernel paging request at f20bc5d2
[4.091900] IP: [<41177b90>] remove_element+0x1c/0x97
[4.092706] *pde =  
[4.093139] Oops:  [#1] SMP
[4.093566] Modules linked in:
[4.094013] CPU: 0 PID: 1 Comm: swapper/0 Not tainted 
4.9.0-rc1-00159-g8eea81e #1
[4.095017] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 
Debian-1.8.2-1 04/01/2014
[4.096210] task: 4032 task.stack: 4031a000
[4.096822] EIP: 0060:[<41177b90>] EFLAGS: 00210246 CPU: 0
[4.097566] EIP is at remove_element+0x1c/0x97
[4.098267] EAX: 303a303a EBX: 30746566 ECX:  EDX: 
[4.099113] ESI: 4dbacba0 EDI: 4b624454 EBP: 4031bcd0 ESP: 4031bcc0
[4.100090]  DS: 007b ES: 007b FS: 00d8 GS: 00e0 SS: 0068
[4.100814] CR0: 80050033 CR2: f20bc5d2 CR3: 02325000 CR4: 000406d0
[4.101671] Stack:
[4.101960]   4dbacba0  4b624454 4031bce0 41177c4e 4b624454 
4b624000
[4.104688]  4031bce8 414218a6 4031bcf8 41426009 41f17760  4031bd10 
41448e06
[4.105815]   0001 4dbaac14 4b62 4031bd1c 41448caa 4b6203d8 
4031bd24
[4.106867] Call Trace:
[4.107180]  [<41177c4e>] mempool_destroy+0x43/0x5e
[4.107821]  [<414218a6>] blk_exit_rl+0x2a/0x2c
[4.108459]  [<41426009>] blk_release_queue+0x6a/0x10a
[4.109184]  [<41448e06>] kobject_release+0xb3/0xda
[4.109854]  [<41448caa>] kobject_put+0x8a/0x8f
[4.110464]  [<4141e46e>] blk_put_queue+0x12/0x14
[4.114154]  [<415998ab>] scsi_device_dev_release_usercontext+0xb1/0x10c
[4.115065]  [<4196369b>] ? _raw_spin_lock_irqsave+0x40/0x49
[4.115843]  [<415997fa>] ? scsi_device_dev_release+0x1a/0x1a
[4.116607]  [<41087993>] execute_in_process_context+0x35/0x87
[4.117394]  [<415997f8>] scsi_device_dev_release+0x18/0x1a
[4.118154]  [<415467d8>] device_release+0x6e/0xd6
[4.118820]  [<41448e06>] kobject_release+0xb3/0xda
[4.119480]  [<41448caa>] kobject_put+0x8a/0x8f
[4.120100]  [<415468f6>] put_device+0x2a/0x2c
[4.120705]  [<4159ab6b>] __scsi_remove_device+0x116/0x11a
[4.121445]  [<41598cff>] scsi_forget_host+0x5c/0x6b
[4.122128]  [<4158b9e1>] scsi_remove_host+0xa1/0x13e
[4.122816]  [<415c6c23>] sdebug_driver_remove+0x4b/0x87
[4.123539]  [<4154b3c4>] driver_probe_device+0x274/0x457
[4.126619]  [<4154b7ca>] __device_attach_driver+0xe9/0xf5
[4.127293]  [<41549520>] bus_for_each_drv+0x8f/0x9c
[4.127924]  [<4154b0b4>] __device_attach+0xbc/0x117
[4.128610]  [<4154b6e1>] ? driver_allows_async_probing+0x37/0x37
[4.129429]  [<4154b963>] device_initial_probe+0x12/0x14
[4.130150]  [<4154a5ec>] bus_probe_device+0x5b/0xb3
[4.130828]  [<415484fd>] device_add+0x62d/0x7ac
[4.131453]  [<410c535f>] ? __raw_spin_lock_init+0x2b/0x4e
[4.132207]  [<41548693>] device_register+0x17/0x1a
[4.132873]  [<415ce07a>] sdebug_add_adapter+0x12a/0x189
[4.133590]  [<4228ed72>] scsi_debug_init+0x868/0x8fc
[4.134272]  [<4144a67c>] ? kobject_uevent+0xa/0xc
[4.134930]  [<4154be67>] ? driver_register+0x14a/0x163
[4.135641]  [<4159a0e3>] ? scsi_register_driver+0x14/0x16
[4.136388]  [<4228e50a>] ? init_ch_module+0xfe/0xfe
[4.137058]  [<41000542>] do_one_initcall+0xa3/0x19d
[4.137731]  [<42241500>] ? do_early_param+0x8c/0xab
[4.138407]  [<4108f593>] ? parse_args+0x347/0x3f5
[4.139067]  [<422420c1>] kernel_init_freeable+0x1a4/0x267
[4.139811]  [<419550aa>] ? rest_init+0x134/0x134
[4.140448]  [<419550b7>] 

[lkp-developer] [kernel/fork] cc639db4ac: BUG:using_smp_processor_id()in_preemptible

2016-12-11 Thread kernel test robot
FYI, we noticed the following commit:

commit: cc639db4acfeb459f3dcec080c6cfe11e36266e0 ("kernel/fork: use 
vfree_atomic() to free thread stack")
https://git.kernel.org/pub/scm/linux/kernel/git/next/linux-next.git master

in testcase: iperf
with following parameters:

runtime: 300s
cluster: cs-localhost
protocol: tcp

test-description: iperf is a tool for active measurements of the maximum 
achievable bandwidth on IP networks.
test-url: https://iperf.fr/


on test machine: qemu-system-x86_64 -enable-kvm -cpu Nehalem -smp 2 -m 1G

caused below changes:


+-+++
| | 1d2bc599c4 | cc639db4ac |
+-+++
| boot_successes  | 48 | 41 |
| boot_failures   | 24 | 29 |
| invoked_oom-killer:gfp_mask=0x  | 21 | 19 |
| Mem-Info| 21 | 19 |
| BUG:kernel_reboot-without-warning_in_test_stage | 3  | 2  |
| BUG:using_smp_processor_id()in_preemptible  | 0  | 13 |
| calltrace:SyS_clone | 0  | 8  |
+-+++



[   36.609372] 
[   37.466792] skip http request: 
cgi-bin/lkp-jobfile-append-var?job_file=/lkp/scheduled/vm-vp-1G-7/iperf-300s-cs-localhost-tcp-debian-x86_64-2016-08-31.cgz-cc639db4acfeb459f3dcec080c6cfe11e36266e0-20161209-43576-1ch14l2-25.yaml_state=running
 -o /dev/null
[   37.470889] 
[   38.354206] BUG: using smp_processor_id() in preemptible [] code: 
iperf-300s-cs-l/277
[   38.364617] caller is debug_smp_processor_id+0x17/0x19
[   38.369836] CPU: 1 PID: 277 Comm: iperf-300s-cs-l Not tainted 
4.9.0-rc8-00140-gcc639db #2
[   38.371241]  c93f3cf0 8123ae6f 0001 
818181da
[   38.372656]  c93f3d20 81252f41 00012de0 
fdff
[   38.375556]  880009328f40 88000592c400 c93f3d30 
81252f6a
[   38.381015] Call Trace:
[   38.381439]  [] dump_stack+0x9a/0xd0
[   38.382302]  [] check_preemption_disabled+0xdd/0xef
[   38.383365]  [] debug_smp_processor_id+0x17/0x19
[   38.384389]  [] __vfree_deferred+0x16/0x4c
[   38.389428]  [] vfree_atomic+0x22/0x24
[   38.390334]  [] free_thread_stack+0xc2/0x106
[   38.391330]  [] put_task_stack+0x4c/0x62
[   38.392239]  [] copy_process+0x7e0/0x16e8
[   38.397439]  [] _do_fork+0xbb/0x2d3
[   38.398285]  [] ? __do_page_fault+0x2e1/0x384
[   38.399275]  [] ? trace_hardirqs_off_caller+0x12/0x24
[   38.400382]  [] SyS_clone+0x19/0x1b
[   38.405409]  [] do_syscall_64+0x143/0x173
[   38.406340]  [] entry_SYSCALL64_slow_path+0x25/0x25
[   39.757101] {
[   39.757377] 


To reproduce:

git clone 
git://git.kernel.org/pub/scm/linux/kernel/git/wfg/lkp-tests.git
cd lkp-tests
bin/lkp qemu -k  job-script  # job-script is attached in this 
email



Thanks,
Ying Huang
#
# Automatically generated file; DO NOT EDIT.
# Linux/x86_64 4.9.0-rc8 Kernel Configuration
#
CONFIG_64BIT=y
CONFIG_X86_64=y
CONFIG_X86=y
CONFIG_INSTRUCTION_DECODER=y
CONFIG_OUTPUT_FORMAT="elf64-x86-64"
CONFIG_ARCH_DEFCONFIG="arch/x86/configs/x86_64_defconfig"
CONFIG_LOCKDEP_SUPPORT=y
CONFIG_STACKTRACE_SUPPORT=y
CONFIG_MMU=y
CONFIG_ARCH_MMAP_RND_BITS_MIN=28
CONFIG_ARCH_MMAP_RND_BITS_MAX=32
CONFIG_ARCH_MMAP_RND_COMPAT_BITS_MIN=8
CONFIG_ARCH_MMAP_RND_COMPAT_BITS_MAX=16
CONFIG_NEED_DMA_MAP_STATE=y
CONFIG_NEED_SG_DMA_LENGTH=y
CONFIG_GENERIC_ISA_DMA=y
CONFIG_GENERIC_BUG=y
CONFIG_GENERIC_BUG_RELATIVE_POINTERS=y
CONFIG_GENERIC_HWEIGHT=y
CONFIG_ARCH_MAY_HAVE_PC_FDC=y
CONFIG_RWSEM_XCHGADD_ALGORITHM=y
CONFIG_GENERIC_CALIBRATE_DELAY=y
CONFIG_ARCH_HAS_CPU_RELAX=y
CONFIG_ARCH_HAS_CACHE_LINE_SIZE=y
CONFIG_HAVE_SETUP_PER_CPU_AREA=y
CONFIG_NEED_PER_CPU_EMBED_FIRST_CHUNK=y
CONFIG_NEED_PER_CPU_PAGE_FIRST_CHUNK=y
CONFIG_ARCH_HIBERNATION_POSSIBLE=y
CONFIG_ARCH_SUSPEND_POSSIBLE=y
CONFIG_ARCH_WANT_HUGE_PMD_SHARE=y
CONFIG_ARCH_WANT_GENERAL_HUGETLB=y
CONFIG_ZONE_DMA32=y
CONFIG_AUDIT_ARCH=y
CONFIG_ARCH_SUPPORTS_OPTIMIZED_INLINING=y
CONFIG_ARCH_SUPPORTS_DEBUG_PAGEALLOC=y
CONFIG_X86_64_SMP=y
CONFIG_ARCH_SUPPORTS_UPROBES=y
CONFIG_FIX_EARLYCON_MEM=y
CONFIG_DEBUG_RODATA=y
CONFIG_PGTABLE_LEVELS=4
CONFIG_DEFCONFIG_LIST="/lib/modules/$UNAME_RELEASE/.config"
CONFIG_CONSTRUCTORS=y
CONFIG_IRQ_WORK=y
CONFIG_BUILDTIME_EXTABLE_SORT=y
CONFIG_THREAD_INFO_IN_TASK=y

#
# General setup
#
CONFIG_INIT_ENV_ARG_LIMIT=32
CONFIG_CROSS_COMPILE=""
# CONFIG_COMPILE_TEST is not set
CONFIG_LOCALVERSION=""
CONFIG_LOCALVERSION_AUTO=y
CONFIG_HAVE_KERNEL_GZIP=y
CONFIG_HAVE_KERNEL_BZIP2=y
CONFIG_HAVE_KERNEL_LZMA=y
CONFIG_HAVE_KERNEL_XZ=y
CONFIG_HAVE_KERNEL_LZO=y
CONFIG_HAVE_KERNEL_LZ4=y
# CONFIG_KERNEL_GZIP is not set
# CONFIG_KERNEL_BZIP2 is not 

[lkp-developer] [scsi] 8eea81e090: BUG:unable_to_handle_kernel

2016-12-11 Thread kernel test robot
FYI, we noticed the following commit:

commit: 8eea81e0903fcde1c28044ea66acc4c5c578f553 ("scsi: enable IO scheduling 
for scsi-mq")
https://git.kernel.org/pub/scm/linux/kernel/git/axboe/linux-block.git 
blk-mq-legacy-sched.1

in testcase: boot

on test machine: qemu-system-i386 -enable-kvm -cpu Haswell,+smep,+smap -m 360M

caused below changes:


++++
|| 4fecac6753 | 8eea81e090 |
++++
| boot_successes | 0  | 0  |
| boot_failures  | 8  | 8  |
| IP-Config:Auto-configuration_of_network_failed | 8  ||
| BUG:unable_to_handle_kernel| 0  | 8  |
| Oops   | 0  | 8  |
| EIP_is_at_remove_element   | 0  | 8  |
| calltrace:scsi_debug_init  | 0  | 8  |
| Kernel_panic-not_syncing:Fatal_exception   | 0  | 8  |
++++



[4.082237] blk-mq: sq sched init failed
[4.082967] scsi 0:0:0:0: Direct-Access Linuxscsi_debug   0186 
PQ: 0 ANSI: 7
[4.088634] scsi 0:0:0:0: Attached scsi generic sg0 type 0
[4.090968] BUG: unable to handle kernel paging request at f20bc5d2
[4.091900] IP: [<41177b90>] remove_element+0x1c/0x97
[4.092706] *pde =  
[4.093139] Oops:  [#1] SMP
[4.093566] Modules linked in:
[4.094013] CPU: 0 PID: 1 Comm: swapper/0 Not tainted 
4.9.0-rc1-00159-g8eea81e #1
[4.095017] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 
Debian-1.8.2-1 04/01/2014
[4.096210] task: 4032 task.stack: 4031a000
[4.096822] EIP: 0060:[<41177b90>] EFLAGS: 00210246 CPU: 0
[4.097566] EIP is at remove_element+0x1c/0x97
[4.098267] EAX: 303a303a EBX: 30746566 ECX:  EDX: 
[4.099113] ESI: 4dbacba0 EDI: 4b624454 EBP: 4031bcd0 ESP: 4031bcc0
[4.100090]  DS: 007b ES: 007b FS: 00d8 GS: 00e0 SS: 0068
[4.100814] CR0: 80050033 CR2: f20bc5d2 CR3: 02325000 CR4: 000406d0
[4.101671] Stack:
[4.101960]   4dbacba0  4b624454 4031bce0 41177c4e 4b624454 
4b624000
[4.104688]  4031bce8 414218a6 4031bcf8 41426009 41f17760  4031bd10 
41448e06
[4.105815]   0001 4dbaac14 4b62 4031bd1c 41448caa 4b6203d8 
4031bd24
[4.106867] Call Trace:
[4.107180]  [<41177c4e>] mempool_destroy+0x43/0x5e
[4.107821]  [<414218a6>] blk_exit_rl+0x2a/0x2c
[4.108459]  [<41426009>] blk_release_queue+0x6a/0x10a
[4.109184]  [<41448e06>] kobject_release+0xb3/0xda
[4.109854]  [<41448caa>] kobject_put+0x8a/0x8f
[4.110464]  [<4141e46e>] blk_put_queue+0x12/0x14
[4.114154]  [<415998ab>] scsi_device_dev_release_usercontext+0xb1/0x10c
[4.115065]  [<4196369b>] ? _raw_spin_lock_irqsave+0x40/0x49
[4.115843]  [<415997fa>] ? scsi_device_dev_release+0x1a/0x1a
[4.116607]  [<41087993>] execute_in_process_context+0x35/0x87
[4.117394]  [<415997f8>] scsi_device_dev_release+0x18/0x1a
[4.118154]  [<415467d8>] device_release+0x6e/0xd6
[4.118820]  [<41448e06>] kobject_release+0xb3/0xda
[4.119480]  [<41448caa>] kobject_put+0x8a/0x8f
[4.120100]  [<415468f6>] put_device+0x2a/0x2c
[4.120705]  [<4159ab6b>] __scsi_remove_device+0x116/0x11a
[4.121445]  [<41598cff>] scsi_forget_host+0x5c/0x6b
[4.122128]  [<4158b9e1>] scsi_remove_host+0xa1/0x13e
[4.122816]  [<415c6c23>] sdebug_driver_remove+0x4b/0x87
[4.123539]  [<4154b3c4>] driver_probe_device+0x274/0x457
[4.126619]  [<4154b7ca>] __device_attach_driver+0xe9/0xf5
[4.127293]  [<41549520>] bus_for_each_drv+0x8f/0x9c
[4.127924]  [<4154b0b4>] __device_attach+0xbc/0x117
[4.128610]  [<4154b6e1>] ? driver_allows_async_probing+0x37/0x37
[4.129429]  [<4154b963>] device_initial_probe+0x12/0x14
[4.130150]  [<4154a5ec>] bus_probe_device+0x5b/0xb3
[4.130828]  [<415484fd>] device_add+0x62d/0x7ac
[4.131453]  [<410c535f>] ? __raw_spin_lock_init+0x2b/0x4e
[4.132207]  [<41548693>] device_register+0x17/0x1a
[4.132873]  [<415ce07a>] sdebug_add_adapter+0x12a/0x189
[4.133590]  [<4228ed72>] scsi_debug_init+0x868/0x8fc
[4.134272]  [<4144a67c>] ? kobject_uevent+0xa/0xc
[4.134930]  [<4154be67>] ? driver_register+0x14a/0x163
[4.135641]  [<4159a0e3>] ? scsi_register_driver+0x14/0x16
[4.136388]  [<4228e50a>] ? init_ch_module+0xfe/0xfe
[4.137058]  [<41000542>] do_one_initcall+0xa3/0x19d
[4.137731]  [<42241500>] ? do_early_param+0x8c/0xab
[4.138407]  [<4108f593>] ? parse_args+0x347/0x3f5
[4.139067]  [<422420c1>] kernel_init_freeable+0x1a4/0x267
[4.139811]  [<419550aa>] ? rest_init+0x134/0x134
[4.140448]  [<419550b7>] 

[lkp-developer] [kernel/fork] cc639db4ac: BUG:using_smp_processor_id()in_preemptible

2016-12-11 Thread kernel test robot
FYI, we noticed the following commit:

commit: cc639db4acfeb459f3dcec080c6cfe11e36266e0 ("kernel/fork: use 
vfree_atomic() to free thread stack")
https://git.kernel.org/pub/scm/linux/kernel/git/next/linux-next.git master

in testcase: iperf
with following parameters:

runtime: 300s
cluster: cs-localhost
protocol: tcp

test-description: iperf is a tool for active measurements of the maximum 
achievable bandwidth on IP networks.
test-url: https://iperf.fr/


on test machine: qemu-system-x86_64 -enable-kvm -cpu Nehalem -smp 2 -m 1G

caused below changes:


+-+++
| | 1d2bc599c4 | cc639db4ac |
+-+++
| boot_successes  | 48 | 41 |
| boot_failures   | 24 | 29 |
| invoked_oom-killer:gfp_mask=0x  | 21 | 19 |
| Mem-Info| 21 | 19 |
| BUG:kernel_reboot-without-warning_in_test_stage | 3  | 2  |
| BUG:using_smp_processor_id()in_preemptible  | 0  | 13 |
| calltrace:SyS_clone | 0  | 8  |
+-+++



[   36.609372] 
[   37.466792] skip http request: 
cgi-bin/lkp-jobfile-append-var?job_file=/lkp/scheduled/vm-vp-1G-7/iperf-300s-cs-localhost-tcp-debian-x86_64-2016-08-31.cgz-cc639db4acfeb459f3dcec080c6cfe11e36266e0-20161209-43576-1ch14l2-25.yaml_state=running
 -o /dev/null
[   37.470889] 
[   38.354206] BUG: using smp_processor_id() in preemptible [] code: 
iperf-300s-cs-l/277
[   38.364617] caller is debug_smp_processor_id+0x17/0x19
[   38.369836] CPU: 1 PID: 277 Comm: iperf-300s-cs-l Not tainted 
4.9.0-rc8-00140-gcc639db #2
[   38.371241]  c93f3cf0 8123ae6f 0001 
818181da
[   38.372656]  c93f3d20 81252f41 00012de0 
fdff
[   38.375556]  880009328f40 88000592c400 c93f3d30 
81252f6a
[   38.381015] Call Trace:
[   38.381439]  [] dump_stack+0x9a/0xd0
[   38.382302]  [] check_preemption_disabled+0xdd/0xef
[   38.383365]  [] debug_smp_processor_id+0x17/0x19
[   38.384389]  [] __vfree_deferred+0x16/0x4c
[   38.389428]  [] vfree_atomic+0x22/0x24
[   38.390334]  [] free_thread_stack+0xc2/0x106
[   38.391330]  [] put_task_stack+0x4c/0x62
[   38.392239]  [] copy_process+0x7e0/0x16e8
[   38.397439]  [] _do_fork+0xbb/0x2d3
[   38.398285]  [] ? __do_page_fault+0x2e1/0x384
[   38.399275]  [] ? trace_hardirqs_off_caller+0x12/0x24
[   38.400382]  [] SyS_clone+0x19/0x1b
[   38.405409]  [] do_syscall_64+0x143/0x173
[   38.406340]  [] entry_SYSCALL64_slow_path+0x25/0x25
[   39.757101] {
[   39.757377] 


To reproduce:

git clone 
git://git.kernel.org/pub/scm/linux/kernel/git/wfg/lkp-tests.git
cd lkp-tests
bin/lkp qemu -k  job-script  # job-script is attached in this 
email



Thanks,
Ying Huang
#
# Automatically generated file; DO NOT EDIT.
# Linux/x86_64 4.9.0-rc8 Kernel Configuration
#
CONFIG_64BIT=y
CONFIG_X86_64=y
CONFIG_X86=y
CONFIG_INSTRUCTION_DECODER=y
CONFIG_OUTPUT_FORMAT="elf64-x86-64"
CONFIG_ARCH_DEFCONFIG="arch/x86/configs/x86_64_defconfig"
CONFIG_LOCKDEP_SUPPORT=y
CONFIG_STACKTRACE_SUPPORT=y
CONFIG_MMU=y
CONFIG_ARCH_MMAP_RND_BITS_MIN=28
CONFIG_ARCH_MMAP_RND_BITS_MAX=32
CONFIG_ARCH_MMAP_RND_COMPAT_BITS_MIN=8
CONFIG_ARCH_MMAP_RND_COMPAT_BITS_MAX=16
CONFIG_NEED_DMA_MAP_STATE=y
CONFIG_NEED_SG_DMA_LENGTH=y
CONFIG_GENERIC_ISA_DMA=y
CONFIG_GENERIC_BUG=y
CONFIG_GENERIC_BUG_RELATIVE_POINTERS=y
CONFIG_GENERIC_HWEIGHT=y
CONFIG_ARCH_MAY_HAVE_PC_FDC=y
CONFIG_RWSEM_XCHGADD_ALGORITHM=y
CONFIG_GENERIC_CALIBRATE_DELAY=y
CONFIG_ARCH_HAS_CPU_RELAX=y
CONFIG_ARCH_HAS_CACHE_LINE_SIZE=y
CONFIG_HAVE_SETUP_PER_CPU_AREA=y
CONFIG_NEED_PER_CPU_EMBED_FIRST_CHUNK=y
CONFIG_NEED_PER_CPU_PAGE_FIRST_CHUNK=y
CONFIG_ARCH_HIBERNATION_POSSIBLE=y
CONFIG_ARCH_SUSPEND_POSSIBLE=y
CONFIG_ARCH_WANT_HUGE_PMD_SHARE=y
CONFIG_ARCH_WANT_GENERAL_HUGETLB=y
CONFIG_ZONE_DMA32=y
CONFIG_AUDIT_ARCH=y
CONFIG_ARCH_SUPPORTS_OPTIMIZED_INLINING=y
CONFIG_ARCH_SUPPORTS_DEBUG_PAGEALLOC=y
CONFIG_X86_64_SMP=y
CONFIG_ARCH_SUPPORTS_UPROBES=y
CONFIG_FIX_EARLYCON_MEM=y
CONFIG_DEBUG_RODATA=y
CONFIG_PGTABLE_LEVELS=4
CONFIG_DEFCONFIG_LIST="/lib/modules/$UNAME_RELEASE/.config"
CONFIG_CONSTRUCTORS=y
CONFIG_IRQ_WORK=y
CONFIG_BUILDTIME_EXTABLE_SORT=y
CONFIG_THREAD_INFO_IN_TASK=y

#
# General setup
#
CONFIG_INIT_ENV_ARG_LIMIT=32
CONFIG_CROSS_COMPILE=""
# CONFIG_COMPILE_TEST is not set
CONFIG_LOCALVERSION=""
CONFIG_LOCALVERSION_AUTO=y
CONFIG_HAVE_KERNEL_GZIP=y
CONFIG_HAVE_KERNEL_BZIP2=y
CONFIG_HAVE_KERNEL_LZMA=y
CONFIG_HAVE_KERNEL_XZ=y
CONFIG_HAVE_KERNEL_LZO=y
CONFIG_HAVE_KERNEL_LZ4=y
# CONFIG_KERNEL_GZIP is not set
# CONFIG_KERNEL_BZIP2 is not 

Re: [PATCH v7 1/2] usb: xhci: plat: Enable runtime PM

2016-12-11 Thread Baolin Wang
Hi Robert,

On 2 December 2016 at 05:46, Robert Foss  wrote:
> Enable runtime PM for the xhci-plat device so that the parent device
> may implement runtime PM.
>
> Signed-off-by: Robert Foss 
>
> Tested-by: Robert Foss 
> ---
>  drivers/usb/host/xhci-plat.c | 29 +++--
>  1 file changed, 27 insertions(+), 2 deletions(-)
>
> diff --git a/drivers/usb/host/xhci-plat.c b/drivers/usb/host/xhci-plat.c
> index ed56bf9ed885..ba4efe74f537 100644
> --- a/drivers/usb/host/xhci-plat.c
> +++ b/drivers/usb/host/xhci-plat.c
> @@ -246,6 +246,9 @@ static int xhci_plat_probe(struct platform_device *pdev)
> if (ret)
> goto dealloc_usb2_hcd;
>
> +   pm_runtime_set_active(&pdev->dev);
> +   pm_runtime_enable(&pdev->dev);

You did not implement the runtime PM callbacks of the xHCI device, so how
can the parent device control the resume/suspend of the xHCI device through
the runtime PM mechanism? Could you look at my previous patch, which
implements the runtime callbacks for the xHCI device?
https://lkml.org/lkml/2016/11/28/25

> +
> return 0;
>
>
> @@ -274,6 +277,8 @@ static int xhci_plat_remove(struct platform_device *dev)
> struct xhci_hcd *xhci = hcd_to_xhci(hcd);
> struct clk *clk = xhci->clk;
>
> +   pm_runtime_disable(&dev->dev);
> +
> usb_remove_hcd(xhci->shared_hcd);
> usb_phy_shutdown(hcd->usb_phy);
>
> @@ -292,6 +297,13 @@ static int xhci_plat_suspend(struct device *dev)
>  {
> struct usb_hcd  *hcd = dev_get_drvdata(dev);
> struct xhci_hcd *xhci = hcd_to_xhci(hcd);
> +   int ret;
> +
> +   ret = pm_runtime_get_sync(dev);
> +   if (ret < 0) {
> +   pm_runtime_put(dev);
> +   return ret;
> +   }
>
> /*
>  * xhci_suspend() needs `do_wakeup` to know whether host is allowed
> @@ -301,15 +313,28 @@ static int xhci_plat_suspend(struct device *dev)
>  * reconsider this when xhci_plat_suspend enlarges its scope, e.g.,
>  * also applies to runtime suspend.
>  */
> -   return xhci_suspend(xhci, device_may_wakeup(dev));
> +   ret = xhci_suspend(xhci, device_may_wakeup(dev));
> +   pm_runtime_put(dev);
> +
> +   return ret;
>  }
>
>  static int xhci_plat_resume(struct device *dev)
>  {
> struct usb_hcd  *hcd = dev_get_drvdata(dev);
> struct xhci_hcd *xhci = hcd_to_xhci(hcd);
> +   int ret;
>
> -   return xhci_resume(xhci, 0);
> +   ret = pm_runtime_get_sync(dev);
> +   if (ret < 0) {
> +   pm_runtime_put(dev);
> +   return ret;
> +   }
> +
> +   ret = xhci_resume(xhci, 0);
> +   pm_runtime_put(dev);
> +
> +   return ret;
>  }
>
>  static const struct dev_pm_ops xhci_plat_pm_ops = {
> --
> 2.11.0
>



-- 
Baolin.wang
Best Regards


Re: [PATCH v7 1/2] usb: xhci: plat: Enable runtime PM

2016-12-11 Thread Baolin Wang
Hi Robert,

On 2 December 2016 at 05:46, Robert Foss  wrote:
> Enable runtime PM for the xhci-plat device so that the parent device
> may implement runtime PM.
>
> Signed-off-by: Robert Foss 
>
> Tested-by: Robert Foss 
> ---
>  drivers/usb/host/xhci-plat.c | 29 +++--
>  1 file changed, 27 insertions(+), 2 deletions(-)
>
> diff --git a/drivers/usb/host/xhci-plat.c b/drivers/usb/host/xhci-plat.c
> index ed56bf9ed885..ba4efe74f537 100644
> --- a/drivers/usb/host/xhci-plat.c
> +++ b/drivers/usb/host/xhci-plat.c
> @@ -246,6 +246,9 @@ static int xhci_plat_probe(struct platform_device *pdev)
> if (ret)
> goto dealloc_usb2_hcd;
>
> +   pm_runtime_set_active(&pdev->dev);
> +   pm_runtime_enable(&pdev->dev);

You did not implement the runtime PM callbacks of the xHCI device, so how
can the parent device control the resume/suspend of the xHCI device through
the runtime PM mechanism? Could you look at my previous patch, which
implements the runtime callbacks for the xHCI device?
https://lkml.org/lkml/2016/11/28/25

> +
> return 0;
>
>
> @@ -274,6 +277,8 @@ static int xhci_plat_remove(struct platform_device *dev)
> struct xhci_hcd *xhci = hcd_to_xhci(hcd);
> struct clk *clk = xhci->clk;
>
> +   pm_runtime_disable(&dev->dev);
> +
> usb_remove_hcd(xhci->shared_hcd);
> usb_phy_shutdown(hcd->usb_phy);
>
> @@ -292,6 +297,13 @@ static int xhci_plat_suspend(struct device *dev)
>  {
> struct usb_hcd  *hcd = dev_get_drvdata(dev);
> struct xhci_hcd *xhci = hcd_to_xhci(hcd);
> +   int ret;
> +
> +   ret = pm_runtime_get_sync(dev);
> +   if (ret < 0) {
> +   pm_runtime_put(dev);
> +   return ret;
> +   }
>
> /*
>  * xhci_suspend() needs `do_wakeup` to know whether host is allowed
> @@ -301,15 +313,28 @@ static int xhci_plat_suspend(struct device *dev)
>  * reconsider this when xhci_plat_suspend enlarges its scope, e.g.,
>  * also applies to runtime suspend.
>  */
> -   return xhci_suspend(xhci, device_may_wakeup(dev));
> +   ret = xhci_suspend(xhci, device_may_wakeup(dev));
> +   pm_runtime_put(dev);
> +
> +   return ret;
>  }
>
>  static int xhci_plat_resume(struct device *dev)
>  {
> struct usb_hcd  *hcd = dev_get_drvdata(dev);
> struct xhci_hcd *xhci = hcd_to_xhci(hcd);
> +   int ret;
>
> -   return xhci_resume(xhci, 0);
> +   ret = pm_runtime_get_sync(dev);
> +   if (ret < 0) {
> +   pm_runtime_put(dev);
> +   return ret;
> +   }
> +
> +   ret = xhci_resume(xhci, 0);
> +   pm_runtime_put(dev);
> +
> +   return ret;
>  }
>
>  static const struct dev_pm_ops xhci_plat_pm_ops = {
> --
> 2.11.0
>



-- 
Baolin.wang
Best Regards


Re: Tearing down DMA transfer setup after DMA client has finished

2016-12-11 Thread Vinod Koul
On Fri, Dec 09, 2016 at 07:23:17PM +0100, Mason wrote:
> [ Dropping Mans to preserve his peace-of-mind ]
> 
> On 09/12/2016 18:56, Vinod Koul wrote:
> > On Fri, Dec 09, 2016 at 06:34:15PM +0100, Mason wrote:
> >> On 09/12/2016 18:17, Vinod Koul wrote:
> >>
> >>> On Fri, Dec 09, 2016 at 11:25:57AM +0100, Sebastian Frias wrote:
> 
>  What concrete solution do you propose?
> >>>
> >>> I have already proposed two solutions.
> >>>
> >>> A) Request a channel only when you need it. Obviously we can't do virtual
> >>> channels with this (though we should still use virt-channels framework).
> >>> The sbox setup and teardown can be done as part of channel request and
> >>> freeup. PL08x already does this.
> >>>
> >>> Downside is that we can only have as many consumers at a time as channels.
> >>>
> >>> I have not heard any technical reason for not doing this apart from 
> >>> drivers
> >>> grab the channel at probe, which is incorrect and needs to be fixed
> >>> irrespective of the problem at hand.
> >>>
> >>> This is my preferred option.
> >>
> >> There is one important drawback with this solution. If a driver calls
> >> dma_request_chan() when no channels are currently available, it will
> >> get -EBUSY. If there were a flag in dma_request_chan to be put to
> >> sleep (with timeout) until a channel is available, then it would
> >> work. But busy waiting in the client driver is a waste of power.
> > 
> > Right, but in that case the fallback would be PIO mode, and if that is
> > not available (IIRC some of your devices don't) then reject the usage with
> > EAGAIN.
> 
> Maybe I'm missing something, but I don't see how that would help.
> Take the NAND Flash controller driver, for instance. PIO is not
> an option, because the ECC engine is tied to DMA.
> 
> And failing with -EAGAIN doesn't help the busy looping situation.
> The caller should be put on some kind of queue to wait for a
> "channel ready" event.

So if you go down this route then we have to do a bit of engineering for this
solution to solve the hardware issue.

Can you tell me the clients for this controller and channels available?

In this case possibly we can dedicate one for NAND and keep the ones with
PIO mode dynamic in nature.. But yes it is not an elegant one.

-- 
~Vinod


Re: Tearing down DMA transfer setup after DMA client has finished

2016-12-11 Thread Vinod Koul
On Fri, Dec 09, 2016 at 07:23:17PM +0100, Mason wrote:
> [ Dropping Mans to preserve his peace-of-mind ]
> 
> On 09/12/2016 18:56, Vinod Koul wrote:
> > On Fri, Dec 09, 2016 at 06:34:15PM +0100, Mason wrote:
> >> On 09/12/2016 18:17, Vinod Koul wrote:
> >>
> >>> On Fri, Dec 09, 2016 at 11:25:57AM +0100, Sebastian Frias wrote:
> 
>  What concrete solution do you propose?
> >>>
> >>> I have already proposed two solutions.
> >>>
> >>> A) Request a channel only when you need it. Obviously we can't do virtual
> >>> channels with this (though we should still use virt-channels framework).
> >>> The sbox setup and teardown can be done as part of channel request and
> >>> freeup. PL08x already does this.
> >>>
> >>> Downside is that we can only have as many consumers at a time as channels.
> >>>
> >>> I have not heard any technical reason for not doing this apart from 
> >>> drivers
> >>> grab the channel at probe, which is incorrect and needs to be fixed
> >>> irrespective of the problem at hand.
> >>>
> >>> This is my preferred option.
> >>
> >> There is one important drawback with this solution. If a driver calls
> >> dma_request_chan() when no channels are currently available, it will
> >> get -EBUSY. If there were a flag in dma_request_chan to be put to
> >> sleep (with timeout) until a channel is available, then it would
> >> work. But busy waiting in the client driver is a waste of power.
> > 
> > Right, but in that case the fallback would be PIO mode, and if that is
> > not available (IIRC some of your devices don't) then reject the usage with
> > EAGAIN.
> 
> Maybe I'm missing something, but I don't see how that would help.
> Take the NAND Flash controller driver, for instance. PIO is not
> an option, because the ECC engine is tied to DMA.
> 
> And failing with -EAGAIN doesn't help the busy looping situation.
> The caller should be put on some kind of queue to wait for a
> "channel ready" event.

So if you go down this route then we have to do a bit of engineering for this
solution to solve the hardware issue.

Can you tell me the clients for this controller and channels available?

In this case possibly we can dedicate one for NAND and keep the ones with
PIO mode dynamic in nature.. But yes it is not an elegant one.

-- 
~Vinod


Re: [PATCH] trace: extend trace_clock to support arch_arm clock counter

2016-12-11 Thread Srinivas Ramana

On 12/06/2016 05:43 PM, Will Deacon wrote:

On Sun, Dec 04, 2016 at 02:06:23PM +0530, Srinivas Ramana wrote:

On 12/02/2016 04:38 PM, Will Deacon wrote:

On Fri, Dec 02, 2016 at 01:44:55PM +0530, Srinivas Ramana wrote:

Extend the trace_clock to support the arch timer cycle
counter so that we can get the monotonic cycle count
in the traces. This will help in correlating the traces with the
timestamps/events in other subsystems in the soc which share
this common counter for driving their timers.


I'm not sure I follow this reasoning. What's wrong with nanoseconds? In
particular, the "perf" trace_clock hangs off sched_clock, which should
be backed by the architected counter anyway. What does the cycle counter in
isolation tell you, given that the frequency isn't architected?

I think I'm missing something here.



Having cycle counter would help in the cases where we want to correlate the
time with other subsystems which are outside cpu subsystem.


Do you have an example of these subsystems? Can they be used to generate
trace data with mainline?


Some of the subsystems I can list are the modem (on a mobile phone), the GPU or
video subsystem, or a DSP, among others.





local_clock or even the perf trace_clock uses sched_clock which gets
suspended during system suspend. Yes, they are backed up by the
architected counter but they ignore the cycles spent in suspend.


Does mono_raw solve this (also hangs off the architected counter and is
supported in the vdso)?


It doesn't seem so. All of the existing clock sources are designed not to
show the jump when there is a suspend and resume. Even though they run
off the architected counter, they just can't give an exact correlation with
the counter. Furthermore, during the initial kernel boot, these just run
off the jiffies clock source. They also do not account for the time spent in
boot loaders.





So, when comparing with a monotonically increasing cycle counter, the other
clocks don't help. It seems x86 uses the TSC counter to help in such cases.


Does this mean we need a way to expose the frequency to userspace, too?


Not really. The CNTFRQ_EL0 of timer subsystem holds the clock frequency 
of system timer and is available to EL0.




Will




Thanks,
-- Srinivas R

--
Qualcomm India Private Limited, on behalf of Qualcomm Innovation Center, 
Inc., is a member of Code Aurora Forum, a Linux Foundation Collaborative 
Project.


Re: [PATCH] trace: extend trace_clock to support arch_arm clock counter

2016-12-11 Thread Srinivas Ramana

On 12/06/2016 05:43 PM, Will Deacon wrote:

On Sun, Dec 04, 2016 at 02:06:23PM +0530, Srinivas Ramana wrote:

On 12/02/2016 04:38 PM, Will Deacon wrote:

On Fri, Dec 02, 2016 at 01:44:55PM +0530, Srinivas Ramana wrote:

Extend the trace_clock to support the arch timer cycle
counter so that we can get the monotonic cycle count
in the traces. This will help in correlating the traces with the
timestamps/events in other subsystems in the soc which share
this common counter for driving their timers.


I'm not sure I follow this reasoning. What's wrong with nanoseconds? In
particular, the "perf" trace_clock hangs off sched_clock, which should
be backed by the architected counter anyway. What does the cycle counter in
isolation tell you, given that the frequency isn't architected?

I think I'm missing something here.



Having cycle counter would help in the cases where we want to correlate the
time with other subsystems which are outside cpu subsystem.


Do you have an example of these subsystems? Can they be used to generate
trace data with mainline?


Some of the subsystems I can list are the modem (on a mobile phone), the GPU or
video subsystem, or a DSP, among others.





local_clock or even the perf trace_clock uses sched_clock which gets
suspended during system suspend. Yes, they are backed up by the
architected counter but they ignore the cycles spent in suspend.


Does mono_raw solve this (also hangs off the architected counter and is
supported in the vdso)?


It doesn't seem so. All of the existing clock sources are designed not to
show the jump when there is a suspend and resume. Even though they run
off the architected counter, they just can't give an exact correlation with
the counter. Furthermore, during the initial kernel boot, these just run
off the jiffies clock source. They also do not account for the time spent in
boot loaders.





So, when comparing with a monotonically increasing cycle counter, the other
clocks don't help. It seems x86 uses the TSC counter to help in such cases.


Does this mean we need a way to expose the frequency to userspace, too?


Not really. The CNTFRQ_EL0 of timer subsystem holds the clock frequency 
of system timer and is available to EL0.




Will




Thanks,
-- Srinivas R

--
Qualcomm India Private Limited, on behalf of Qualcomm Innovation Center, 
Inc., is a member of Code Aurora Forum, a Linux Foundation Collaborative 
Project.


Re: [PATCH v8 2/4] vcodec: mediatek: Add Mediatek JPEG Decoder Driver

2016-12-11 Thread Ricky Liang
Hi Rick,

On Wed, Nov 30, 2016 at 11:08 AM, Rick Chang  wrote:
> Add v4l2 driver for Mediatek JPEG Decoder
>
> Signed-off-by: Rick Chang 
> Signed-off-by: Minghsiu Tsai 



> +static bool mtk_jpeg_check_resolution_change(struct mtk_jpeg_ctx *ctx,
> +struct mtk_jpeg_dec_param *param)
> +{
> +   struct mtk_jpeg_dev *jpeg = ctx->jpeg;
> +   struct mtk_jpeg_q_data *q_data;
> +
> +   q_data = &ctx->out_q;
> +   if (q_data->w != param->pic_w || q_data->h != param->pic_h) {
> +   v4l2_dbg(1, debug, &jpeg->v4l2_dev, "Picture size change\n");
> +   return true;
> +   }
> +
> +   q_data = &ctx->cap_q;
> +   if (q_data->fmt != mtk_jpeg_find_format(ctx, param->dst_fourcc,
> +   MTK_JPEG_FMT_TYPE_CAPTURE)) {
> +   v4l2_dbg(1, debug, &jpeg->v4l2_dev, "format change\n");
> +   return true;
> +   }
> +   return false;



> +static void mtk_jpeg_device_run(void *priv)
> +{
> +   struct mtk_jpeg_ctx *ctx = priv;
> +   struct mtk_jpeg_dev *jpeg = ctx->jpeg;
> +   struct vb2_buffer *src_buf, *dst_buf;
> +   enum vb2_buffer_state buf_state = VB2_BUF_STATE_ERROR;
> +   unsigned long flags;
> +   struct mtk_jpeg_src_buf *jpeg_src_buf;
> +   struct mtk_jpeg_bs bs;
> +   struct mtk_jpeg_fb fb;
> +   int i;
> +
> +   src_buf = v4l2_m2m_next_src_buf(ctx->fh.m2m_ctx);
> +   dst_buf = v4l2_m2m_next_dst_buf(ctx->fh.m2m_ctx);
> +   jpeg_src_buf = mtk_jpeg_vb2_to_srcbuf(src_buf);
> +
> +   if (jpeg_src_buf->flags & MTK_JPEG_BUF_FLAGS_LAST_FRAME) {
> +   for (i = 0; i < dst_buf->num_planes; i++)
> +   vb2_set_plane_payload(dst_buf, i, 0);
> +   buf_state = VB2_BUF_STATE_DONE;
> +   goto dec_end;
> +   }
> +
> +   if (mtk_jpeg_check_resolution_change(ctx, &jpeg_src_buf->dec_param)) {
> +   mtk_jpeg_queue_src_chg_event(ctx);
> +   ctx->state = MTK_JPEG_SOURCE_CHANGE;
> +   v4l2_m2m_job_finish(jpeg->m2m_dev, ctx->fh.m2m_ctx);
> +   return;
> +   }

This only detects a source change if multiple OUTPUT buffers are queued.
It does not catch the source change in the following scenario:

- OUTPUT buffers for jpeg1 enqueued
- OUTPUT queue STREAMON
- userspace creates CAPTURE buffers
- CAPTURE buffers enqueued
- CAPTURE queue STREAMON
- decode
- OUTPUT queue STREAMOFF
- userspace recreates OUTPUT buffers for jpeg2
- OUTPUT buffers for jpeg2 enqueued
- OUTPUT queue STREAMON

In the above sequence, if jpeg2's decoded size is larger than jpeg1's, the
function fails to detect that the existing CAPTURE buffers are not big
enough to hold the decoded data.

A possible fix is to pass *dst_buf to
mtk_jpeg_check_resolution_change(), and check in the function that all
the dst_buf planes are large enough to hold the decoded data.

> +
> +   mtk_jpeg_set_dec_src(ctx, src_buf, &bs);
> +   if (mtk_jpeg_set_dec_dst(ctx, &jpeg_src_buf->dec_param, dst_buf, &fb))
> +   goto dec_end;
> +
> +   spin_lock_irqsave(&jpeg->hw_lock, flags);
> +   mtk_jpeg_dec_reset(jpeg->dec_reg_base);
> +   mtk_jpeg_dec_set_config(jpeg->dec_reg_base,
> +   &jpeg_src_buf->dec_param, &bs, &fb);
> +
> +   mtk_jpeg_dec_start(jpeg->dec_reg_base);
> +   spin_unlock_irqrestore(&jpeg->hw_lock, flags);
> +   return;
> +
> +dec_end:
> +   v4l2_m2m_src_buf_remove(ctx->fh.m2m_ctx);
> +   v4l2_m2m_dst_buf_remove(ctx->fh.m2m_ctx);
> +   v4l2_m2m_buf_done(to_vb2_v4l2_buffer(src_buf), buf_state);
> +   v4l2_m2m_buf_done(to_vb2_v4l2_buffer(dst_buf), buf_state);
> +   v4l2_m2m_job_finish(jpeg->m2m_dev, ctx->fh.m2m_ctx);
> +}




Re: [PATCH v8 2/4] vcodec: mediatek: Add Mediatek JPEG Decoder Driver

2016-12-11 Thread Ricky Liang
Hi Rick,

On Wed, Nov 30, 2016 at 11:08 AM, Rick Chang  wrote:
> Add v4l2 driver for Mediatek JPEG Decoder
>
> Signed-off-by: Rick Chang 
> Signed-off-by: Minghsiu Tsai 



> +static bool mtk_jpeg_check_resolution_change(struct mtk_jpeg_ctx *ctx,
> +struct mtk_jpeg_dec_param *param)
> +{
> +   struct mtk_jpeg_dev *jpeg = ctx->jpeg;
> +   struct mtk_jpeg_q_data *q_data;
> +
> +   q_data = &ctx->out_q;
> +   if (q_data->w != param->pic_w || q_data->h != param->pic_h) {
> +   v4l2_dbg(1, debug, &jpeg->v4l2_dev, "Picture size change\n");
> +   return true;
> +   }
> +
> +   q_data = &ctx->cap_q;
> +   if (q_data->fmt != mtk_jpeg_find_format(ctx, param->dst_fourcc,
> +   MTK_JPEG_FMT_TYPE_CAPTURE)) {
> +   v4l2_dbg(1, debug, &jpeg->v4l2_dev, "format change\n");
> +   return true;
> +   }
> +   return false;



> +static void mtk_jpeg_device_run(void *priv)
> +{
> +   struct mtk_jpeg_ctx *ctx = priv;
> +   struct mtk_jpeg_dev *jpeg = ctx->jpeg;
> +   struct vb2_buffer *src_buf, *dst_buf;
> +   enum vb2_buffer_state buf_state = VB2_BUF_STATE_ERROR;
> +   unsigned long flags;
> +   struct mtk_jpeg_src_buf *jpeg_src_buf;
> +   struct mtk_jpeg_bs bs;
> +   struct mtk_jpeg_fb fb;
> +   int i;
> +
> +   src_buf = v4l2_m2m_next_src_buf(ctx->fh.m2m_ctx);
> +   dst_buf = v4l2_m2m_next_dst_buf(ctx->fh.m2m_ctx);
> +   jpeg_src_buf = mtk_jpeg_vb2_to_srcbuf(src_buf);
> +
> +   if (jpeg_src_buf->flags & MTK_JPEG_BUF_FLAGS_LAST_FRAME) {
> +   for (i = 0; i < dst_buf->num_planes; i++)
> +   vb2_set_plane_payload(dst_buf, i, 0);
> +   buf_state = VB2_BUF_STATE_DONE;
> +   goto dec_end;
> +   }
> +
> +   if (mtk_jpeg_check_resolution_change(ctx, &jpeg_src_buf->dec_param)) {
> +   mtk_jpeg_queue_src_chg_event(ctx);
> +   ctx->state = MTK_JPEG_SOURCE_CHANGE;
> +   v4l2_m2m_job_finish(jpeg->m2m_dev, ctx->fh.m2m_ctx);
> +   return;
> +   }

This only detects a source change if multiple OUTPUT buffers are queued.
It does not catch the source change in the following scenario:

- OUTPUT buffers for jpeg1 enqueued
- OUTPUT queue STREAMON
- userspace creates CAPTURE buffers
- CAPTURE buffers enqueued
- CAPTURE queue STREAMON
- decode
- OUTPUT queue STREAMOFF
- userspace recreates OUTPUT buffers for jpeg2
- OUTPUT buffers for jpeg2 enqueued
- OUTPUT queue STREAMON

In the above sequence, if jpeg2's decoded size is larger than jpeg1's, the
function fails to detect that the existing CAPTURE buffers are not big
enough to hold the decoded data.

A possible fix is to pass *dst_buf to
mtk_jpeg_check_resolution_change(), and check in the function that all
the dst_buf planes are large enough to hold the decoded data.

> +
> +   mtk_jpeg_set_dec_src(ctx, src_buf, &bs);
> +   if (mtk_jpeg_set_dec_dst(ctx, &jpeg_src_buf->dec_param, dst_buf, &fb))
> +   goto dec_end;
> +
> +   spin_lock_irqsave(&jpeg->hw_lock, flags);
> +   mtk_jpeg_dec_reset(jpeg->dec_reg_base);
> +   mtk_jpeg_dec_set_config(jpeg->dec_reg_base,
> +   &jpeg_src_buf->dec_param, &bs, &fb);
> +
> +   mtk_jpeg_dec_start(jpeg->dec_reg_base);
> +   spin_unlock_irqrestore(&jpeg->hw_lock, flags);
> +   return;
> +
> +dec_end:
> +   v4l2_m2m_src_buf_remove(ctx->fh.m2m_ctx);
> +   v4l2_m2m_dst_buf_remove(ctx->fh.m2m_ctx);
> +   v4l2_m2m_buf_done(to_vb2_v4l2_buffer(src_buf), buf_state);
> +   v4l2_m2m_buf_done(to_vb2_v4l2_buffer(dst_buf), buf_state);
> +   v4l2_m2m_job_finish(jpeg->m2m_dev, ctx->fh.m2m_ctx);
> +}




linux-next: manual merge of the staging tree with the vfs tree

2016-12-11 Thread Stephen Rothwell
Hi Greg,

Today's linux-next merge of the staging tree got a conflict in:

  drivers/staging/lustre/lustre/llite/statahead.c

between commit:

  7126bc2e8d60 ("lustre: switch to use of ->d_init()")

from the vfs tree and commit:

  3c8fb1b105cd ("staging: lustre: statahead: set sai_index_wait with 
lli_sa_lock held")

from the staging tree.

I fixed it up (see below) and can carry the fix as necessary. This
is now fixed as far as linux-next is concerned, but any non trivial
conflicts should be mentioned to your upstream maintainer when your tree
is submitted for merging.  You may also want to consider cooperating
with the maintainer of the conflicting tree to minimise any particularly
complex conflicts.

-- 
Cheers,
Stephen Rothwell

diff --cc drivers/staging/lustre/lustre/llite/statahead.c
index 166323fddc44,4769a2230ae1..
--- a/drivers/staging/lustre/lustre/llite/statahead.c
+++ b/drivers/staging/lustre/lustre/llite/statahead.c
@@@ -1512,8 -1519,9 +1519,7 @@@ out_unplug
 * dentry_may_statahead().
 */
ldd = ll_d2d(*dentryp);
-   lli = ll_i2info(dir);
 -  /* ldd can be NULL if llite lookup failed. */
 -  if (ldd)
 -  ldd->lld_sa_generation = lli->lli_sa_generation;
 +  ldd->lld_sa_generation = lli->lli_sa_generation;
sa_put(sai, entry);
return rc;
  }


linux-next: manual merge of the staging tree with the vfs tree

2016-12-11 Thread Stephen Rothwell
Hi Greg,

Today's linux-next merge of the staging tree got a conflict in:

  drivers/staging/lustre/lustre/llite/statahead.c

between commit:

  7126bc2e8d60 ("lustre: switch to use of ->d_init()")

from the vfs tree and commit:

  3c8fb1b105cd ("staging: lustre: statahead: set sai_index_wait with 
lli_sa_lock held")

from the staging tree.

I fixed it up (see below) and can carry the fix as necessary. This
is now fixed as far as linux-next is concerned, but any non trivial
conflicts should be mentioned to your upstream maintainer when your tree
is submitted for merging.  You may also want to consider cooperating
with the maintainer of the conflicting tree to minimise any particularly
complex conflicts.

-- 
Cheers,
Stephen Rothwell

diff --cc drivers/staging/lustre/lustre/llite/statahead.c
index 166323fddc44,4769a2230ae1..
--- a/drivers/staging/lustre/lustre/llite/statahead.c
+++ b/drivers/staging/lustre/lustre/llite/statahead.c
@@@ -1512,8 -1519,9 +1519,7 @@@ out_unplug
 * dentry_may_statahead().
 */
ldd = ll_d2d(*dentryp);
-   lli = ll_i2info(dir);
 -  /* ldd can be NULL if llite lookup failed. */
 -  if (ldd)
 -  ldd->lld_sa_generation = lli->lli_sa_generation;
 +  ldd->lld_sa_generation = lli->lli_sa_generation;
sa_put(sai, entry);
return rc;
  }


Re: [V2] mtd: devices: docg3:- Handle return value of devm_ioremap.

2016-12-11 Thread Marek Vasut
On 12/12/2016 04:00 AM, Arvind Yadav wrote:
> Here, If devm_ioremap will fail. It will return NULL.
> Kernel can run into a NULL-pointer dereference.
> 
> Signed-off-by: Arvind Yadav 
> ---
>  drivers/mtd/devices/docg3.c | 7 ++-
>  1 file changed, 6 insertions(+), 1 deletion(-)
> 
> diff --git a/drivers/mtd/devices/docg3.c b/drivers/mtd/devices/docg3.c
> index b833e6c..ffe3db0 100644
> --- a/drivers/mtd/devices/docg3.c
> +++ b/drivers/mtd/devices/docg3.c
> @@ -2083,9 +2083,14 @@ static int __init docg3_probe(struct platform_device 
> *pdev)
>   dev_err(dev, "No I/O memory resource defined\n");
>   return ret;
>   }
> - base = devm_ioremap(dev, ress->start, DOC_IOSPACE_SIZE);
>  
>   ret = -ENOMEM;
> + base = devm_ioremap(dev, ress->start, DOC_IOSPACE_SIZE);
> + if (!base) {
> + dev_err(dev, "failed to map I/O memory\n");
> + return ret;

Um, return -ENOMEM right away ?

Otherwise,
Acked-by: Marek Vasut 

> + }
> +
>   cascade = devm_kzalloc(dev, sizeof(*cascade) * DOC_MAX_NBFLOORS,
>  GFP_KERNEL);
>   if (!cascade)
> 


-- 
Best regards,
Marek Vasut


Re: [V2] mtd: devices: docg3:- Handle return value of devm_ioremap.

2016-12-11 Thread Marek Vasut
On 12/12/2016 04:00 AM, Arvind Yadav wrote:
> Here, If devm_ioremap will fail. It will return NULL.
> Kernel can run into a NULL-pointer dereference.
> 
> Signed-off-by: Arvind Yadav 
> ---
>  drivers/mtd/devices/docg3.c | 7 ++-
>  1 file changed, 6 insertions(+), 1 deletion(-)
> 
> diff --git a/drivers/mtd/devices/docg3.c b/drivers/mtd/devices/docg3.c
> index b833e6c..ffe3db0 100644
> --- a/drivers/mtd/devices/docg3.c
> +++ b/drivers/mtd/devices/docg3.c
> @@ -2083,9 +2083,14 @@ static int __init docg3_probe(struct platform_device 
> *pdev)
>   dev_err(dev, "No I/O memory resource defined\n");
>   return ret;
>   }
> - base = devm_ioremap(dev, ress->start, DOC_IOSPACE_SIZE);
>  
>   ret = -ENOMEM;
> + base = devm_ioremap(dev, ress->start, DOC_IOSPACE_SIZE);
> + if (!base) {
> + dev_err(dev, "failed to map I/O memory\n");
> + return ret;

Um, return -ENOMEM right away ?

Otherwise,
Acked-by: Marek Vasut 

> + }
> +
>   cascade = devm_kzalloc(dev, sizeof(*cascade) * DOC_MAX_NBFLOORS,
>  GFP_KERNEL);
>   if (!cascade)
> 


-- 
Best regards,
Marek Vasut


Re: [PATCH v2] siphash: add cryptographically secure hashtable function

2016-12-11 Thread Linus Torvalds
On Sun, Dec 11, 2016 at 7:48 PM, Jason A. Donenfeld  wrote:
> +   switch (left) {
> +   case 7: b |= ((u64)data[6]) << 48;
> +   case 6: b |= ((u64)data[5]) << 40;
> +   case 5: b |= ((u64)data[4]) << 32;
> +   case 4: b |= ((u64)data[3]) << 24;
> +   case 3: b |= ((u64)data[2]) << 16;
> +   case 2: b |= ((u64)data[1]) <<  8;
> +   case 1: b |= ((u64)data[0]); break;
> +   case 0: break;
> +   }

The above is extremely inefficient. Considering that most kernel data
would be expected to be smallish, that matters (ie the usual benchmark
would not be about hashing megabytes of data, but instead millions of
hashes of small data).

I think this could be rewritten (at least for 64-bit architectures) as

#ifdef CONFIG_DCACHE_WORD_ACCESS

if (left)
b |= le64_to_cpu(load_unaligned_zeropad(data) &
bytemask_from_count(left));

#else

.. do the duff's device thing with the switch() ..

#endif

which should give you basically perfect code generation (ie a single
64-bit load and a byte mask).

Totally untested, just looking at the code and trying to make sense of it.

... and obviously, it requires an actual high-performance use-case to
make any difference.

  Linus


Re: [PATCH v2] siphash: add cryptographically secure hashtable function

2016-12-11 Thread Linus Torvalds
On Sun, Dec 11, 2016 at 7:48 PM, Jason A. Donenfeld  wrote:
> +   switch (left) {
> +   case 7: b |= ((u64)data[6]) << 48;
> +   case 6: b |= ((u64)data[5]) << 40;
> +   case 5: b |= ((u64)data[4]) << 32;
> +   case 4: b |= ((u64)data[3]) << 24;
> +   case 3: b |= ((u64)data[2]) << 16;
> +   case 2: b |= ((u64)data[1]) <<  8;
> +   case 1: b |= ((u64)data[0]); break;
> +   case 0: break;
> +   }

The above is extremely inefficient. Considering that most kernel data
would be expected to be smallish, that matters (ie the usual benchmark
would not be about hashing megabytes of data, but instead millions of
hashes of small data).

I think this could be rewritten (at least for 64-bit architectures) as

#ifdef CONFIG_DCACHE_WORD_ACCESS

if (left)
b |= le64_to_cpu(load_unaligned_zeropad(data) &
bytemask_from_count(left));

#else

.. do the duff's device thing with the switch() ..

#endif

which should give you basically perfect code generation (ie a single
64-bit load and a byte mask).

Totally untested, just looking at the code and trying to make sense of it.

... and obviously, it requires an actual high-performance use-case to
make any difference.

  Linus


Re: usb:xhci: support disable usb2 LPM Remote Wakeup

2016-12-11 Thread Thang Q. Nguyen
On Sat, Dec 10, 2016 at 4:36 AM, Rob Herring  wrote:
> On Sun, Dec 04, 2016 at 07:42:01PM +0700, Thang Q. Nguyen wrote:
>> From: Thang Nguyen 
>>
>> As per USB 2.0 link power management addendum ECN, table 1-2, page 4,
>> device or host initiated via resume signaling; device-initiated resumes
>> can be optionally enabled/disabled by software. This patch adds support
>> to control enabling the USB2 RWE feature via DT/ACPI attribute.
>>
>> Signed-off-by: Vu Nguyen 
>> Signed-off-by: Thang Nguyen 
>> ---
>>  Documentation/devicetree/bindings/usb/usb-xhci.txt | 1 +
>>  drivers/usb/host/xhci-plat.c   | 3 +++
>>  drivers/usb/host/xhci.c| 5 -
>>  drivers/usb/host/xhci.h| 1 +
>>  4 files changed, 9 insertions(+), 1 deletion(-)
>>
>> diff --git a/Documentation/devicetree/bindings/usb/usb-xhci.txt 
>> b/Documentation/devicetree/bindings/usb/usb-xhci.txt
>> index 966885c..9b4cd14 100644
>> --- a/Documentation/devicetree/bindings/usb/usb-xhci.txt
>> +++ b/Documentation/devicetree/bindings/usb/usb-xhci.txt
>> @@ -25,6 +25,7 @@ Required properties:
>>
>>  Optional properties:
>>- clocks: reference to a clock
>> +  - usb2-rwe-disable: disable USB2 LPM Remote Wakeup capable
>
> Remote wakeup has been around since USB 1.0 days. Does this need to be
> USB2 or XHCI specific?
This is XHCI specific. Per XHCI specification 1.1, remote wakeup is
optional for XHCI 1.0 and required for XHCI 1.1. This patch provides
ability for software to disable RWE for USB2 in XHCI1.0 controller.
>
>>- usb3-lpm-capable: determines if platform is USB3 LPM capable
>>
>>  Example:



-- 

Thang Q. Nguyen| Staff SW Eng.

C: +849.7684.7606 | O: +848.3770.0640

F: +848.3770.0641  | tqngu...@apm.com


Re: usb:xhci: support disable usb2 LPM Remote Wakeup

2016-12-11 Thread Thang Q. Nguyen
On Sat, Dec 10, 2016 at 4:36 AM, Rob Herring  wrote:
> On Sun, Dec 04, 2016 at 07:42:01PM +0700, Thang Q. Nguyen wrote:
>> From: Thang Nguyen 
>>
>> As per USB 2.0 link power management addendum ECN, table 1-2, page 4,
>> device or host initiated via resume signaling; device-initiated resumes
>> can be optionally enabled/disabled by software. This patch adds support
>> to control enabling the USB2 RWE feature via DT/ACPI attribute.
>>
>> Signed-off-by: Vu Nguyen 
>> Signed-off-by: Thang Nguyen 
>> ---
>>  Documentation/devicetree/bindings/usb/usb-xhci.txt | 1 +
>>  drivers/usb/host/xhci-plat.c   | 3 +++
>>  drivers/usb/host/xhci.c| 5 -
>>  drivers/usb/host/xhci.h| 1 +
>>  4 files changed, 9 insertions(+), 1 deletion(-)
>>
>> diff --git a/Documentation/devicetree/bindings/usb/usb-xhci.txt 
>> b/Documentation/devicetree/bindings/usb/usb-xhci.txt
>> index 966885c..9b4cd14 100644
>> --- a/Documentation/devicetree/bindings/usb/usb-xhci.txt
>> +++ b/Documentation/devicetree/bindings/usb/usb-xhci.txt
>> @@ -25,6 +25,7 @@ Required properties:
>>
>>  Optional properties:
>>- clocks: reference to a clock
>> +  - usb2-rwe-disable: disable USB2 LPM Remote Wakeup capable
>
> Remote wakeup has been around since USB 1.0 days. Does this need to be
> USB2 or XHCI specific?
This is XHCI specific. Per XHCI specification 1.1, remote wakeup is
optional for XHCI 1.0 and required for XHCI 1.1. This patch provides
ability for software to disable RWE for USB2 in XHCI1.0 controller.
>
>>- usb3-lpm-capable: determines if platform is USB3 LPM capable
>>
>>  Example:



-- 

Thang Q. Nguyen| Staff SW Eng.

C: +849.7684.7606 | O: +848.3770.0640

F: +848.3770.0641  | tqngu...@apm.com


Re: [PATCH] x86/kbuild: enable modversions for symbols exported from asm

2016-12-11 Thread Nicholas Piggin
On Sat, 10 Dec 2016 13:41:03 +0100
Greg Kroah-Hartman  wrote:

> On Fri, Dec 09, 2016 at 11:46:54PM +0100, Dodji Seketeli wrote:
> > Hello,
> > 
> > Nicholas Piggin  a écrit:
> > 
> > [...]
> >   
> > > That said, a dwarf based checker tool should be able to do as good a job
> > > (maybe a bit better because report is very informative and it may pick up
> > > compiler alignments or padding options).  
> > 
> > So, Nicholas was kind enough to send me the two Linux Kernel binaries
> > that he built with the tiny little interface change that we were
> > discussing earlier.  Here is what the abidiff[1] tools says about that
> > interface change:
> > 
> > $ time ~/git/libabigail/kabidiff/build/tools/abidiff vmlinux.abi1.abi 
> > vmlinux.abi2.abi
> > Functions changes summary: 0 Removed, 1 Changed, 0 Added function
> > Variables changes summary: 0 Removed, 0 Changed, 0 Added variable
> > 
> > 1 function with some indirect sub-type change:
> > 
> >   [C]'function int foo(blah*)' at memory.c:82:1 has some indirect 
> > sub-type changes:
> > parameter 1 of type 'blah*' has sub-type changes:
> >   in pointed to type 'struct blah' at memory.c:78:1:
> > type size changed from 32 to 64 bits
> > 1 data member insertion:
> >   'int blah::y', at offset 0 (in bits) at memory.c:79:1
> > 1 data member change:
> >  'int blah::x' offset changed from 0 to 32 (in bits) (by +32 
> > bits)
> > 
> > 
> > 
> > real0m2.595s
> > user0m2.489s
> > sys 0m0.108s
> > $ 
> > 
> > I kept the timing information to give you an idea of the time it takes
> > on a non-optimized build of abidiff.
> > 
> > One could for instance want that types that are not defined in header
> > files be kept out of the change report.  In that case it's possible to
> > write a little suppression specification file like this one:
> > 
> > $ cat vmlinux.abignore 
> > [suppress_type]
> >   source_location_not_regexp = .*\\.h
> > $
> > 
> > You can then pass that suppression file to the tool:
> > 
> > $ ~/git/libabigail/kabidiff/build/tools/abidiff --suppr 
> > vmlinux.abignore vmlinux.abi1.abi vmlinux.abi2.abi
> > Functions changes summary: 0 Removed, 0 Changed (1 filtered out), 0 
> > Added function
> > Variables changes summary: 0 Removed, 0 Changed, 0 Added variable
> > 
> > 
> > real0m2.574s
> > user0m2.473s
> > sys 0m0.102s
> > $
> > 
> > So this is the kind of interface change analysis tool we are working on
> > at the moment.
> > 
> > One could also imagine a tool that would compute a CRC that takes the
> > very same suppression specification files into account, letting people
> > to decide that some interface changes are OK.  That CRC would thus be
> > added to the special ELF sections we already have today.  We could keep
> > the modversion machinery, but with a greater dose of flexibility.
> > Whenever modversion detects a change, abidiff would tell people what the
> > change is exactly.
> > 
> > What do you guys think?  
> 
> YES YES YES!!!
> 
> Now I don't work on a distro anymore, but I would think that something
> like this would be really useful, pointing out exactly what changed is
> very important for distro maintainers to determine what they want to do
> (either fix up the abi change with strange hacks, or ignore it due to
> the change being in an area they don't care at all about, i.e. a random
> driver subsystem.)
> 
> So yes, I think this is really good stuff.  But if the distro
> maintainers correct me and think it's useless, then I need to revisit my
> view of exactly what they do for their customers :)

Agree completely. BTW (for those who might be looking into these tools),
we also have https://github.com/skozina/kabi-dw that Stanislav (cc'ed)
mentioned earlier.

It's true that the current modversions __crc_ matching infrastructure is
"just" a symbol versioning system, and we could keep it and just populate
it with something other than genksyms (e.g., a symbol version list provided
by distros). But the starting point should be *no* versioning and simply
using names to break linkage. Unless there's a compelling reason not to,
symbols are simpler, easier, everyone knows how they work.

The other question would be whether to pull a minimal tool into the kernel
source or keep them out of tree (but possibly add some helper scripts etc).
I guess we'll need to see what distros want.

Thanks,
Nick


Re: [PATCH] x86/kbuild: enable modversions for symbols exported from asm

2016-12-11 Thread Nicholas Piggin
On Sat, 10 Dec 2016 13:41:03 +0100
Greg Kroah-Hartman  wrote:

> On Fri, Dec 09, 2016 at 11:46:54PM +0100, Dodji Seketeli wrote:
> > Hello,
> > 
> > Nicholas Piggin  a écrit:
> > 
> > [...]
> >   
> > > That said, a dwarf based checker tool should be able to do as good a job
> > > (maybe a bit better because report is very informative and it may pick up
> > > compiler alignments or padding options).  
> > 
> > So, Nicholas was kind enough to send me the two Linux Kernel binaries
> > that he built with the tiny little interface change that we were
> > discussing earlier.  Here is what the abidiff[1] tools says about that
> > interface change:
> > 
> > $ time ~/git/libabigail/kabidiff/build/tools/abidiff vmlinux.abi1.abi 
> > vmlinux.abi2.abi
> > Functions changes summary: 0 Removed, 1 Changed, 0 Added function
> > Variables changes summary: 0 Removed, 0 Changed, 0 Added variable
> > 
> > 1 function with some indirect sub-type change:
> > 
> >   [C]'function int foo(blah*)' at memory.c:82:1 has some indirect 
> > sub-type changes:
> > parameter 1 of type 'blah*' has sub-type changes:
> >   in pointed to type 'struct blah' at memory.c:78:1:
> > type size changed from 32 to 64 bits
> > 1 data member insertion:
> >   'int blah::y', at offset 0 (in bits) at memory.c:79:1
> > 1 data member change:
> >  'int blah::x' offset changed from 0 to 32 (in bits) (by +32 
> > bits)
> > 
> > 
> > 
> > real0m2.595s
> > user0m2.489s
> > sys 0m0.108s
> > $ 
> > 
> > I kept the timing information to give you an idea of the time it takes
> > on a non-optimized build of abidiff.
> > 
> > One could for instance want that types that are not defined in header
> > files be kept out of the change report.  In that case it's possible to
> > write a little suppression specification file like this one:
> > 
> > $ cat vmlinux.abignore 
> > [suppress_type]
> >   source_location_not_regexp = .*\\.h
> > $
> > 
> > You can then pass that suppression file to the tool:
> > 
> > $ ~/git/libabigail/kabidiff/build/tools/abidiff --suppr 
> > vmlinux.abignore vmlinux.abi1.abi vmlinux.abi2.abi
> > Functions changes summary: 0 Removed, 0 Changed (1 filtered out), 0 
> > Added function
> > Variables changes summary: 0 Removed, 0 Changed, 0 Added variable
> > 
> > 
> > real0m2.574s
> > user0m2.473s
> > sys 0m0.102s
> > $
> > 
> > So this is the kind of interface change analysis tool we are working on
> > at the moment.
> > 
> > One could also imagine a tool that would compute a CRC that takes the
> > very same suppression specification files into account, letting people
> > to decide that some interface changes are OK.  That CRC would thus be
> > added to the special ELF sections we already have today.  We could keep
> > the modversion machinery, but with a greater dose of flexibility.
> > Whenever modversion detects a change, abidiff would tell people what the
> > change is exactly.
> > 
> > What do you guys think?  
> 
> YES YES YES!!!
> 
> Now I don't work on a distro anymore, but I would think that something
> like this would be really useful, pointing out exactly what changed is
> very important for distro maintainers to determine what they want to do
> (either fix up the abi change with strange hacks, or ignore it due to
> the change being in an area they don't care at all about, i.e. a random
> driver subsystem.)
> 
> So yes, I think this is really good stuff.  But if the distro
> maintainers correct me and think it's useless, then I need to revisit my
> view of exactly what they do for their customers :)

Agree completely. BTW (for those who might be looking into these tools),
we also have https://github.com/skozina/kabi-dw that Stanislav (cc'ed)
mentioned earlier.

It's true that the current modversions __crc_ matching infrastructure is
"just" a symbol versioning system, and we could keep it and just populate
it with something other than genksyms (e.g., a symbol version list provided
by distros). But the starting point should be *no* versioning and simply
using names to break linkage. Unless there's a compelling reason not to,
symbols are simpler, easier, everyone knows how they work.

The other question would be whether to pull a minimal tool into the kernel
source or keep them out of tree (but possibly add some helper scripts etc).
I guess we'll need to see what distros want.

Thanks,
Nick


[PATCH v2] siphash: add cryptographically secure hashtable function

2016-12-11 Thread Jason A. Donenfeld
SipHash is a 64-bit keyed hash function that is actually a
cryptographically secure PRF, like HMAC. Except SipHash is super fast,
and is meant to be used as a hashtable keyed lookup function.

SipHash isn't just some new trendy hash function. It's been around for a
while, and there really isn't anything that comes remotely close to
being useful in the way SipHash is. With that said, why do we need this?

There are a variety of attacks known as "hashtable poisoning" in which an
attacker forms some data such that the hash of that data will be the
same, and then proceeds to fill up all entries of a hash bucket. This is
a realistic and well-known denial-of-service vector.

Linux developers already seem to be aware that this is an issue, and
various places that use hash tables in, say, a network context, use a
non-cryptographically secure function (usually jhash) and then try to
twiddle with the key on a time basis (or in many cases just do nothing
and hope that nobody notices). While this is an admirable attempt at
solving the problem, it doesn't actually fix it. SipHash fixes it.

(It fixes it in such a sound way that you could even build a stream
cipher out of SipHash that would resist the modern cryptanalysis.)

There are a modicum of places in the kernel that are vulnerable to
hashtable poisoning attacks, either via userspace vectors or network
vectors, and there's not a reliable mechanism inside the kernel at the
moment to fix it. The first step toward fixing these issues is actually
getting a secure primitive into the kernel for developers to use. Then
we can, bit by bit, port things over to it as deemed appropriate.

Dozens of languages are already using this internally for their hash
tables. Some of the BSDs already use this in their kernels. SipHash is
a widely known high-speed solution to a widely known problem, and it's
time we catch up.

Signed-off-by: Jason A. Donenfeld 
Cc: Jean-Philippe Aumasson 
Cc: Daniel J. Bernstein 
---
 include/linux/siphash.h |  20 +
 lib/Makefile|   5 ++-
 lib/siphash.c   |  72 ++
 lib/test_siphash.c  | 116 
 4 files changed, 211 insertions(+), 2 deletions(-)
 create mode 100644 include/linux/siphash.h
 create mode 100644 lib/siphash.c
 create mode 100644 lib/test_siphash.c

diff --git a/include/linux/siphash.h b/include/linux/siphash.h
new file mode 100644
index ..6623b3090645
--- /dev/null
+++ b/include/linux/siphash.h
@@ -0,0 +1,20 @@
+/* Copyright (C) 2016 Jason A. Donenfeld 
+ *
+ * This file is provided under a dual BSD/GPLv2 license.
+ *
+ * SipHash: a fast short-input PRF
+ * https://131002.net/siphash/
+ */
+
+#ifndef _LINUX_SIPHASH_H
+#define _LINUX_SIPHASH_H
+
+#include 
+
+enum siphash_lengths {
+   SIPHASH24_KEY_LEN = 16
+};
+
+u64 siphash24(const u8 *data, size_t len, const u8 key[SIPHASH24_KEY_LEN]);
+
+#endif /* _LINUX_SIPHASH_H */
diff --git a/lib/Makefile b/lib/Makefile
index 50144a3aeebd..71d398b04a74 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -22,7 +22,8 @@ lib-y := ctype.o string.o vsprintf.o cmdline.o \
 sha1.o chacha20.o md5.o irq_regs.o argv_split.o \
 flex_proportions.o ratelimit.o show_mem.o \
 is_single_threaded.o plist.o decompress.o kobject_uevent.o \
-earlycpio.o seq_buf.o nmi_backtrace.o nodemask.o win_minmax.o
+earlycpio.o seq_buf.o siphash.o \
+nmi_backtrace.o nodemask.o win_minmax.o
 
 lib-$(CONFIG_MMU) += ioremap.o
 lib-$(CONFIG_SMP) += cpumask.o
@@ -44,7 +45,7 @@ obj-$(CONFIG_TEST_HEXDUMP) += test_hexdump.o
 obj-y += kstrtox.o
 obj-$(CONFIG_TEST_BPF) += test_bpf.o
 obj-$(CONFIG_TEST_FIRMWARE) += test_firmware.o
-obj-$(CONFIG_TEST_HASH) += test_hash.o
+obj-$(CONFIG_TEST_HASH) += test_hash.o test_siphash.o
 obj-$(CONFIG_TEST_KASAN) += test_kasan.o
 obj-$(CONFIG_TEST_KSTRTOX) += test-kstrtox.o
 obj-$(CONFIG_TEST_LKM) += test_module.o
diff --git a/lib/siphash.c b/lib/siphash.c
new file mode 100644
index ..e78dc36d19b9
--- /dev/null
+++ b/lib/siphash.c
@@ -0,0 +1,72 @@
+/* Copyright (C) 2015-2016 Jason A. Donenfeld 
+ * Copyright (C) 2012-2014 Jean-Philippe Aumasson 

+ * Copyright (C) 2012-2014 Daniel J. Bernstein 
+ *
+ * This file is provided under a dual BSD/GPLv2 license.
+ *
+ * SipHash: a fast short-input PRF
+ * https://131002.net/siphash/
+ */
+
+#include 
+#include 
+
+static inline u64 le64_to_cpuvp(const void *p)
+{
+   return le64_to_cpup(p);
+}
+
+#define SIPROUND \
+   do { \
+   v0 += v1; v1 = rol64(v1, 13); v1 ^= v0; v0 = rol64(v0, 32); \
+   v2 += v3; v3 = rol64(v3, 16); v3 ^= v2; \
+   v0 += v3; v3 = rol64(v3, 21); v3 ^= v0; \
+   v2 += v1; v1 = rol64(v1, 17); v1 ^= v2; v2 = rol64(v2, 32); \
+   } while(0)
+
+u64 siphash24(const u8 *data, size_t len, const u8 

[PATCH v2] siphash: add cryptographically secure hashtable function

2016-12-11 Thread Jason A. Donenfeld
SipHash is a 64-bit keyed hash function that is actually a
cryptographically secure PRF, like HMAC. Except SipHash is super fast,
and is meant to be used as a hashtable keyed lookup function.

SipHash isn't just some new trendy hash function. It's been around for a
while, and there really isn't anything that comes remotely close to
being useful in the way SipHash is. With that said, why do we need this?

There are a variety of attacks known as "hashtable poisoning" in which an
attacker forms some data such that the hash of that data will be the
same, and then proceeds to fill up all entries of a hash bucket. This is
a realistic and well-known denial-of-service vector.

Linux developers already seem to be aware that this is an issue, and
various places that use hash tables in, say, a network context, use a
non-cryptographically secure function (usually jhash) and then try to
twiddle with the key on a time basis (or in many cases just do nothing
and hope that nobody notices). While this is an admirable attempt at
solving the problem, it doesn't actually fix it. SipHash fixes it.

(It fixes it in such a sound way that you could even build a stream
cipher out of SipHash that would resist the modern cryptanalysis.)

There are a modicum of places in the kernel that are vulnerable to
hashtable poisoning attacks, either via userspace vectors or network
vectors, and there's not a reliable mechanism inside the kernel at the
moment to fix it. The first step toward fixing these issues is actually
getting a secure primitive into the kernel for developers to use. Then
we can, bit by bit, port things over to it as deemed appropriate.

Dozens of languages are already using this internally for their hash
tables. Some of the BSDs already use this in their kernels. SipHash is
a widely known high-speed solution to a widely known problem, and it's
time we catch up.

Signed-off-by: Jason A. Donenfeld 
Cc: Jean-Philippe Aumasson 
Cc: Daniel J. Bernstein 
---
 include/linux/siphash.h |  20 +
 lib/Makefile|   5 ++-
 lib/siphash.c   |  72 ++
 lib/test_siphash.c  | 116 
 4 files changed, 211 insertions(+), 2 deletions(-)
 create mode 100644 include/linux/siphash.h
 create mode 100644 lib/siphash.c
 create mode 100644 lib/test_siphash.c

diff --git a/include/linux/siphash.h b/include/linux/siphash.h
new file mode 100644
index ..6623b3090645
--- /dev/null
+++ b/include/linux/siphash.h
@@ -0,0 +1,20 @@
+/* Copyright (C) 2016 Jason A. Donenfeld 
+ *
+ * This file is provided under a dual BSD/GPLv2 license.
+ *
+ * SipHash: a fast short-input PRF
+ * https://131002.net/siphash/
+ */
+
+#ifndef _LINUX_SIPHASH_H
+#define _LINUX_SIPHASH_H
+
+#include 
+
+enum siphash_lengths {
+   SIPHASH24_KEY_LEN = 16
+};
+
+u64 siphash24(const u8 *data, size_t len, const u8 key[SIPHASH24_KEY_LEN]);
+
+#endif /* _LINUX_SIPHASH_H */
diff --git a/lib/Makefile b/lib/Makefile
index 50144a3aeebd..71d398b04a74 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -22,7 +22,8 @@ lib-y := ctype.o string.o vsprintf.o cmdline.o \
 sha1.o chacha20.o md5.o irq_regs.o argv_split.o \
 flex_proportions.o ratelimit.o show_mem.o \
 is_single_threaded.o plist.o decompress.o kobject_uevent.o \
-earlycpio.o seq_buf.o nmi_backtrace.o nodemask.o win_minmax.o
+earlycpio.o seq_buf.o siphash.o \
+nmi_backtrace.o nodemask.o win_minmax.o
 
 lib-$(CONFIG_MMU) += ioremap.o
 lib-$(CONFIG_SMP) += cpumask.o
@@ -44,7 +45,7 @@ obj-$(CONFIG_TEST_HEXDUMP) += test_hexdump.o
 obj-y += kstrtox.o
 obj-$(CONFIG_TEST_BPF) += test_bpf.o
 obj-$(CONFIG_TEST_FIRMWARE) += test_firmware.o
-obj-$(CONFIG_TEST_HASH) += test_hash.o
+obj-$(CONFIG_TEST_HASH) += test_hash.o test_siphash.o
 obj-$(CONFIG_TEST_KASAN) += test_kasan.o
 obj-$(CONFIG_TEST_KSTRTOX) += test-kstrtox.o
 obj-$(CONFIG_TEST_LKM) += test_module.o
diff --git a/lib/siphash.c b/lib/siphash.c
new file mode 100644
index ..e78dc36d19b9
--- /dev/null
+++ b/lib/siphash.c
@@ -0,0 +1,72 @@
+/* Copyright (C) 2015-2016 Jason A. Donenfeld 
+ * Copyright (C) 2012-2014 Jean-Philippe Aumasson 

+ * Copyright (C) 2012-2014 Daniel J. Bernstein 
+ *
+ * This file is provided under a dual BSD/GPLv2 license.
+ *
+ * SipHash: a fast short-input PRF
+ * https://131002.net/siphash/
+ */
+
+#include 
+#include 
+
+static inline u64 le64_to_cpuvp(const void *p)
+{
+   return le64_to_cpup(p);
+}
+
+#define SIPROUND \
+   do { \
+   v0 += v1; v1 = rol64(v1, 13); v1 ^= v0; v0 = rol64(v0, 32); \
+   v2 += v3; v3 = rol64(v3, 16); v3 ^= v2; \
+   v0 += v3; v3 = rol64(v3, 21); v3 ^= v0; \
+   v2 += v1; v1 = rol64(v1, 17); v1 ^= v2; v2 = rol64(v2, 32); \
+   } while(0)
+
+u64 siphash24(const u8 *data, size_t len, const u8 key[SIPHASH24_KEY_LEN])
+{
+   u64 v0 = 0x736f6d6570736575ULL;
+   u64 v1 = 0x646f72616e646f6dULL;
+   u64 v2 = 0x6c7967656e657261ULL;
+ 

Re: [PATCH] Fix multiple definition error under lto

2016-12-11 Thread Zhang Rui
On Sat, 2016-11-26 at 17:25 -0500, Peter Foley wrote:
> drivers/thermal/built-in.o: In function `type_show.lto_priv.33':
> (.text+0x3d80): multiple definition of `type_show.lto_priv.33'
> drivers/base/built-in.o:(.text+0x2a40): first defined here
> 
can you illustrate how to reproduce this problem?

thanks,
rui
> Signed-off-by: Peter Foley 
> ---
>  drivers/thermal/thermal_core.c | 4 ++--
>  1 file changed, 2 insertions(+), 2 deletions(-)
> 
> diff --git a/drivers/thermal/thermal_core.c
> b/drivers/thermal/thermal_core.c
> index 226b0b4aced6..23ec1dd2ff3b 100644
> --- a/drivers/thermal/thermal_core.c
> +++ b/drivers/thermal/thermal_core.c
> @@ -643,7 +643,7 @@ static void thermal_zone_device_check(struct
> work_struct *work)
>   container_of(_dev, struct thermal_zone_device, device)
>  
>  static ssize_t
> -type_show(struct device *dev, struct device_attribute *attr, char
> *buf)
> +thermal_type_show(struct device *dev, struct device_attribute *attr,
> char *buf)
>  {
>   struct thermal_zone_device *tz = to_thermal_zone(dev);
>  
> @@ -1159,7 +1159,7 @@ int power_actor_set_power(struct
> thermal_cooling_device *cdev,
>   return 0;
>  }
>  
> -static DEVICE_ATTR(type, 0444, type_show, NULL);
> +static DEVICE_ATTR(type, 0444, thermal_type_show, NULL);
>  static DEVICE_ATTR(temp, 0444, temp_show, NULL);
>  static DEVICE_ATTR(mode, 0644, mode_show, mode_store);
>  static DEVICE_ATTR(passive, S_IRUGO | S_IWUSR, passive_show,
> passive_store);


Re: [PATCH] Fix multiple definition error under lto

2016-12-11 Thread Zhang Rui
On Sat, 2016-11-26 at 17:25 -0500, Peter Foley wrote:
> drivers/thermal/built-in.o: In function `type_show.lto_priv.33':
> (.text+0x3d80): multiple definition of `type_show.lto_priv.33'
> drivers/base/built-in.o:(.text+0x2a40): first defined here
> 
can you illustrate how to reproduce this problem?

thanks,
rui
> Signed-off-by: Peter Foley 
> ---
>  drivers/thermal/thermal_core.c | 4 ++--
>  1 file changed, 2 insertions(+), 2 deletions(-)
> 
> diff --git a/drivers/thermal/thermal_core.c
> b/drivers/thermal/thermal_core.c
> index 226b0b4aced6..23ec1dd2ff3b 100644
> --- a/drivers/thermal/thermal_core.c
> +++ b/drivers/thermal/thermal_core.c
> @@ -643,7 +643,7 @@ static void thermal_zone_device_check(struct
> work_struct *work)
>   container_of(_dev, struct thermal_zone_device, device)
>  
>  static ssize_t
> -type_show(struct device *dev, struct device_attribute *attr, char
> *buf)
> +thermal_type_show(struct device *dev, struct device_attribute *attr,
> char *buf)
>  {
>   struct thermal_zone_device *tz = to_thermal_zone(dev);
>  
> @@ -1159,7 +1159,7 @@ int power_actor_set_power(struct
> thermal_cooling_device *cdev,
>   return 0;
>  }
>  
> -static DEVICE_ATTR(type, 0444, type_show, NULL);
> +static DEVICE_ATTR(type, 0444, thermal_type_show, NULL);
>  static DEVICE_ATTR(temp, 0444, temp_show, NULL);
>  static DEVICE_ATTR(mode, 0644, mode_show, mode_store);
>  static DEVICE_ATTR(passive, S_IRUGO | S_IWUSR, passive_show,
> passive_store);


Re: [Ocfs2-devel] [PATCH] ocfs2: fix crash caused by stale lvb with fsdlm plugin

2016-12-11 Thread Eric Ren

Hi Gang,

On 12/12/2016 10:56 AM, Gang He wrote:

Hi Eric,

Looks good for me.
Just one suggestion,
please monitor if the LVB sharing mechanism in the cluster still works well in 
the normal scenario,
to avoid any performance decrease regression problem.

Thanks for your review. I have done the testing as you suggested, and it works
as expected.

Thanks,
Eric


Reviewed-by: Gang He 

Thanks
Gang



The crash happens rather often when we reset some cluster
nodes while nodes contend fiercely to do truncate and append.

The crash backtrace is below:
"
[  245.197849] dlm: C21CBDA5E0774F4BA5A9D4F317717495: dlm_recover_grant 1
locks on 971 resources
[  245.197859] dlm: C21CBDA5E0774F4BA5A9D4F317717495: dlm_recover 9
generation 5 done: 4 ms
[  245.198379] ocfs2: Begin replay journal (node 318952601, slot 2) on
device (253,18)
[  247.272338] ocfs2: End replay journal (node 318952601, slot 2) on device
(253,18)
[  247.547084] ocfs2: Beginning quota recovery on device (253,18) for slot 2
[  247.683263] ocfs2: Finishing quota recovery on device (253,18) for slot 2
[  247.833022] (truncate,30154,1):ocfs2_truncate_file:470 ERROR: bug
expression: le64_to_cpu(fe->i_size) != i_size_read(inode)
[  247.833029] (truncate,30154,1):ocfs2_truncate_file:470 ERROR: Inode
290321, inode i_size = 732 != di i_size = 937, i_flags = 0x1
[  247.833074] [ cut here ]
[  247.833077] kernel BUG at /usr/src/linux/fs/ocfs2/file.c:470!
[  247.833079] invalid opcode:  [#1] SMP
[  247.833081] Modules linked in: ocfs2_stack_user(OEN) ocfs2(OEN)
ocfs2_nodemanager ocfs2_stackglue(OEN) quota_tree dlm(OEN) configfs fuse
sd_modiscsi_tcp libiscsi_tcp libiscsi scsi_transport_iscsi af_packet
iscsi_ibft iscsi_boot_sysfs softdog xfs libcrc32c ppdev parport_pc pcspkr
parport  joydev virtio_balloon virtio_net i2c_piix4 acpi_cpufreq button
processor ext4 crc16 jbd2 mbcache ata_generic cirrus virtio_blk ata_piix
  drm_kms_helper ahci syscopyarea libahci sysfillrect sysimgblt
fb_sys_fops ttm floppy libata drm virtio_pci virtio_ring uhci_hcd virtio
ehci_hcd   usbcore serio_raw usb_common sg dm_multipath dm_mod
scsi_dh_rdac scsi_dh_emc scsi_dh_alua scsi_mod autofs4
[  247.833107] Supported: No, Unsupported modules are loaded
[  247.833110] CPU: 1 PID: 30154 Comm: truncate Tainted: G   OE   N
4.4.21-69-default #1
[  247.833111] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS
rel-1.8.1-0-g4adadbd-20151112_172657-sheep25 04/01/2014
[  247.833112] task: 88004ff6d240 ti: 880074e68000 task.ti:
880074e68000
[  247.833113] RIP: 0010:[]  []
ocfs2_truncate_file+0x640/0x6c0 [ocfs2]
[  247.833151] RSP: 0018:880074e6bd50  EFLAGS: 00010282
[  247.833152] RAX: 0074 RBX: 029e RCX:

[  247.833153] RDX: 0001 RSI: 0246 RDI:
0246
[  247.833154] RBP: 880074e6bda8 R08: 3675dc7a R09:
82013414
[  247.833155] R10: 00034c50 R11:  R12:
88003aab3448
[  247.833156] R13: 02dc R14: 00046e11 R15:
0020
[  247.833157] FS:  7f839f965700() GS:88007fc8()
knlGS:
[  247.833158] CS:  0010 DS:  ES:  CR0: 8005003b
[  247.833159] CR2: 7f839f97e000 CR3: 36723000 CR4:
06e0
[  247.833164] Stack:
[  247.833165]  03a9 0001 880060554000
88004fcaf000
[  247.833167]  88003aa7b090 1000 88003aab3448
880074e6beb0
[  247.833169]  0001 2068 0020

[  247.833171] Call Trace:
[  247.833208]  [] ocfs2_setattr+0x698/0xa90 [ocfs2]
[  247.833225]  [] notify_change+0x1ae/0x380
[  247.833242]  [] do_truncate+0x5e/0x90
[  247.833246]  [] do_sys_ftruncate.constprop.11+0x108/0x160
[  247.833257]  [] entry_SYSCALL_64_fastpath+0x12/0x6d
[  247.834724] DWARF2 unwinder stuck at entry_SYSCALL_64_fastpath+0x12/0x6d
[  247.834725]
[  247.834726] Leftover inexact backtrace:

[  247.834728] Code: 24 28 ba d6 01 00 00 48 c7 c6 30 43 62 a0 8b 41 2c 89
44 24 08 48 8b 41 20 48 c7 c1 78 a3 62 a0 48 89 04 24 31 c0 e8 a0 97 f9 ff <0f>
0b 3d 00 fe ff ff 0f 84 ab fd ff ff 83 f8 fc 0f 84 a2 fd ff
[  247.834748] RIP  [] ocfs2_truncate_file+0x640/0x6c0
[ocfs2]
[  247.834774]  RSP 
"

It's because ocfs2_inode_lock() gets us a stale LVB in which the i_size is not
equal to the disk i_size. We mistakenly trust the LVB because the underlying
fsdlm dlm_lock() doesn't set lkb_sbflags with DLM_SBF_VALNOTVALID properly for
us. But why?

The current code tries to downconvert the lock without the DLM_LKF_VALBLK
flag, to tell o2cb not to update the RSB's LVB if it's a PR->NULL conversion,
even if the lock resource type needs the LVB. This is not the right way for
fsdlm.

The fsdlm plugin behaves differently on DLM_LKF_VALBLK: it depends on
DLM_LKF_VALBLK to decide if we care about the LVB in the LKB. If
DLM_LKF_VALBLK
is not set, fsdlm will skip 

Re: [Ocfs2-devel] [PATCH] ocfs2: fix crash caused by stale lvb with fsdlm plugin

2016-12-11 Thread Eric Ren

Hi Gang,

On 12/12/2016 10:56 AM, Gang He wrote:

Hi Eric,

Looks good for me.
Just one suggestion,
please monitor if the LVB sharing mechanism in the cluster still works well in 
the normal scenario,
to avoid any performance decrease regression problem.

Thanks for your review. I have done the testing as you suggested, and it works
as expected.

Thanks,
Eric


Reviewed-by: Gang He 

Thanks
Gang



The crash happens rather often when we reset some cluster
nodes while nodes contend fiercely to do truncate and append.

The crash backtrace is below:
"
[  245.197849] dlm: C21CBDA5E0774F4BA5A9D4F317717495: dlm_recover_grant 1
locks on 971 resources
[  245.197859] dlm: C21CBDA5E0774F4BA5A9D4F317717495: dlm_recover 9
generation 5 done: 4 ms
[  245.198379] ocfs2: Begin replay journal (node 318952601, slot 2) on
device (253,18)
[  247.272338] ocfs2: End replay journal (node 318952601, slot 2) on device
(253,18)
[  247.547084] ocfs2: Beginning quota recovery on device (253,18) for slot 2
[  247.683263] ocfs2: Finishing quota recovery on device (253,18) for slot 2
[  247.833022] (truncate,30154,1):ocfs2_truncate_file:470 ERROR: bug
expression: le64_to_cpu(fe->i_size) != i_size_read(inode)
[  247.833029] (truncate,30154,1):ocfs2_truncate_file:470 ERROR: Inode
290321, inode i_size = 732 != di i_size = 937, i_flags = 0x1
[  247.833074] [ cut here ]
[  247.833077] kernel BUG at /usr/src/linux/fs/ocfs2/file.c:470!
[  247.833079] invalid opcode:  [#1] SMP
[  247.833081] Modules linked in: ocfs2_stack_user(OEN) ocfs2(OEN)
ocfs2_nodemanager ocfs2_stackglue(OEN) quota_tree dlm(OEN) configfs fuse
sd_modiscsi_tcp libiscsi_tcp libiscsi scsi_transport_iscsi af_packet
iscsi_ibft iscsi_boot_sysfs softdog xfs libcrc32c ppdev parport_pc pcspkr
parport  joydev virtio_balloon virtio_net i2c_piix4 acpi_cpufreq button
processor ext4 crc16 jbd2 mbcache ata_generic cirrus virtio_blk ata_piix
  drm_kms_helper ahci syscopyarea libahci sysfillrect sysimgblt
fb_sys_fops ttm floppy libata drm virtio_pci virtio_ring uhci_hcd virtio
ehci_hcd   usbcore serio_raw usb_common sg dm_multipath dm_mod
scsi_dh_rdac scsi_dh_emc scsi_dh_alua scsi_mod autofs4
[  247.833107] Supported: No, Unsupported modules are loaded
[  247.833110] CPU: 1 PID: 30154 Comm: truncate Tainted: G   OE   N
4.4.21-69-default #1
[  247.833111] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS
rel-1.8.1-0-g4adadbd-20151112_172657-sheep25 04/01/2014
[  247.833112] task: 88004ff6d240 ti: 880074e68000 task.ti:
880074e68000
[  247.833113] RIP: 0010:[]  []
ocfs2_truncate_file+0x640/0x6c0 [ocfs2]
[  247.833151] RSP: 0018:880074e6bd50  EFLAGS: 00010282
[  247.833152] RAX: 0074 RBX: 029e RCX:

[  247.833153] RDX: 0001 RSI: 0246 RDI:
0246
[  247.833154] RBP: 880074e6bda8 R08: 3675dc7a R09:
82013414
[  247.833155] R10: 00034c50 R11:  R12:
88003aab3448
[  247.833156] R13: 02dc R14: 00046e11 R15:
0020
[  247.833157] FS:  7f839f965700() GS:88007fc8()
knlGS:
[  247.833158] CS:  0010 DS:  ES:  CR0: 8005003b
[  247.833159] CR2: 7f839f97e000 CR3: 36723000 CR4:
06e0
[  247.833164] Stack:
[  247.833165]  03a9 0001 880060554000
88004fcaf000
[  247.833167]  88003aa7b090 1000 88003aab3448
880074e6beb0
[  247.833169]  0001 2068 0020

[  247.833171] Call Trace:
[  247.833208]  [] ocfs2_setattr+0x698/0xa90 [ocfs2]
[  247.833225]  [] notify_change+0x1ae/0x380
[  247.833242]  [] do_truncate+0x5e/0x90
[  247.833246]  [] do_sys_ftruncate.constprop.11+0x108/0x160
[  247.833257]  [] entry_SYSCALL_64_fastpath+0x12/0x6d
[  247.834724] DWARF2 unwinder stuck at entry_SYSCALL_64_fastpath+0x12/0x6d
[  247.834725]
[  247.834726] Leftover inexact backtrace:

[  247.834728] Code: 24 28 ba d6 01 00 00 48 c7 c6 30 43 62 a0 8b 41 2c 89
44 24 08 48 8b 41 20 48 c7 c1 78 a3 62 a0 48 89 04 24 31 c0 e8 a0 97 f9 ff <0f>
0b 3d 00 fe ff ff 0f 84 ab fd ff ff 83 f8 fc 0f 84 a2 fd ff
[  247.834748] RIP  [] ocfs2_truncate_file+0x640/0x6c0
[ocfs2]
[  247.834774]  RSP 
"

It's because ocfs2_inode_lock() gets us a stale LVB in which the i_size is not
equal to the disk i_size. We mistakenly trust the LVB because the underlying
fsdlm dlm_lock() doesn't set lkb_sbflags with DLM_SBF_VALNOTVALID properly for
us. But why?

The current code tries to downconvert the lock without the DLM_LKF_VALBLK
flag, to tell o2cb not to update the RSB's LVB if it's a PR->NULL conversion,
even if the lock resource type needs the LVB. This is not the right way for
fsdlm.

The fsdlm plugin behaves differently on DLM_LKF_VALBLK: it depends on
DLM_LKF_VALBLK to decide if we care about the LVB in the LKB. If
DLM_LKF_VALBLK
is not set, fsdlm will skip recovering RSB's 

Re: [PATCH] arm64: mm: Fix NOMAP page initialization

2016-12-11 Thread Yisheng Xie
hi Robert,

On 2016/12/10 2:10, Robert Richter wrote:
> On ThunderX systems with certain memory configurations we see the
> following BUG_ON():
> 
>  kernel BUG at mm/page_alloc.c:1848!
> 
> This happens for some configs with 64k page size enabled. The BUG_ON()
> checks if start and end page of a memmap range belongs to the same
> zone.
> 
> The BUG_ON() check fails if a memory zone contains NOMAP regions. In
> this case the node information of those pages is not initialized. This
> causes an inconsistency of the page links with wrong zone and node
> information for that pages. NOMAP pages from node 1 still point to the
> mem zone from node 0 and have the wrong nid assigned.
> 
The patch works for zones that contain NOMAP regions.

However, if the BIOS does not add a WB/WT/WC attribute to a physical address range,
is_memory(md) will return false and this range will not be added to memblock.
   efi_init
  -> reserve_regions
if (is_memory(md)) {
early_init_dt_add_memory_arch(paddr, size);

if (!is_usable_memory(md))
memblock_mark_nomap(paddr, size);
}

Then the BUG_ON() check will also fail. Any idea about it?

Here is the crash log I got from D05:
crash log---
[0.00] Booting Linux on physical CPU 0x1
[0.00] Linux version 4.9.0-rc8+ (xys@linux-ibm) (gcc version 6.1.1 
20160711 (Linaro GCC 6.1-2016.08) ) #61 SMP Fri Dec 9 19:46:24 CST 2016
[0.00] Boot CPU: AArch64 Processor [410fd082]
[0.00] earlycon: pl11 at MMIO32 0x602b (options '')
[0.00] bootconsole [pl11] enabled
[0.00] efi: Getting EFI parameters from FDT:
[0.00] efi:   System Table: 0x3f150018
[0.00] efi:   MemMap Address: 0x31b33018
[0.00] efi:   MemMap Size: 0x09f0
[0.00] efi:   MemMap Desc. Size: 0x0030
[0.00] efi:   MemMap Desc. Version: 0x0001
[0.00] efi: EFI v2.60 by EDK II
[0.00] efi:  SMBIOS=0x3f13  SMBIOS 3.0=0x39ca  ACPI=0x39d7  
ACPI 2.0=0x39d70014  MEMATTR=0x3ce14018
[0.00] efi: Processing EFI memory map:
[0.00] MEMBLOCK configuration:
[0.00]  memory size = 0x0 reserved size = 0x1000
[0.00]  memory.cnt  = 0x1
[0.00]  memory[0x0] [0x00-0x], 0x0 
bytes on node 0 flags: 0x0
[0.00]  reserved.cnt  = 0x1
[0.00]  reserved[0x0]   [0x001e40-0x001e400fff], 0x1000 
bytes flags: 0x0
[0.00] efi:   0x-0x0007 [Conventional Memory|   |  
|  |  |  |  |  |   |WB|WT|WC|UC]
[0.00] memblock_add: [0x00-0x07] flags 0x0 
early_init_dt_add_memory_arch+0x54/0x5c
[0.00] efi:   0x0008-0x016c [Loader Data|   |  
|  |  |  |  |  |   |WB|WT|WC|UC]
[0.00] memblock_add: [0x08-0x00016c] flags 0x0 
early_init_dt_add_memory_arch+0x54/0x5c
[0.00] efi:   0x016d-0x1e3f [Conventional Memory|   |  
|  |  |  |  |  |   |WB|WT|WC|UC]
[0.00] memblock_add: [0x00016d-0x001e3f] flags 0x0 
early_init_dt_add_memory_arch+0x54/0x5c
[0.00] efi:   0x1e40-0x1e40 [Loader Data|   |  
|  |  |  |  |  |   |WB|WT|WC|UC]
[0.00] memblock_add: [0x001e40-0x001e40] flags 0x0 
early_init_dt_add_memory_arch+0x54/0x5c
[0.00] efi:   0x1e41-0x1e47 [Conventional Memory|   |  
|  |  |  |  |  |   |WB|WT|WC|UC]
[0.00] memblock_add: [0x001e41-0x001e47] flags 0x0 
early_init_dt_add_memory_arch+0x54/0x5c
[0.00] efi:   0x1e48-0x1fff [Loader Data|   |  
|  |  |  |  |  |   |WB|WT|WC|UC]
[0.00] memblock_add: [0x001e48-0x001fff] flags 0x0 
early_init_dt_add_memory_arch+0x54/0x5c
[0.00] efi:   0x2000-0x2fbf [Conventional Memory|   |  
|  |  |  |  |  |   |WB|WT|WC|UC]
[0.00] memblock_add: [0x002000-0x002fbf] flags 0x0 
early_init_dt_add_memory_arch+0x54/0x5c
[0.00] efi:   0x2fc0-0x2fc1 [Boot Data  |   |  
|  |  |  |  |  |   |WB|WT|WC|UC]
[0.00] memblock_add: [0x002fc0-0x002fc1] flags 0x0 
early_init_dt_add_memory_arch+0x54/0x5c
[0.00] efi:   0x2fc2-0x3049cfff [Conventional Memory|   |  
|  |  |  |  |  |   |WB|WT|WC|UC]
[0.00] memblock_add: [0x002fc2-0x003049] flags 0x0 
early_init_dt_add_memory_arch+0x54/0x5c
[0.00] efi:   0x3049d000-0x31b0 [Loader Code|   |  
|  |  |  |  |  |   |WB|WT|WC|UC]
[0.00] memblock_add: [0x003049-0x0031b0] flags 0x0 
early_init_dt_add_memory_arch+0x54/0x5c
[0.00] efi:   0x31b1-0x31b2 [Runtime Data   |RUN|  
|  |  |  |  |  |   |WB|WT|WC|UC]
[0.00] memblock_add: [0x0031b1-0x0031b2] flags 0x0 

Re: [PATCH] arm64: mm: Fix NOMAP page initialization

2016-12-11 Thread Yisheng Xie
hi Robert,

On 2016/12/10 2:10, Robert Richter wrote:
> On ThunderX systems with certain memory configurations we see the
> following BUG_ON():
> 
>  kernel BUG at mm/page_alloc.c:1848!
> 
> This happens for some configs with 64k page size enabled. The BUG_ON()
> checks if start and end page of a memmap range belongs to the same
> zone.
> 
> The BUG_ON() check fails if a memory zone contains NOMAP regions. In
> this case the node information of those pages is not initialized. This
> causes an inconsistency of the page links with wrong zone and node
> information for that pages. NOMAP pages from node 1 still point to the
> mem zone from node 0 and have the wrong nid assigned.
> 
The patch works for zones that contain NOMAP regions.

However, if the BIOS does not add a WB/WT/WC attribute to a physical address range,
is_memory(md) will return false and this range will not be added to memblock.
   efi_init
  -> reserve_regions
if (is_memory(md)) {
early_init_dt_add_memory_arch(paddr, size);

if (!is_usable_memory(md))
memblock_mark_nomap(paddr, size);
}

Then the BUG_ON() check will also fail. Any idea about it?

Here is the crash log I got from D05:
crash log---
[0.00] Booting Linux on physical CPU 0x1
[0.00] Linux version 4.9.0-rc8+ (xys@linux-ibm) (gcc version 6.1.1 
20160711 (Linaro GCC 6.1-2016.08) ) #61 SMP Fri Dec 9 19:46:24 CST 2016
[0.00] Boot CPU: AArch64 Processor [410fd082]
[0.00] earlycon: pl11 at MMIO32 0x602b (options '')
[0.00] bootconsole [pl11] enabled
[0.00] efi: Getting EFI parameters from FDT:
[0.00] efi:   System Table: 0x3f150018
[0.00] efi:   MemMap Address: 0x31b33018
[0.00] efi:   MemMap Size: 0x09f0
[0.00] efi:   MemMap Desc. Size: 0x0030
[0.00] efi:   MemMap Desc. Version: 0x0001
[0.00] efi: EFI v2.60 by EDK II
[0.00] efi:  SMBIOS=0x3f13  SMBIOS 3.0=0x39ca  ACPI=0x39d7  
ACPI 2.0=0x39d70014  MEMATTR=0x3ce14018
[0.00] efi: Processing EFI memory map:
[0.00] MEMBLOCK configuration:
[0.00]  memory size = 0x0 reserved size = 0x1000
[0.00]  memory.cnt  = 0x1
[0.00]  memory[0x0] [0x00-0x], 0x0 
bytes on node 0 flags: 0x0
[0.00]  reserved.cnt  = 0x1
[0.00]  reserved[0x0]   [0x001e40-0x001e400fff], 0x1000 
bytes flags: 0x0
[0.00] efi:   0x-0x0007 [Conventional Memory|   |  
|  |  |  |  |  |   |WB|WT|WC|UC]
[0.00] memblock_add: [0x00-0x07] flags 0x0 
early_init_dt_add_memory_arch+0x54/0x5c
[0.00] efi:   0x0008-0x016c [Loader Data|   |  
|  |  |  |  |  |   |WB|WT|WC|UC]
[0.00] memblock_add: [0x08-0x00016c] flags 0x0 
early_init_dt_add_memory_arch+0x54/0x5c
[0.00] efi:   0x016d-0x1e3f [Conventional Memory|   |  
|  |  |  |  |  |   |WB|WT|WC|UC]
[0.00] memblock_add: [0x00016d-0x001e3f] flags 0x0 
early_init_dt_add_memory_arch+0x54/0x5c
[0.00] efi:   0x1e40-0x1e40 [Loader Data|   |  
|  |  |  |  |  |   |WB|WT|WC|UC]
[0.00] memblock_add: [0x001e40-0x001e40] flags 0x0 
early_init_dt_add_memory_arch+0x54/0x5c
[0.00] efi:   0x1e41-0x1e47 [Conventional Memory|   |  
|  |  |  |  |  |   |WB|WT|WC|UC]
[0.00] memblock_add: [0x001e41-0x001e47] flags 0x0 
early_init_dt_add_memory_arch+0x54/0x5c
[0.00] efi:   0x1e48-0x1fff [Loader Data|   |  
|  |  |  |  |  |   |WB|WT|WC|UC]
[0.00] memblock_add: [0x001e48-0x001fff] flags 0x0 
early_init_dt_add_memory_arch+0x54/0x5c
[0.00] efi:   0x2000-0x2fbf [Conventional Memory|   |  
|  |  |  |  |  |   |WB|WT|WC|UC]
[0.00] memblock_add: [0x002000-0x002fbf] flags 0x0 
early_init_dt_add_memory_arch+0x54/0x5c
[0.00] efi:   0x2fc0-0x2fc1 [Boot Data  |   |  
|  |  |  |  |  |   |WB|WT|WC|UC]
[0.00] memblock_add: [0x002fc0-0x002fc1] flags 0x0 
early_init_dt_add_memory_arch+0x54/0x5c
[0.00] efi:   0x2fc2-0x3049cfff [Conventional Memory|   |  
|  |  |  |  |  |   |WB|WT|WC|UC]
[0.00] memblock_add: [0x002fc2-0x003049] flags 0x0 
early_init_dt_add_memory_arch+0x54/0x5c
[0.00] efi:   0x3049d000-0x31b0 [Loader Code|   |  
|  |  |  |  |  |   |WB|WT|WC|UC]
[0.00] memblock_add: [0x003049-0x0031b0] flags 0x0 
early_init_dt_add_memory_arch+0x54/0x5c
[0.00] efi:   0x31b1-0x31b2 [Runtime Data   |RUN|  
|  |  |  |  |  |   |WB|WT|WC|UC]
[0.00] memblock_add: [0x0031b1-0x0031b2] flags 0x0 

[V2] mtd: devices: docg3:- Handle return value of devm_ioremap.

2016-12-11 Thread Arvind Yadav
If devm_ioremap() fails, it returns NULL, and the kernel can
run into a NULL-pointer dereference.

Signed-off-by: Arvind Yadav 
---
 drivers/mtd/devices/docg3.c | 7 ++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/drivers/mtd/devices/docg3.c b/drivers/mtd/devices/docg3.c
index b833e6c..ffe3db0 100644
--- a/drivers/mtd/devices/docg3.c
+++ b/drivers/mtd/devices/docg3.c
@@ -2083,9 +2083,14 @@ static int __init docg3_probe(struct platform_device 
*pdev)
dev_err(dev, "No I/O memory resource defined\n");
return ret;
}
-   base = devm_ioremap(dev, ress->start, DOC_IOSPACE_SIZE);
 
ret = -ENOMEM;
+   base = devm_ioremap(dev, ress->start, DOC_IOSPACE_SIZE);
+   if (!base) {
+   dev_err(dev, "failed to map I/O memory\n");
+   return ret;
+   }
+
cascade = devm_kzalloc(dev, sizeof(*cascade) * DOC_MAX_NBFLOORS,
   GFP_KERNEL);
if (!cascade)
-- 
2.7.4



[V2] mtd: devices: docg3:- Handle return value of devm_ioremap.

2016-12-11 Thread Arvind Yadav
If devm_ioremap() fails, it returns NULL, and the kernel can
run into a NULL-pointer dereference.

Signed-off-by: Arvind Yadav 
---
 drivers/mtd/devices/docg3.c | 7 ++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/drivers/mtd/devices/docg3.c b/drivers/mtd/devices/docg3.c
index b833e6c..ffe3db0 100644
--- a/drivers/mtd/devices/docg3.c
+++ b/drivers/mtd/devices/docg3.c
@@ -2083,9 +2083,14 @@ static int __init docg3_probe(struct platform_device 
*pdev)
dev_err(dev, "No I/O memory resource defined\n");
return ret;
}
-   base = devm_ioremap(dev, ress->start, DOC_IOSPACE_SIZE);
 
ret = -ENOMEM;
+   base = devm_ioremap(dev, ress->start, DOC_IOSPACE_SIZE);
+   if (!base) {
+   dev_err(dev, "failed to map I/O memory\n");
+   return ret;
+   }
+
cascade = devm_kzalloc(dev, sizeof(*cascade) * DOC_MAX_NBFLOORS,
   GFP_KERNEL);
if (!cascade)
-- 
2.7.4



Re: [V1] mtd: devices: docg3:- Handle return value of devm_ioremap.

2016-12-11 Thread arvind Yadav

Yes, We are returning -ENOMEM, ret is initialized to -ENOMEM.
As per your concern, I have added dev_err failure message.

Thanks
-Arvind

On Monday 12 December 2016 12:45 AM, Marek Vasut wrote:

On 12/11/2016 07:01 PM, Arvind Yadav wrote:

Here, If devm_ioremap will fail. It will return NULL.
Kernel can run into a NULL-pointer dereference.

Signed-off-by: Arvind Yadav 
---
  drivers/mtd/devices/docg3.c | 5 -
  1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/drivers/mtd/devices/docg3.c b/drivers/mtd/devices/docg3.c
index b833e6c..013b5b9 100644
--- a/drivers/mtd/devices/docg3.c
+++ b/drivers/mtd/devices/docg3.c
@@ -2083,9 +2083,12 @@ static int __init docg3_probe(struct platform_device 
*pdev)
dev_err(dev, "No I/O memory resource defined\n");
return ret;
}
-   base = devm_ioremap(dev, ress->start, DOC_IOSPACE_SIZE);
  
  	ret = -ENOMEM;

+   base = devm_ioremap(dev, ress->start, DOC_IOSPACE_SIZE);
+   if (!base)
+   return ret;

I think return -ENOMEM right away won't hurt here. Also, dev_err()
explaining the failure would be nice to add.

Thanks!


cascade = devm_kzalloc(dev, sizeof(*cascade) * DOC_MAX_NBFLOORS,
   GFP_KERNEL);
if (!cascade)







Re: [V1] mtd: devices: docg3:- Handle return value of devm_ioremap.

2016-12-11 Thread arvind Yadav

Yes, We are returning -ENOMEM, ret is initialized to -ENOMEM.
As per your concern, I have added dev_err failure message.

Thanks
-Arvind

On Monday 12 December 2016 12:45 AM, Marek Vasut wrote:

On 12/11/2016 07:01 PM, Arvind Yadav wrote:

Here, If devm_ioremap will fail. It will return NULL.
Kernel can run into a NULL-pointer dereference.

Signed-off-by: Arvind Yadav 
---
  drivers/mtd/devices/docg3.c | 5 -
  1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/drivers/mtd/devices/docg3.c b/drivers/mtd/devices/docg3.c
index b833e6c..013b5b9 100644
--- a/drivers/mtd/devices/docg3.c
+++ b/drivers/mtd/devices/docg3.c
@@ -2083,9 +2083,12 @@ static int __init docg3_probe(struct platform_device 
*pdev)
dev_err(dev, "No I/O memory resource defined\n");
return ret;
}
-   base = devm_ioremap(dev, ress->start, DOC_IOSPACE_SIZE);
  
  	ret = -ENOMEM;

+   base = devm_ioremap(dev, ress->start, DOC_IOSPACE_SIZE);
+   if (!base)
+   return ret;

I think return -ENOMEM right away won't hurt here. Also, dev_err()
explaining the failure would be nice to add.

Thanks!


cascade = devm_kzalloc(dev, sizeof(*cascade) * DOC_MAX_NBFLOORS,
   GFP_KERNEL);
if (!cascade)







Re: [PATCH] ocfs2: fix crash caused by stale lvb with fsdlm plugin

2016-12-11 Thread Gang He
Hi Eric,

Looks good to me.
Just one suggestion:
please monitor whether the LVB sharing mechanism in the cluster still works well in 
the normal scenario, 
to avoid any performance regression.

Reviewed-by: Gang He 

Thanks
Gang


>>> 
> The crash happens rather often when we reset some cluster
> nodes while nodes contend fiercely to do truncate and append.
> 
> The crash backtrace is below:
> "
> [  245.197849] dlm: C21CBDA5E0774F4BA5A9D4F317717495: dlm_recover_grant 1 
> locks on 971 resources
> [  245.197859] dlm: C21CBDA5E0774F4BA5A9D4F317717495: dlm_recover 9 
> generation 5 done: 4 ms
> [  245.198379] ocfs2: Begin replay journal (node 318952601, slot 2) on 
> device (253,18)
> [  247.272338] ocfs2: End replay journal (node 318952601, slot 2) on device 
> (253,18)
> [  247.547084] ocfs2: Beginning quota recovery on device (253,18) for slot 2
> [  247.683263] ocfs2: Finishing quota recovery on device (253,18) for slot 2
> [  247.833022] (truncate,30154,1):ocfs2_truncate_file:470 ERROR: bug 
> expression: le64_to_cpu(fe->i_size) != i_size_read(inode)
> [  247.833029] (truncate,30154,1):ocfs2_truncate_file:470 ERROR: Inode 
> 290321, inode i_size = 732 != di i_size = 937, i_flags = 0x1
> [  247.833074] [ cut here ]
> [  247.833077] kernel BUG at /usr/src/linux/fs/ocfs2/file.c:470!
> [  247.833079] invalid opcode:  [#1] SMP
> [  247.833081] Modules linked in: ocfs2_stack_user(OEN) ocfs2(OEN) 
> ocfs2_nodemanager ocfs2_stackglue(OEN) quota_tree dlm(OEN) configfs fuse 
> sd_modiscsi_tcp libiscsi_tcp libiscsi scsi_transport_iscsi af_packet 
> iscsi_ibft iscsi_boot_sysfs softdog xfs libcrc32c ppdev parport_pc pcspkr 
> parport  joydev virtio_balloon virtio_net i2c_piix4 acpi_cpufreq button 
> processor ext4 crc16 jbd2 mbcache ata_generic cirrus virtio_blk ata_piix  
>  drm_kms_helper ahci syscopyarea libahci sysfillrect sysimgblt 
> fb_sys_fops ttm floppy libata drm virtio_pci virtio_ring uhci_hcd virtio 
> ehci_hcd   usbcore serio_raw usb_common sg dm_multipath dm_mod 
> scsi_dh_rdac scsi_dh_emc scsi_dh_alua scsi_mod autofs4
> [  247.833107] Supported: No, Unsupported modules are loaded
> [  247.833110] CPU: 1 PID: 30154 Comm: truncate Tainted: G   OE   N  
> 4.4.21-69-default #1
> [  247.833111] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 
> rel-1.8.1-0-g4adadbd-20151112_172657-sheep25 04/01/2014
> [  247.833112] task: 88004ff6d240 ti: 880074e68000 task.ti: 
> 880074e68000
> [  247.833113] RIP: 0010:[]  [] 
> ocfs2_truncate_file+0x640/0x6c0 [ocfs2]
> [  247.833151] RSP: 0018:880074e6bd50  EFLAGS: 00010282
> [  247.833152] RAX: 0074 RBX: 029e RCX: 
> 
> [  247.833153] RDX: 0001 RSI: 0246 RDI: 
> 0246
> [  247.833154] RBP: 880074e6bda8 R08: 3675dc7a R09: 
> 82013414
> [  247.833155] R10: 00034c50 R11:  R12: 
> 88003aab3448
> [  247.833156] R13: 02dc R14: 00046e11 R15: 
> 0020
> [  247.833157] FS:  7f839f965700() GS:88007fc8() 
> knlGS:
> [  247.833158] CS:  0010 DS:  ES:  CR0: 8005003b
> [  247.833159] CR2: 7f839f97e000 CR3: 36723000 CR4: 
> 06e0
> [  247.833164] Stack:
> [  247.833165]  03a9 0001 880060554000 
> 88004fcaf000
> [  247.833167]  88003aa7b090 1000 88003aab3448 
> 880074e6beb0
> [  247.833169]  0001 2068 0020 
> 
> [  247.833171] Call Trace:
> [  247.833208]  [] ocfs2_setattr+0x698/0xa90 [ocfs2]
> [  247.833225]  [] notify_change+0x1ae/0x380
> [  247.833242]  [] do_truncate+0x5e/0x90
> [  247.833246]  [] do_sys_ftruncate.constprop.11+0x108/0x160
> [  247.833257]  [] entry_SYSCALL_64_fastpath+0x12/0x6d
> [  247.834724] DWARF2 unwinder stuck at entry_SYSCALL_64_fastpath+0x12/0x6d
> [  247.834725]
> [  247.834726] Leftover inexact backtrace:
> 
> [  247.834728] Code: 24 28 ba d6 01 00 00 48 c7 c6 30 43 62 a0 8b 41 2c 89 
> 44 24 08 48 8b 41 20 48 c7 c1 78 a3 62 a0 48 89 04 24 31 c0 e8 a0 97 f9 ff 
> <0f> 
> 0b 3d 00 fe ff ff 0f 84 ab fd ff ff 83 f8 fc 0f 84 a2 fd ff
> [  247.834748] RIP  [] ocfs2_truncate_file+0x640/0x6c0 
> [ocfs2]
> [  247.834774]  RSP 
> "
> 
> It's because ocfs2_inode_lock() get us stale LVB in which the i_size is not
> equal to the disk i_size. We mistakenly trust the LVB because the 
> underlaying
> fsdlm dlm_lock() doesn't set lkb_sbflags with DLM_SBF_VALNOTVALID properly 
> for
> us. But, why?
> 
> The current code tries to downconvert lock without DLM_LKF_VALBLK
> flag to tell o2cb don't update RSB's LVB if it's a PR->NULL conversion,
> even if the lock resource type needs LVB. This is not the right way for 
> fsdlm.
> 
> The fsdlm plugin behaves different on DLM_LKF_VALBLK, it depends on
> DLM_LKF_VALBLK to decide if we 

Re: [PATCH] ocfs2: fix crash caused by stale lvb with fsdlm plugin

2016-12-11 Thread Gang He
Hi Eric,

Looks good to me.
Just one suggestion:
please monitor whether the LVB sharing mechanism in the cluster still works well in 
the normal scenario, 
to avoid any performance regression.

Reviewed-by: Gang He 

Thanks
Gang


>>> 
> The crash happens rather often when we reset some cluster
> nodes while nodes contend fiercely to do truncate and append.
> 
> The crash backtrace is below:
> "
> [  245.197849] dlm: C21CBDA5E0774F4BA5A9D4F317717495: dlm_recover_grant 1 
> locks on 971 resources
> [  245.197859] dlm: C21CBDA5E0774F4BA5A9D4F317717495: dlm_recover 9 
> generation 5 done: 4 ms
> [  245.198379] ocfs2: Begin replay journal (node 318952601, slot 2) on 
> device (253,18)
> [  247.272338] ocfs2: End replay journal (node 318952601, slot 2) on device 
> (253,18)
> [  247.547084] ocfs2: Beginning quota recovery on device (253,18) for slot 2
> [  247.683263] ocfs2: Finishing quota recovery on device (253,18) for slot 2
> [  247.833022] (truncate,30154,1):ocfs2_truncate_file:470 ERROR: bug 
> expression: le64_to_cpu(fe->i_size) != i_size_read(inode)
> [  247.833029] (truncate,30154,1):ocfs2_truncate_file:470 ERROR: Inode 
> 290321, inode i_size = 732 != di i_size = 937, i_flags = 0x1
> [  247.833074] [ cut here ]
> [  247.833077] kernel BUG at /usr/src/linux/fs/ocfs2/file.c:470!
> [  247.833079] invalid opcode:  [#1] SMP
> [  247.833081] Modules linked in: ocfs2_stack_user(OEN) ocfs2(OEN) 
> ocfs2_nodemanager ocfs2_stackglue(OEN) quota_tree dlm(OEN) configfs fuse 
> sd_modiscsi_tcp libiscsi_tcp libiscsi scsi_transport_iscsi af_packet 
> iscsi_ibft iscsi_boot_sysfs softdog xfs libcrc32c ppdev parport_pc pcspkr 
> parport  joydev virtio_balloon virtio_net i2c_piix4 acpi_cpufreq button 
> processor ext4 crc16 jbd2 mbcache ata_generic cirrus virtio_blk ata_piix  
>  drm_kms_helper ahci syscopyarea libahci sysfillrect sysimgblt 
> fb_sys_fops ttm floppy libata drm virtio_pci virtio_ring uhci_hcd virtio 
> ehci_hcd   usbcore serio_raw usb_common sg dm_multipath dm_mod 
> scsi_dh_rdac scsi_dh_emc scsi_dh_alua scsi_mod autofs4
> [  247.833107] Supported: No, Unsupported modules are loaded
> [  247.833110] CPU: 1 PID: 30154 Comm: truncate Tainted: G   OE   N  
> 4.4.21-69-default #1
> [  247.833111] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 
> rel-1.8.1-0-g4adadbd-20151112_172657-sheep25 04/01/2014
> [  247.833112] task: 88004ff6d240 ti: 880074e68000 task.ti: 
> 880074e68000
> [  247.833113] RIP: 0010:[]  [] 
> ocfs2_truncate_file+0x640/0x6c0 [ocfs2]
> [  247.833151] RSP: 0018:880074e6bd50  EFLAGS: 00010282
> [  247.833152] RAX: 0074 RBX: 029e RCX: 
> 
> [  247.833153] RDX: 0001 RSI: 0246 RDI: 
> 0246
> [  247.833154] RBP: 880074e6bda8 R08: 3675dc7a R09: 
> 82013414
> [  247.833155] R10: 00034c50 R11:  R12: 
> 88003aab3448
> [  247.833156] R13: 02dc R14: 00046e11 R15: 
> 0020
> [  247.833157] FS:  7f839f965700() GS:88007fc8() 
> knlGS:
> [  247.833158] CS:  0010 DS:  ES:  CR0: 8005003b
> [  247.833159] CR2: 7f839f97e000 CR3: 36723000 CR4: 
> 06e0
> [  247.833164] Stack:
> [  247.833165]  03a9 0001 880060554000 
> 88004fcaf000
> [  247.833167]  88003aa7b090 1000 88003aab3448 
> 880074e6beb0
> [  247.833169]  0001 2068 0020 
> 
> [  247.833171] Call Trace:
> [  247.833208]  [] ocfs2_setattr+0x698/0xa90 [ocfs2]
> [  247.833225]  [] notify_change+0x1ae/0x380
> [  247.833242]  [] do_truncate+0x5e/0x90
> [  247.833246]  [] do_sys_ftruncate.constprop.11+0x108/0x160
> [  247.833257]  [] entry_SYSCALL_64_fastpath+0x12/0x6d
> [  247.834724] DWARF2 unwinder stuck at entry_SYSCALL_64_fastpath+0x12/0x6d
> [  247.834725]
> [  247.834726] Leftover inexact backtrace:
> 
> [  247.834728] Code: 24 28 ba d6 01 00 00 48 c7 c6 30 43 62 a0 8b 41 2c 89 
> 44 24 08 48 8b 41 20 48 c7 c1 78 a3 62 a0 48 89 04 24 31 c0 e8 a0 97 f9 ff 
> <0f> 
> 0b 3d 00 fe ff ff 0f 84 ab fd ff ff 83 f8 fc 0f 84 a2 fd ff
> [  247.834748] RIP  [] ocfs2_truncate_file+0x640/0x6c0 
> [ocfs2]
> [  247.834774]  RSP 
> "
> 
> It's because ocfs2_inode_lock() get us stale LVB in which the i_size is not
> equal to the disk i_size. We mistakenly trust the LVB because the 
> underlaying
> fsdlm dlm_lock() doesn't set lkb_sbflags with DLM_SBF_VALNOTVALID properly 
> for
> us. But, why?
> 
> The current code tries to downconvert lock without DLM_LKF_VALBLK
> flag to tell o2cb don't update RSB's LVB if it's a PR->NULL conversion,
> even if the lock resource type needs LVB. This is not the right way for 
> fsdlm.
> 
> The fsdlm plugin behaves different on DLM_LKF_VALBLK, it depends on
> DLM_LKF_VALBLK to decide if we care about the 

Re: [PATCH v2 1/1] lockd: Change nsm_use_hostnames from bool to u32

2016-12-11 Thread hejianet


Hi Xinhui
Thanks, it really works.
Will send out V3 soon afterwards

B.R.
Jia

On 12/12/16 1:43 AM, Pan Xinhui wrote:


hi, jia
nice catch!
However I think we should fix it totally.
This is because do_proc_dointvec_conv() tries to get an int value from a bool *.

Something like the patch below might help. Please ignore the code style; this is tested 
:)

diff --git a/fs/lockd/svc.c b/fs/lockd/svc.c
index fc4084e..7eeaee4 100644
--- a/fs/lockd/svc.c
+++ b/fs/lockd/svc.c
@@ -519,6 +519,8 @@ EXPORT_SYMBOL_GPL(lockd_down);
  * Sysctl parameters (same as module parameters, different interface).
  */

+int proc_dou8vec(struct ctl_table *table, int write,
+void __user *buffer, size_t *lenp, loff_t 
*ppos);

 static struct ctl_table nlm_sysctls[] = {
{
.procname   = "nlm_grace_period",
@@ -561,7 +563,7 @@ static struct ctl_table nlm_sysctls[] = {
.data   = _use_hostnames,
.maxlen = sizeof(int),
.mode   = 0644,
-   .proc_handler   = proc_dointvec,
+   .proc_handler   = proc_dou8vec,
},
{
.procname   = "nsm_local_state",
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 706309f..6307737 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -2112,6 +2112,30 @@ static int proc_put_char(void __user **buf, size_t 
*size, char c)

return 0;
 }

+
+static int do_proc_dou8vec_conv(bool *negp, unsigned long *lvalp,
+u8 *valp,
+int write, void *data)
+{
+   if (write) {
+   if (*negp) {
+   *valp = -*lvalp;
+   } else {
+   *valp = *lvalp;
+   }
+   } else {
+   int val = *valp;
+   if (val < 0) {
+   *negp = true;
+   *lvalp = -(unsigned long)val;
+   } else {
+   *negp = false;
+   *lvalp = (unsigned long)val;
+   }
+   }
+   return 0;
+}
+
 static int do_proc_dointvec_conv(bool *negp, unsigned long *lvalp,
 int *valp,
 int write, void *data)
@@ -2296,6 +2320,14 @@ int proc_douintvec(struct ctl_table *table, int write,
do_proc_douintvec_conv, NULL);
 }

+int proc_dou8vec(struct ctl_table *table, int write,
+void __user *buffer, size_t *lenp, loff_t *ppos)
+{
+   return do_proc_dointvec(table, write, buffer, lenp, ppos,
+   do_proc_dou8vec_conv, NULL);
+}
+
+

在 2016/12/11 23:36, Jia He 写道:

nsm_use_hostnames is a module parameter and it will be exported to sysctl
procfs. This is to let the user sometimes change it from userspace. But the
minimal unit for sysctl procfs read/write is sizeof(int).
On a big-endian system, converting between bool and int will cause
errors for proc items.

This patch changes the type definition of nsm_use_hostnames.

V2: Changes extern type in lockd.h
Signed-off-by: Jia He 
---
 fs/lockd/mon.c  | 2 +-
 fs/lockd/svc.c  | 2 +-
 include/linux/lockd/lockd.h | 2 +-
 3 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/fs/lockd/mon.c b/fs/lockd/mon.c
index 19166d4..3e7ff4d 100644
--- a/fs/lockd/mon.c
+++ b/fs/lockd/mon.c
@@ -57,7 +57,7 @@ static DEFINE_SPINLOCK(nsm_lock);
  * Local NSM state
  */
 u32__read_mostlynsm_local_state;
-bool__read_mostlynsm_use_hostnames;
+u32__read_mostlynsm_use_hostnames;

 static inline struct sockaddr *nsm_addr(const struct nsm_handle *nsm)
 {
diff --git a/fs/lockd/svc.c b/fs/lockd/svc.c
index fc4084e..308033d 100644
--- a/fs/lockd/svc.c
+++ b/fs/lockd/svc.c
@@ -658,7 +658,7 @@ module_param_call(nlm_udpport, param_set_port, 
param_get_int,

   _udpport, 0644);
 module_param_call(nlm_tcpport, param_set_port, param_get_int,
   _tcpport, 0644);
-module_param(nsm_use_hostnames, bool, 0644);
+module_param(nsm_use_hostnames, u32, 0644);
 module_param(nlm_max_connections, uint, 0644);

 static int lockd_init_net(struct net *net)
diff --git a/include/linux/lockd/lockd.h b/include/linux/lockd/lockd.h
index c153738..db52152 100644
--- a/include/linux/lockd/lockd.h
+++ b/include/linux/lockd/lockd.h
@@ -196,7 +196,7 @@ extern struct svc_procedure nlmsvc_procedures4[];
 #endif
 extern intnlmsvc_grace_period;
 extern unsigned longnlmsvc_timeout;
-extern boolnsm_use_hostnames;
+extern u32nsm_use_hostnames;
 extern u32nsm_local_state;

 /*








Re: [PATCH v2 1/1] lockd: Change nsm_use_hostnames from bool to u32

2016-12-11 Thread hejianet


Hi Xinhui
Thanks, it really works.
Will send out V3 soon afterwards

B.R.
Jia

On 12/12/16 1:43 AM, Pan Xinhui wrote:


hi, jia
nice catch!
However I think we should fix it totally.
This is because do_proc_dointvec_conv() tries to get an int value from a bool *.

Something like the patch below might help. Please ignore the code style; this is tested 
:)

diff --git a/fs/lockd/svc.c b/fs/lockd/svc.c
index fc4084e..7eeaee4 100644
--- a/fs/lockd/svc.c
+++ b/fs/lockd/svc.c
@@ -519,6 +519,8 @@ EXPORT_SYMBOL_GPL(lockd_down);
  * Sysctl parameters (same as module parameters, different interface).
  */

+int proc_dou8vec(struct ctl_table *table, int write,
+void __user *buffer, size_t *lenp, loff_t 
*ppos);

 static struct ctl_table nlm_sysctls[] = {
{
.procname   = "nlm_grace_period",
@@ -561,7 +563,7 @@ static struct ctl_table nlm_sysctls[] = {
.data   = _use_hostnames,
.maxlen = sizeof(int),
.mode   = 0644,
-   .proc_handler   = proc_dointvec,
+   .proc_handler   = proc_dou8vec,
},
{
.procname   = "nsm_local_state",
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 706309f..6307737 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -2112,6 +2112,30 @@ static int proc_put_char(void __user **buf, size_t 
*size, char c)

return 0;
 }

+
+static int do_proc_dou8vec_conv(bool *negp, unsigned long *lvalp,
+u8 *valp,
+int write, void *data)
+{
+   if (write) {
+   if (*negp) {
+   *valp = -*lvalp;
+   } else {
+   *valp = *lvalp;
+   }
+   } else {
+   int val = *valp;
+   if (val < 0) {
+   *negp = true;
+   *lvalp = -(unsigned long)val;
+   } else {
+   *negp = false;
+   *lvalp = (unsigned long)val;
+   }
+   }
+   return 0;
+}
+
 static int do_proc_dointvec_conv(bool *negp, unsigned long *lvalp,
 int *valp,
 int write, void *data)
@@ -2296,6 +2320,14 @@ int proc_douintvec(struct ctl_table *table, int write,
do_proc_douintvec_conv, NULL);
 }

+int proc_dou8vec(struct ctl_table *table, int write,
+void __user *buffer, size_t *lenp, loff_t *ppos)
+{
+   return do_proc_dointvec(table, write, buffer, lenp, ppos,
+   do_proc_dou8vec_conv, NULL);
+}
+
+

在 2016/12/11 23:36, Jia He 写道:

nsm_use_hostnames is a module parameter and it will be exported to sysctl
procfs. This is to let the user sometimes change it from userspace. But the
minimal unit for sysctl procfs read/write is sizeof(int).
On a big-endian system, converting between bool and int will cause
errors for proc items.

This patch changes the type definition of nsm_use_hostnames.

V2: Changes extern type in lockd.h
Signed-off-by: Jia He 
---
 fs/lockd/mon.c  | 2 +-
 fs/lockd/svc.c  | 2 +-
 include/linux/lockd/lockd.h | 2 +-
 3 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/fs/lockd/mon.c b/fs/lockd/mon.c
index 19166d4..3e7ff4d 100644
--- a/fs/lockd/mon.c
+++ b/fs/lockd/mon.c
@@ -57,7 +57,7 @@ static DEFINE_SPINLOCK(nsm_lock);
  * Local NSM state
  */
 u32__read_mostlynsm_local_state;
-bool__read_mostlynsm_use_hostnames;
+u32__read_mostlynsm_use_hostnames;

 static inline struct sockaddr *nsm_addr(const struct nsm_handle *nsm)
 {
diff --git a/fs/lockd/svc.c b/fs/lockd/svc.c
index fc4084e..308033d 100644
--- a/fs/lockd/svc.c
+++ b/fs/lockd/svc.c
@@ -658,7 +658,7 @@ module_param_call(nlm_udpport, param_set_port, 
param_get_int,

   _udpport, 0644);
 module_param_call(nlm_tcpport, param_set_port, param_get_int,
   _tcpport, 0644);
-module_param(nsm_use_hostnames, bool, 0644);
+module_param(nsm_use_hostnames, u32, 0644);
 module_param(nlm_max_connections, uint, 0644);

 static int lockd_init_net(struct net *net)
diff --git a/include/linux/lockd/lockd.h b/include/linux/lockd/lockd.h
index c153738..db52152 100644
--- a/include/linux/lockd/lockd.h
+++ b/include/linux/lockd/lockd.h
@@ -196,7 +196,7 @@ extern struct svc_procedure nlmsvc_procedures4[];
 #endif
 extern intnlmsvc_grace_period;
 extern unsigned longnlmsvc_timeout;
-extern boolnsm_use_hostnames;
+extern u32nsm_use_hostnames;
 extern u32nsm_local_state;

 /*








Re: [PATCH v2 2/2] x86/KASLR/64: Determine kernel text mapping size at runtime

2016-12-11 Thread Baoquan He
On 12/11/16 at 01:06pm, Borislav Petkov wrote:
> On Sun, Dec 11, 2016 at 06:58:29PM +0800, Baoquan He wrote:
> > For arguing and defending myself, I couldn't be very objective.
> 
> Yeah, it is mind-boggling the amount of bullshit you would come up with
> instead of simply saying, "no, I don't have a good reason and use case
> for my patch". It made me laugh, FWIW. Especially the bit about people
> getting naked - I had to go check we're still talking about the same
> thing.

Yes, I can't agree more, that use case is total bullshit. From the
very beginning, we all knew that this patch is trying to fix the
inconsistency between kaslr code not compiled in and code compiled in
but with "nokaslr" specified. In short, this patch is fixing an
inconsistency, no bug is reported yet. Here the inconsistency is the
reason for this patch. I think it has been made very clear now. This
also has been pointed out by Kees when he offered his "Acked-by". I
welcome and treat all comments seriously, no other choices are given
to me.


If at the start, you said straightforwardly like:

"No bug, no fix!"

"A little inconsistency makes the world more exciting, it can make me
high."

or
"We can live with it until a bug is reported, remaining 1G is no harm."

I can accept it totally and mute. But I didn't hear them. As an expert of
x86 arch and authority, you honor me to step in and give comments, I
have to reply with respect.

I am very glad to see you said you laughed at something, whatever it is
for, at least it means things are not thoroughly screwed up, laughter is
always good.

Thanks
Baoquan



Re: [PATCH v2 2/2] x86/KASLR/64: Determine kernel text mapping size at runtime

2016-12-11 Thread Baoquan He
On 12/11/16 at 01:06pm, Borislav Petkov wrote:
> On Sun, Dec 11, 2016 at 06:58:29PM +0800, Baoquan He wrote:
> > For arguing and defending myself, I couldn't be very objective.
> 
> Yeah, it is mind-boggling the amount of bullshit you would come up with
> instead of simply saying, "no, I don't have a good reason and use case
> for my patch". It made me laugh, FWIW. Especially the bit about people
> getting naked - I had to go check we're still talking about the same
> thing.

Yes, I can't agree more, that use case is total bullshit. From the
very beginning, we all knew that this patch is trying to fix the
inconsistency between kaslr code not compiled in and code compiled in
but with "nokaslr" specified. In short, this patch is fixing an
inconsistency, no bug is reported yet. Here the inconsistency is the
reason for this patch. I think it has been made very clear now. This
also has been pointed out by Kees when he offered his "Acked-by". I
welcome and treat all comments seriously, no other choices are given
to me.


If at the start, you said straightforwardly like:

"No bug, no fix!"

"A little inconsistency makes the world more exciting, it can make me
high."

or
"We can live with it until a bug is reported, remaining 1G is no harm."

I can accept it totally and mute. But I didn't hear them. As an expert of
x86 arch and authority, you honor me to step in and give comments, I
have to reply with respect.

I am very glad to see you said you laughed at something, whatever it is
for, at least it means things are not thoroughly screwed up, laughter is
always good.

Thanks
Baoquan



  1   2   3   4   >