date:20190421

Re: DISCONTIGMEM is deprecated

2019-04-21 Thread Mike Rapoport

On Fri, Apr 19, 2019 at 07:05:21AM -0700, Matthew Wilcox wrote:
> On Fri, Apr 19, 2019 at 10:43:35AM +0100, Mel Gorman wrote:
> > DISCONTIG is essentially deprecated and even parisc plans to move to
> > SPARSEMEM so there is no need to be fancy, this patch simply disables
> > watermark boosting by default on DISCONTIGMEM.
> 
> I don't think parisc is the only arch which uses DISCONTIGMEM for !NUMA
> scenarios.  Grepping the arch/ directories shows:
> 
> alpha (does support NUMA, but also non-NUMA DISCONTIGMEM)
> arc (for supporting more than 1GB of memory)
> ia64 (looks complicated ...)
> m68k (for multiple chunks of memory)
> mips (does support NUMA but also non-NUMA)
> parisc (both NUMA and non-NUMA)

i386 NUMA as well
 
> I'm not sure that these architecture maintainers even know that DISCONTIGMEM
> is deprecated.  Adding linux-arch to the cc.
> 

-- 
Sincerely yours,
Mike.

Re: [EXT] Re: [PATCH v5] arm64: dts: ls1088a: add one more thermal zone node

2019-04-21 Thread Shawn Guo

On Thu, Apr 18, 2019 at 03:28:56AM +0000, Andy Tang wrote:
> 
> > -Original Message-
> > From: Daniel Lezcano 
> > Sent: 2019年4月12日 20:19
> > To: Andy Tang ; shawn...@kernel.org
> > Cc: Leo Li ; robh...@kernel.org; mark.rutl...@arm.com;
> > linux-arm-ker...@lists.infradead.org; devicet...@vger.kernel.org;
> > linux-kernel@vger.kernel.org; linux...@vger.kernel.org; rui.zh...@intel.com;
> > edubez...@gmail.com
> > Subject: Re: [EXT] Re: [PATCH v5] arm64: dts: ls1088a: add one more thermal
> > zone node
> > 
> > WARNING: This email was created outside of NXP. DO NOT CLICK links or
> > attachments unless you recognize the sender and know the content is safe.
> > 
> > 
> > 
> > On 12/04/2019 09:47, Andy Tang wrote:
> > >
> > >> -Original Message-
> > >> From: Daniel Lezcano 
> > >> Sent: 2019年4月12日 3:15
> > >> To: Andy Tang ; shawn...@kernel.org
> > >> Cc: Leo Li ; robh...@kernel.org;
> > >> mark.rutl...@arm.com; linux-arm-ker...@lists.infradead.org;
> > >> devicet...@vger.kernel.org; linux-kernel@vger.kernel.org;
> > >> linux...@vger.kernel.org; rui.zh...@intel.com; edubez...@gmail.com
> > >> Subject: [EXT] Re: [PATCH v5] arm64: dts: ls1088a: add one more
> > >> thermal zone node
> > >>
> > >> WARNING: This email was created outside of NXP. DO NOT CLICK links or
> > >> attachments unless you recognize the sender and know the content is safe.
> > >>
> > >>
> > >>
> > >> On 11/04/2019 10:32, Yuantian Tang wrote:
> > >>> Ls1088a has 2 thermal sensors, core cluster and SoC platform. Core
> > >>> cluster sensor is used to monitor the temperature of core and SoC
> > >>> platform is for platform. The current dts only support the first sensor.
> > >>> This patch adds the second sensor node to dts to enable it.
> > >>>
> > >>> Signed-off-by: Yuantian Tang 
> > >>> ---
> > >>> v5:
> > >>>   - update the thermal zone name due to the length limitation
> > >>>   - remove cooling map in platform zone
> > >>> v4:
> > >>>   - use hyphen instead of underscore in node name
> > >>> v3:
> > >>>   - use more descriptive name for each zone
> > >>> v2:
> > >>>   - Add more information about sensors to description
> > >>>  arch/arm64/boot/dts/freescale/fsl-ls1088a.dtsi |   28
> > >> ---
> > >>>  1 files changed, 24 insertions(+), 4 deletions(-)
> > >>>
> > >>> diff --git a/arch/arm64/boot/dts/freescale/fsl-ls1088a.dtsi
> > >>> b/arch/arm64/boot/dts/freescale/fsl-ls1088a.dtsi
> > >>> index de93b42..de39672 100644
> > >>> --- a/arch/arm64/boot/dts/freescale/fsl-ls1088a.dtsi
> > >>> +++ b/arch/arm64/boot/dts/freescale/fsl-ls1088a.dtsi
> > >>> @@ -129,19 +129,19 @@
> > >>>   };
> > >>>
> > >>>   thermal-zones {
> > >>> - cpu_thermal: cpu-thermal {
> > >>> + core-cluster {
> > >>>   polling-delay-passive = <1000>;
> > >>>   polling-delay = <5000>;
> > >>>   thermal-sensors = < 0>;
> > >>>
> > >>>   trips {
> > >>> - cpu_alert: cpu-alert {
> > >>> + core_cluster_alert: core-cluster-alert
> > >>> + {
> > >>>   temperature = <85000>;
> > >>>   hysteresis = <2000>;
> > >>>   type = "passive";
> > >>>   };
> > >>>
> > >>> - cpu_crit: cpu-crit {
> > >>> + core_cluster_crit: core-cluster-crit {
> > >>>   temperature = <95000>;
> > >>>   hysteresis = <2000>;
> > >>>   type = "critical"; @@ -150,7
> > >>> +150,7 @@
> > >>>
> > >>>   cooling-maps {
> > >>>   map0 {
> > >>> - trip = <_alert>;
> > >>> + trip = <_cluster_alert>;
> > >>>   cooling-device =
> > >>>   <
> > >> THERMAL_NO_LIMIT THERMAL_NO_LIMIT>,
> > >>>   <
> > >> THERMAL_NO_LIMIT
> > >>> THERMAL_NO_LIMIT>, @@ -163,6 +163,26 @@
> > >>>   };
> > >>>   };
> > >>>   };
> > >>> +
> > >>> + platform {
> > >>> + polling-delay-passive = <1000>;
> > >>> + polling-delay = <5000>;
> > >>> + thermal-sensors = < 1>;
> > >>> +
> > >>> + trips {
> > >>> + platform-alert {
> > >>> + temperature = <85000>;
> > >>> + hysteresis = <2000>;
> > >>> + type = "passive";
> > >>> + };
> > >>> +
> > >>> + platform-crit {

Re: [PATCH 1/2] soc: imx-sc: add i.MX system controller soc driver support

2019-04-21 Thread Shawn Guo

On Thu, Apr 11, 2019 at 06:49:12AM +0000, Anson Huang wrote:
> i.MX8QXP is an ARMv8 SoC which has a Cortex-M4 system controller
> inside, the system controller is in charge of controlling power,
> clock and fuse etc..
> 
> This patch adds i.MX system controller soc driver support,
> Linux kernel has to communicate with system controller via MU
> (message unit) IPC to get soc revision, uid etc..
> 
> With this patch, soc info can be read from sysfs:
> 
> i.mx8qxp-mek# cat /sys/devices/soc0/family
> Freescale i.MX
> 
> i.mx8qxp-mek# cat /sys/devices/soc0/soc_id
> i.MX8QXP
> 
> i.mx8qxp-mek# cat /sys/devices/soc0/machine
> Freescale i.MX8QXP MEK
> 
> i.mx8qxp-mek# cat /sys/devices/soc0/revision
> 1.1
> 
> i.mx8qxp-mek# cat /sys/devices/soc0/soc_uid
> 7B64280B57AC1898
> 
> Signed-off-by: Anson Huang 
> ---
>  drivers/soc/imx/Kconfig  |   7 ++
>  drivers/soc/imx/Makefile |   1 +
>  drivers/soc/imx/soc-imx-sc.c | 220 
> +++
>  3 files changed, 228 insertions(+)
>  create mode 100644 drivers/soc/imx/soc-imx-sc.c

Rather than creating a new driver, please take a look at Abel's generic
i.MX8 SoC driver, and see if it can be extended to cover i.MX8QXP.

Shawn

Re: [PATCH 1/2] soc: imx-sc: add i.MX system controller soc driver support

2019-04-21 Thread Shawn Guo

On Sun, Apr 21, 2019 at 03:40:00PM +0800, Shawn Guo wrote:
> On Thu, Apr 11, 2019 at 06:49:12AM +0000, Anson Huang wrote:
> > i.MX8QXP is an ARMv8 SoC which has a Cortex-M4 system controller
> > inside, the system controller is in charge of controlling power,
> > clock and fuse etc..
> > 
> > This patch adds i.MX system controller soc driver support,
> > Linux kernel has to communicate with system controller via MU
> > (message unit) IPC to get soc revision, uid etc..
> > 
> > With this patch, soc info can be read from sysfs:
> > 
> > i.mx8qxp-mek# cat /sys/devices/soc0/family
> > Freescale i.MX
> > 
> > i.mx8qxp-mek# cat /sys/devices/soc0/soc_id
> > i.MX8QXP
> > 
> > i.mx8qxp-mek# cat /sys/devices/soc0/machine
> > Freescale i.MX8QXP MEK
> > 
> > i.mx8qxp-mek# cat /sys/devices/soc0/revision
> > 1.1
> > 
> > i.mx8qxp-mek# cat /sys/devices/soc0/soc_uid
> > 7B64280B57AC1898
> > 
> > Signed-off-by: Anson Huang 
> > ---
> >  drivers/soc/imx/Kconfig  |   7 ++
> >  drivers/soc/imx/Makefile |   1 +
> >  drivers/soc/imx/soc-imx-sc.c | 220 
> > +++
> >  3 files changed, 228 insertions(+)
> >  create mode 100644 drivers/soc/imx/soc-imx-sc.c
> 
> Rather than creating a new driver, please take a look at Abel's generic
> i.MX8 SoC driver, and see if it can be extended to cover i.MX8QXP.

Forgot to give pointer to Abel's driver.

https://git.kernel.org/pub/scm/linux/kernel/git/shawnguo/linux.git/commit/?h=imx/drivers&id=a7e26f356ca12906a164d83c9e9f8527ee7da022

Shawn

Re: Goed nieuws

2019-04-21 Thread Andre Moreti



- Mensagem original -
De: Andre Moreti 
Para: andre moreti 
Enviadas: Sat, 20 Apr 2019 12:00:23 -0300 (BRT)
Assunto: Goed nieuws

Ik ben Andre Moreti door ONU, je hebt een donatie, neem contact op voor meer 
informatie: onuespfoundat...@gmail.com voor claim

oprecht
Andre Moreti
contact: onuespfoundat...@gmail.com

Re: [PATCH] ARM: dts: imx: Add mclk0 clock for SAI

2019-04-21 Thread Nicolin Chen

On Sat, Apr 20, 2019 at 09:12:52AM +0000, Daniel Baluta wrote:
> From: Shengjiu Wang 
> 
> SAI has 4 clock sources, which can be selected using MSEL
> bit of SAI TCR2 register.

I have a doubt at this statement. As far as I can understand,
this MSEL is probably used by its internal clock MUX, so it's
not really proving that SAI has 4 MCLK inputs. What I know is
that SAI block itself only has 3 MCLK inputs as we defined in
DT. It's just internally connects bus clock or MCLK1 to input0
of clock MUX's and connects MCLK[1-3] to input[1-3]. So adding
an MCLK0 here doesn't sound a right way to me. Unless someone
can justify for it, I think we should just fix it from driver
side.

Thanks
Nicolin

> 
> On imx6/7 mclk0 and mclk1 always point to the same clock
> source. Anyhow, this is no longer true for imx8.
> 
> For this reason, we need to add mclk0 and handle it
> in a generic way in SAI driver.
> 
> Signed-off-by: Shengjiu Wang 
> Signed-off-by: Daniel Baluta 
> ---
>  arch/arm/boot/dts/imx6sx.dtsi | 6 --
>  arch/arm/boot/dts/imx6ul.dtsi | 9 ++---
>  arch/arm/boot/dts/imx7s.dtsi  | 9 ++---
>  3 files changed, 16 insertions(+), 8 deletions(-)
> 
> diff --git a/arch/arm/boot/dts/imx6sx.dtsi b/arch/arm/boot/dts/imx6sx.dtsi
> index b16a123990a2..682207b5d868 100644
> --- a/arch/arm/boot/dts/imx6sx.dtsi
> +++ b/arch/arm/boot/dts/imx6sx.dtsi
> @@ -1071,9 +1071,10 @@
>   reg = <0x021d4000 0x4000>;
>   interrupts = ;
>   clocks = < IMX6SX_CLK_SAI1_IPG>,
> +  < IMX6SX_CLK_SAI1>,
>< IMX6SX_CLK_SAI1>,
>< 0>, < 0>;
> - clock-names = "bus", "mclk1", "mclk2", "mclk3";
> + clock-names = "bus", "mclk0", "mclk1", "mclk2", 
> "mclk3";
>   dma-names = "rx", "tx";
>   dmas = < 31 24 0>, < 32 24 0>;
>   status = "disabled";
> @@ -1090,9 +1091,10 @@
>   reg = <0x021dc000 0x4000>;
>   interrupts = ;
>   clocks = < IMX6SX_CLK_SAI2_IPG>,
> +  < IMX6SX_CLK_SAI2>,
>< IMX6SX_CLK_SAI2>,
>< 0>, < 0>;
> - clock-names = "bus", "mclk1", "mclk2", "mclk3";
> + clock-names = "bus", "mclk0", "mclk1", "mclk2", 
> "mclk3";
>   dma-names = "rx", "tx";
>   dmas = < 33 24 0>, < 34 24 0>;
>   status = "disabled";
> diff --git a/arch/arm/boot/dts/imx6ul.dtsi b/arch/arm/boot/dts/imx6ul.dtsi
> index bbf010c73336..e9691306f557 100644
> --- a/arch/arm/boot/dts/imx6ul.dtsi
> +++ b/arch/arm/boot/dts/imx6ul.dtsi
> @@ -304,9 +304,10 @@
>   reg = <0x02028000 0x4000>;
>   interrupts =  IRQ_TYPE_LEVEL_HIGH>;
>   clocks = < IMX6UL_CLK_SAI1_IPG>,
> +  < IMX6UL_CLK_SAI1>,
>< IMX6UL_CLK_SAI1>,
>< IMX6UL_CLK_DUMMY>, 
> < IMX6UL_CLK_DUMMY>;
> - clock-names = "bus", "mclk1", "mclk2", 
> "mclk3";
> + clock-names = "bus", "mclk0", "mclk1", 
> "mclk2", "mclk3";
>   dmas = < 35 24 0>,
>  < 36 24 0>;
>   dma-names = "rx", "tx";
> @@ -319,9 +320,10 @@
>   reg = <0x0202c000 0x4000>;
>   interrupts =  IRQ_TYPE_LEVEL_HIGH>;
>   clocks = < IMX6UL_CLK_SAI2_IPG>,
> +  < IMX6UL_CLK_SAI2>,
>< IMX6UL_CLK_SAI2>,
>< IMX6UL_CLK_DUMMY>, 
> < IMX6UL_CLK_DUMMY>;
> - clock-names = "bus", "mclk1", "mclk2", 
> "mclk3";
> + clock-names = "bus", "mclk0", "mclk1", 
> "mclk2", "mclk3";
>   dmas = < 37 24 0>,
>  < 38 24 0>;
>   dma-names = "rx", "tx";
> @@ -334,9 +336,10 @@
>   reg = <0x0203 0x4000>;
>   interrupts =  IRQ_TYPE_LEVEL_HIGH>;
>   clocks = < IMX6UL_CLK_SAI3_IPG>,
> +  < IMX6UL_CLK_SAI3>,
>

Re: [PATCH v2 1/2] ras: fix an off-by-one error in __find_elem()

2019-04-21 Thread Borislav Petkov

On Sat, Apr 20, 2019 at 12:15:26PM -0700, Cong Wang wrote:
> Yes, one is V1 and the other is V2. Is it hard to understand V2 is to
> replace V1?

Well, looking at these two very different fixes, it made me think that
you don't really know what you're doing. So I went and did the Knuth's
version just so that I can analyze and understand the issue myself.

The final result ended up needing *both* the index fix *and* removed the
trailing noodling code after the loop which looked fishy at best and I
wanted it gone anyway.

So in the end:

1. your first fix was correct but incomplete

2. your second was replaced by a better version of the whole thing

So the final result is a lot cleaner and straight-forward. And it is
only 29 lines and I don't see a problem with it going to stable.

And I as author and maintainer of this code have very much the
prerogative to decide which way to go, TYVM. No matter how much you
passive-aggressively bitch. Thanks to your last mail, I won't have to
make this choice anymore.

-- 
Regards/Gruss,
Boris.

Good mailing practices for 400: avoid top-posting and trim the reply.

Re: [PATCH] drivers: cpufreq: use kstrtoul instead of obsolete simple_strtoul

2019-04-21 Thread kbuild test robot

Hi Mohan,

Thank you for the patch! Yet something to improve:

[auto build test ERROR on pm/linux-next]
[also build test ERROR on v5.1-rc5 next-20190418]
[if your patch is applied to the wrong git tree, please drop us a note to help 
improve the system]

url:
https://github.com/0day-ci/linux/commits/Mohan-Kumar/drivers-cpufreq-use-kstrtoul-instead-of-obsolete-simple_strtoul/20190421-044216
base:   https://git.kernel.org/pub/scm/linux/kernel/git/rafael/linux-pm.git 
linux-next
config: i386-randconfig-c0-04211526 (attached as .config)
compiler: gcc-7 (Debian 7.3.0-1) 7.3.0
reproduce:
# save the attached .config to linux build tree
make ARCH=i386 

If you fix the issue, kindly add following tag
Reported-by: kbuild test robot 


All errors (new ones prefixed by >>):

   drivers//cpufreq/elanfreq.c: In function 'elanfreq_setup':
>> drivers//cpufreq/elanfreq.c:187:48: error: 'val' undeclared (first use in 
>> this function); did you mean 'vmap'?
 max_freq = kstrtoul(str, 0, (unsigned long *)&val);
   ^~~
   vmap
   drivers//cpufreq/elanfreq.c:187:48: note: each undeclared identifier is 
reported only once for each function it appears in

vim +187 drivers//cpufreq/elanfreq.c

   171  
   172  
   173  #ifndef MODULE
   174  /**
   175   * elanfreq_setup - elanfreq command line parameter parsing
   176   *
   177   * elanfreq command line parameter.  Use:
   178   *  elanfreq=66000
   179   * to set the maximum CPU frequency to 66 MHz. Note that in
   180   * case you do not give this boot parameter, the maximum
   181   * frequency will fall back to _current_ CPU frequency which
   182   * might be lower. If you build this as a module, use the
   183   * max_freq module parameter instead.
   184   */
   185  static int __init elanfreq_setup(char *str)
   186  {
 > 187  max_freq = kstrtoul(str, 0, (unsigned long *)&val);
   188  pr_warn("You're using the deprecated elanfreq command line 
option. Use elanfreq.max_freq instead, please!\n");
   189  return 1;
   190  }
   191  __setup("elanfreq=", elanfreq_setup);
   192  #endif
   193  
   194  

---
0-DAY kernel test infrastructure   Open Source Technology Center
https://lists.01.org/pipermail/kbuild-all   Intel Corporation


.config.gz
Description: application/gzip

[PATCH v2 03/06] misc: genwqe: Fix misuse of %x

2019-04-21 Thread Fuqian Huang

The pointer should be printed with %p or %px rather than
cast to long long type and printed with %016llx.
Change %x to %p to print the pointer.

Signed-off-by: Fuqian Huang 
---
 drivers/misc/genwqe/card_debugfs.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/misc/genwqe/card_debugfs.c 
b/drivers/misc/genwqe/card_debugfs.c
index 7c713e0..6f7e39f 100644
--- a/drivers/misc/genwqe/card_debugfs.c
+++ b/drivers/misc/genwqe/card_debugfs.c
@@ -227,7 +227,7 @@ static int ddcb_info_show(struct seq_file *s, void *unused)
seq_puts(s, "DDCB QUEUE:\n");
seq_printf(s, "  ddcb_max:%d\n"
   "  ddcb_daddr:  %016llx - %016llx\n"
-  "  ddcb_vaddr:  %016llx\n"
+  "  ddcb_vaddr:  %p\n"
   "  ddcbs_in_flight: %u\n"
   "  ddcbs_max_in_flight: %u\n"
   "  ddcbs_completed: %u\n"
@@ -237,7 +237,7 @@ static int ddcb_info_show(struct seq_file *s, void *unused)
   queue->ddcb_max, (long long)queue->ddcb_daddr,
   (long long)queue->ddcb_daddr +
   (queue->ddcb_max * DDCB_LENGTH),
-  (long long)queue->ddcb_vaddr, queue->ddcbs_in_flight,
+  queue->ddcb_vaddr, queue->ddcbs_in_flight,
   queue->ddcbs_max_in_flight, queue->ddcbs_completed,
   queue->return_on_busy, queue->wait_on_busy,
   cd->irqs_processed);
-- 
2.11.0

[PATCH v2 02/06] kernel: cgroup: fix misuse of %x

2019-04-21 Thread Fuqian Huang

Pointers should be printed with %p or %px rather than
cast to unsigned long type and printed with %lx.
Change %lx to %p to print the pointers.

Signed-off-by: Fuqian Huang 
---
 kernel/cgroup/debug.c | 8 
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/kernel/cgroup/debug.c b/kernel/cgroup/debug.c
index 5f1b873..80aa3f0 100644
--- a/kernel/cgroup/debug.c
+++ b/kernel/cgroup/debug.c
@@ -64,8 +64,8 @@ static int current_css_set_read(struct seq_file *seq, void *v)
css = cset->subsys[ss->id];
if (!css)
continue;
-   seq_printf(seq, "%2d: %-4s\t- %lx[%d]\n", ss->id, ss->name,
- (unsigned long)css, css->id);
+   seq_printf(seq, "%2d: %-4s\t- %p[%d]\n", ss->id, ss->name,
+ css, css->id);
}
rcu_read_unlock();
spin_unlock_irq(_set_lock);
@@ -224,8 +224,8 @@ static int cgroup_subsys_states_read(struct seq_file *seq, 
void *v)
if (css->parent)
snprintf(pbuf, sizeof(pbuf) - 1, " P=%d",
 css->parent->id);
-   seq_printf(seq, "%2d: %-4s\t- %lx[%d] %d%s\n", ss->id, ss->name,
- (unsigned long)css, css->id,
+   seq_printf(seq, "%2d: %-4s\t- %p[%d] %d%s\n", ss->id, ss->name,
+ css, css->id,
  atomic_read(>online_cnt), pbuf);
}
 
-- 
2.11.0

[PATCH v2 06/06] sound: isa: gus: fix misuse of %x

2019-04-21 Thread Fuqian Huang

Pointers should be printed with %p or %px rather than
cast to long type and printed with %lx.
Change %lx to %p to print the pointer.

Signed-off-by: Fuqian Huang 
---
 sound/isa/gus/gus_mem.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/sound/isa/gus/gus_mem.c b/sound/isa/gus/gus_mem.c
index 4ac76f4..63e790f 100644
--- a/sound/isa/gus/gus_mem.c
+++ b/sound/isa/gus/gus_mem.c
@@ -306,7 +306,7 @@ static void snd_gf1_mem_info_read(struct snd_info_entry 
*entry,
used = 0;
for (block = alloc->first, i = 0; block; block = block->next, i++) {
used += block->size;
-   snd_iprintf(buffer, "Block %i at 0x%lx onboard 0x%x size %i 
(0x%x):\n", i, (long) block, block->ptr, block->size, block->size);
+   snd_iprintf(buffer, "Block %i at 0x%p onboard 0x%x size %i 
(0x%x):\n", i, block, block->ptr, block->size, block->size);
if (block->share ||
block->share_id[0] || block->share_id[1] ||
block->share_id[2] || block->share_id[3])
-- 
2.11.0

[PATCH v2] lib/scatterlist: Remove leftover from sg_page_iter comment

2019-04-21 Thread Gal Pressman

Commit d901b2760dc6 ("lib/scatterlist: Provide a DMA page iterator")
added the sg DMA iterator but a leftover remained in the sg_page_iter
documentation as you cannot get the page dma address (only the page
itself), fix it.

Cc: Jason Gunthorpe 
Signed-off-by: Gal Pressman 
Reviewed-by: Mukesh Ojha 
---
Changelog:
v1->v2:
* Reword commit message
---
 include/linux/scatterlist.h | 10 +-
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/include/linux/scatterlist.h b/include/linux/scatterlist.h
index b4be960c7e5d..30a9a55c28ba 100644
--- a/include/linux/scatterlist.h
+++ b/include/linux/scatterlist.h
@@ -340,11 +340,11 @@ int sg_alloc_table_chained(struct sg_table *table, int 
nents,
  * sg page iterator
  *
  * Iterates over sg entries page-by-page.  On each successful iteration, you
- * can call sg_page_iter_page(@piter) to get the current page and its dma
- * address. @piter->sg will point to the sg holding this page and
- * @piter->sg_pgoffset to the page's page offset within the sg. The iteration
- * will stop either when a maximum number of sg entries was reached or a
- * terminating sg (sg_last(sg) == true) was reached.
+ * can call sg_page_iter_page(@piter) to get the current page.
+ * @piter->sg will point to the sg holding this page and @piter->sg_pgoffset to
+ * the page's page offset within the sg. The iteration will stop either when a
+ * maximum number of sg entries was reached or a terminating sg
+ * (sg_last(sg) == true) was reached.
  */
 struct sg_page_iter {
struct scatterlist  *sg;/* sg holding the page */
-- 
2.7.4

Re: [PATCH v4] arm64: dts: hi3660: Add CoreSight support

2019-04-21 Thread Leo Yan

On Sat, Apr 20, 2019 at 10:00:35PM +0800, Wanglai Shi wrote:
> This patch adds DT bindings for the CoreSight trace components
> on hi3660, which is used by 96boards Hikey960.
> 
> Signed-off-by: Wanglai Shi 

Reviewed this patch and tested on my Hikey960 board, FWIW:

Reviewed-and-tested-by: Leo Yan 

> ---
>  .../arm64/boot/dts/hisilicon/hi3660-coresight.dtsi | 456 
> +
>  arch/arm64/boot/dts/hisilicon/hi3660.dtsi  |   2 +
>  2 files changed, 458 insertions(+)
>  create mode 100644 arch/arm64/boot/dts/hisilicon/hi3660-coresight.dtsi
> 
> diff --git a/arch/arm64/boot/dts/hisilicon/hi3660-coresight.dtsi 
> b/arch/arm64/boot/dts/hisilicon/hi3660-coresight.dtsi
> new file mode 100644
> index 000..d607f2f
> --- /dev/null
> +++ b/arch/arm64/boot/dts/hisilicon/hi3660-coresight.dtsi
> @@ -0,0 +1,456 @@
> +// SPDX-License-Identifier: GPL-2.0
> +
> +/*
> + * dtsi for Hisilicon Hi3660 Coresight
> + *
> + * Copyright (C) 2016-2018 Hisilicon Ltd.
> + *
> + * Author: Wanglai Shi 
> + *
> + */
> +/ {
> + soc {
> + /* A53 cluster internals */
> + etm@ecc4 {
> + compatible = "arm,coresight-etm4x", "arm,primecell";
> + reg = <0 0xecc4 0 0x1000>;
> + clocks = <_ctrl HI3660_PCLK>;
> + clock-names = "apb_pclk";
> + cpu = <>;
> +
> + out-ports {
> + port {
> + etm0_out: endpoint {
> + remote-endpoint =
> + <_funnel_in0>;
> + };
> + };
> + };
> + };
> +
> + etm@ecd4 {
> + compatible = "arm,coresight-etm4x", "arm,primecell";
> + reg = <0 0xecd4 0 0x1000>;
> + clocks = <_ctrl HI3660_PCLK>;
> + clock-names = "apb_pclk";
> + cpu = <>;
> +
> + out-ports {
> + port {
> + etm1_out: endpoint {
> + remote-endpoint =
> + <_funnel_in1>;
> + };
> + };
> + };
> + };
> +
> + etm@ece4 {
> + compatible = "arm,coresight-etm4x", "arm,primecell";
> + reg = <0 0xece4 0 0x1000>;
> + clocks = <_ctrl HI3660_PCLK>;
> + clock-names = "apb_pclk";
> + cpu = <>;
> +
> + out-ports {
> + port {
> + etm2_out: endpoint {
> + remote-endpoint =
> + <_funnel_in2>;
> + };
> + };
> + };
> + };
> +
> + etm@ecf4 {
> + compatible = "arm,coresight-etm4x", "arm,primecell";
> + reg = <0 0xecf4 0 0x1000>;
> + clocks = <_ctrl HI3660_PCLK>;
> + clock-names = "apb_pclk";
> + cpu = <>;
> +
> + out-ports {
> + port {
> + etm3_out: endpoint {
> + remote-endpoint =
> + <_funnel_in3>;
> + };
> + };
> + };
> + };
> +
> + funnel@ec801000 {
> + compatible = "arm,coresight-dynamic-funnel", 
> "arm,primecell";
> + reg = <0 0xec801000 0 0x1000>;
> + clocks = <_ctrl HI3660_PCLK>;
> + clock-names = "apb_pclk";
> +
> + out-ports {
> + port {
> + cluster0_funnel_out: endpoint {
> + remote-endpoint =
> + <_etf_in>;
> + };
> + };
> + };
> +
> + in-ports {
> + #address-cells = <1>;
> + #size-cells = <0>;
> +
> + port@0 {
> + reg = <0>;
> + cluster0_funnel_in0: endpoint {
> + remote-endpoint = <_out>;
> +

Re: DISCONTIGMEM is deprecated

2019-04-21 Thread Matthew Wilcox

On Sun, Apr 21, 2019 at 09:38:59AM +0300, Mike Rapoport wrote:
> On Fri, Apr 19, 2019 at 07:05:21AM -0700, Matthew Wilcox wrote:
> > On Fri, Apr 19, 2019 at 10:43:35AM +0100, Mel Gorman wrote:
> > > DISCONTIG is essentially deprecated and even parisc plans to move to
> > > SPARSEMEM so there is no need to be fancy, this patch simply disables
> > > watermark boosting by default on DISCONTIGMEM.
> > 
> > I don't think parisc is the only arch which uses DISCONTIGMEM for !NUMA
> > scenarios.  Grepping the arch/ directories shows:
> > 
> > alpha (does support NUMA, but also non-NUMA DISCONTIGMEM)
> > arc (for supporting more than 1GB of memory)
> > ia64 (looks complicated ...)
> > m68k (for multiple chunks of memory)
> > mips (does support NUMA but also non-NUMA)
> > parisc (both NUMA and non-NUMA)
> 
> i386 NUMA as well

I clearly over-trimmed.  The original assumption that Mel had was that
DISCONTIGMEM => NUMA, and that's not true on the above six architectures.
It is true on i386 ;-)

Re: arch/sh/kernel/cpu/sh2/clock-sh7619.o:undefined reference to `followparent_recalc'

2019-04-21 Thread Yoshinori Sato

On Sun, 21 Apr 2019 04:34:36 +0900,
Randy Dunlap wrote:
> 
> On 4/20/19 12:40 AM, kbuild test robot wrote:
> > Hi Randy,
> > 
> > It's probably a bug fix that unveils the link errors.
> > 
> > tree:   https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git 
> > master
> > head:   371dd432ab39f7bc55d6ec77d63b430285627e04
> > commit: acaf892ecbf5be7710ae05a61fd43c668f68ad95 sh: fix multiple function 
> > definition build errors
> > date:   2 weeks ago
> > config: sh-allmodconfig (attached as .config)
> > compiler: sh4-linux-gnu-gcc (Debian 7.2.0-11) 7.2.0
> > reproduce:
> > wget 
> > https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O 
> > ~/bin/make.cross
> > chmod +x ~/bin/make.cross
> > git checkout acaf892ecbf5be7710ae05a61fd43c668f68ad95
> > # save the attached .config to linux build tree
> > GCC_VERSION=7.2.0 make.cross ARCH=sh 
> 
> Hi,
> 
> Once again, the question is the validity of the SH2 .config file in this case
> (that was attached).
> 
> I don't believe that it is valid because CONFIG_SH_DEVICE_TREE=y,
> which selects COMMON_CLK, and there is no followparent_recalc() in the
> COMMON_CLK API.
> 
> Also, while CONFIG_HAVE_CLK=y, drivers/sh/Makefile prevents that from
> building clk/core.c, which could provide followparent_recalc():
> 
> ifneq ($(CONFIG_COMMON_CLK),y)
> obj-$(CONFIG_HAVE_CLK)+= clk/
> endif
> 
> Hm, maybe that's where the problem is.  I'll look into that more.
>

Yes.
Selected target (CONFIG_SH_7619_SOLUTION_ENGINE) is non devicetree
and used superh specific clk modules.
So allyesconfig output is incorrect.

I fixed Kconfig to output the correct config.

> 
> 
> It would be Good if someone from the SuperH area could/would comment.
> 
> Thanks.
> 
> 
> > If you fix the issue, kindly add following tag
> > Reported-by: kbuild test robot 
> > 
> > 
> > All errors (new ones prefixed by >>):
> > 
> >>> arch/sh/kernel/cpu/sh2/clock-sh7619.o:(.data+0x1c): undefined reference 
> >>> to `followparent_recalc'
> > 
> > ---
> > 0-DAY kernel test infrastructureOpen Source Technology 
> > Center
> > https://lists.01.org/pipermail/kbuild-all   Intel 
> > Corporation
> > 
> 
> 
> -- 
> ~Randy

-- 
Yosinori Sato

[no subject]

2019-04-21 Thread 김현규

unsubscribe

[RESEND] drivers: cpufreq: use kstrtoul instead of obsolete simple_strtoul issue fixed

2019-04-21 Thread mohankumar718

From: Mohan Kumar 

Replace the obsolete simple_strtoul function with kstrtoul.

Signed-off-by: Mohan Kumar 
---
 drivers/cpufreq/elanfreq.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/cpufreq/elanfreq.c b/drivers/cpufreq/elanfreq.c
index 03419f064752..6d861c2364e2 100644
--- a/drivers/cpufreq/elanfreq.c
+++ b/drivers/cpufreq/elanfreq.c
@@ -184,7 +184,8 @@ static int elanfreq_cpu_init(struct cpufreq_policy *policy)
  */
 static int __init elanfreq_setup(char *str)
 {
-   max_freq = simple_strtoul(str, , 0);
+   unsigned long int val = 0;
+   max_freq = kstrtoul(str, 0, );
pr_warn("You're using the deprecated elanfreq command line option. Use 
elanfreq.max_freq instead, please!\n");
return 1;
 }
-- 
2.17.1

Re: [PATCH] ptrace.2: Improve clarity for multi-threaded tracers

2019-04-21 Thread Niklas Hambüchen

Hey Michael,

On 2019-02-25 16:51, Michael Kerrisk (man-pages) wrote:
> Do you plan to revise this patch in the light of Dmitry's comments?

sorry for the delay, I do intend to finish it and just replied.

Niklas

Re: [PATCH] ptrace.2: Improve clarity for multi-threaded tracers

2019-04-21 Thread Niklas Hambüchen

Hey Dmitry,

On 2019-02-17 23:15, Dmitry V. Levin wrote:
>>  A tracee first needs to be attached to the tracer.
>> -Attachment and subsequent commands are per thread:
>> -in a multithreaded process,
>> +Attachment and subsequent commands are per thread,
>> +on both the tracer and tracee side.
>> +Issuing a tracing command from a thread that is not the tracer of the given
>> +.I pid
>> +will result in an
>> +.B ESRCH
>> +error.
> 
> This is confusing.  What do you mean by a tracing command?
> Is PTRACE_TRACEME a tracing command?  PTRACE_ATTACH?  PTRACE_SEIZE?

I was referring to the same command as in other places in the man page, as in 
the existing sentences

Most ptrace commands [...] require the tracee to be in a  ptrace-stop, 
otherwise they fail with ESRCH.

or

(for commands which require a stopped tracee)

Would thus "ptrace command" be better than "tracing command" here?

>>  .B ESRCH
>>  The specified process does not exist, or is not currently being traced
>> -by the caller, or is not stopped
>> +by the calling thread, or is not stopped
>>  (for requests that require a stopped tracee).
>>  .SH CONFORMING TO
>>  SVr4, 4.3BSD.
> 
> I agree the current text can be made more clear on the subject,
> but, unfortunately, proposed change makes the description more confusing.

Do you mean "calling thread" is more confusing than "caller"?
If yes, what would you suggest instead?

My intent here was to, for anybody who encounters ESRCH and looks it up in an 
effort to see what's going on, make clear that threads are important here.

Or should I switch to `task_struct` terminology? That wouldn't be userspace 
terminology though, and the rest of the man page also talks about threads.

Niklas

signature.asc
Description: OpenPGP digital signature

Reasons for oldconfig resetting config options to =m

2019-04-21 Thread Niklas Hambüchen

When you set an option to `=y` but one of its dependencies is set to `=m`, then 
`make oldconfig` will reset it back to `=m` as well.

That makes sense, but does there exist a feature somewhere that can tell me 
explicitly when this happens, ideally with a reason like:

Resetting CONFIG_USB_STORAGE from =y to =m
because its dependency CONFIG_... is set to =m
Consequently also resetting the following dependent options from =y to 
=m:
CONFIG_USB_STORAGE_REALTEK
CONFIG_USB_...

This would help tuning kernel configs in situations where menuconfig is not 
appropriate, for example when working with diffs of configs.

Is there such tooling, or what do people use?

Thanks,
Niklas

Re: arch/sh/kernel/cpu/sh2/clock-sh7619.o:undefined reference to `followparent_recalc'

2019-04-21 Thread Randy Dunlap

On 4/21/19 6:52 AM, Yoshinori Sato wrote:
> On Sun, 21 Apr 2019 04:34:36 +0900,
> Randy Dunlap wrote:
>>
>> On 4/20/19 12:40 AM, kbuild test robot wrote:
>>> Hi Randy,
>>>
>>> It's probably a bug fix that unveils the link errors.
>>>
>>> tree:   https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git 
>>> master
>>> head:   371dd432ab39f7bc55d6ec77d63b430285627e04
>>> commit: acaf892ecbf5be7710ae05a61fd43c668f68ad95 sh: fix multiple function 
>>> definition build errors
>>> date:   2 weeks ago
>>> config: sh-allmodconfig (attached as .config)
>>> compiler: sh4-linux-gnu-gcc (Debian 7.2.0-11) 7.2.0
>>> reproduce:
>>> wget 
>>> https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O 
>>> ~/bin/make.cross
>>> chmod +x ~/bin/make.cross
>>> git checkout acaf892ecbf5be7710ae05a61fd43c668f68ad95
>>> # save the attached .config to linux build tree
>>> GCC_VERSION=7.2.0 make.cross ARCH=sh 
>>
>> Hi,
>>
>> Once again, the question is the validity of the SH2 .config file in this case
>> (that was attached).
>>
>> I don't believe that it is valid because CONFIG_SH_DEVICE_TREE=y,
>> which selects COMMON_CLK, and there is no followparent_recalc() in the
>> COMMON_CLK API.
>>
>> Also, while CONFIG_HAVE_CLK=y, drivers/sh/Makefile prevents that from
>> building clk/core.c, which could provide followparent_recalc():
>>
>> ifneq ($(CONFIG_COMMON_CLK),y)
>> obj-$(CONFIG_HAVE_CLK)   += clk/
>> endif
>>
>> Hm, maybe that's where the problem is.  I'll look into that more.
>>
> 
> Yes.
> Selected target (CONFIG_SH_7619_SOLUTION_ENGINE) is non devicetree
> and used superh specific clk modules.
> So allyesconfig output is incorrect.
> 
> I fixed Kconfig to output the correct config.

Thanks for that.
The patch fixes this problem in my builds.

However, now I see these build errors:

ERROR: "__ashiftrt_r4_28" [fs/udf/udf.ko] undefined!
ERROR: "__ashiftrt_r4_26" [drivers/rtc/rtc-x1205.ko] undefined!
ERROR: "__ashiftrt_r4_25" [drivers/rtc/rtc-pcf2123.ko] undefined!
ERROR: "__ashiftrt_r4_28" [drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu.ko] 
undefined!
ERROR: "__ashiftrt_r4_25" [drivers/input/tablet/gtco.ko] undefined!
ERROR: "__ashiftrt_r4_26" [drivers/input/mouse/psmouse.ko] undefined!
ERROR: "__ashiftrt_r4_28" [drivers/input/mouse/psmouse.ko] undefined!
ERROR: "__ashiftrt_r4_25" [drivers/iio/pressure/bmp280.ko] undefined!
ERROR: "__ashiftrt_r4_26" [drivers/iio/dac/ad5764.ko] undefined!
ERROR: "__ashiftrt_r4_26" [drivers/iio/accel/mma7660.ko] undefined!
ERROR: "__ashiftrt_r4_25" [drivers/iio/accel/dmard06.ko] undefined!
ERROR: "__ashiftrt_r4_26" [drivers/iio/accel/bma220_spi.ko] undefined!
ERROR: "__ashiftrt_r4_25" [drivers/crypto/hisilicon/sec/hisi_sec.ko] undefined!

Is this just a toolchain problem?

I am using the gcc 8.1.0 tools from
https://mirrors.edge.kernel.org/pub/tools/crosstool/

thanks.
-- 
~Randy

[PATCH] csky: Add support for libdw

2019-04-21 Thread guoren

From: Mao Han 

This patch adds support for DWARF register mappings and libdw registers
initialization, which is used by perf callchain analyzing when
--call-graph=dwarf is given.

Here is elfutils csky backend patch set:
https://sourceware.org/ml/elfutils-devel/2019-q2/msg7.html

Signed-off-by: Mao Han 
Signed-off-by: Guo Ren 
Cc: Peter Zijlstra 
Cc: Ingo Molnar 
Cc: Arnaldo Carvalho de Melo 
Cc: Alexander Shishkin 
Cc: Jiri Olsa 
Cc: Namhyung Kim 
Cc: Arnd Bergmann 
---
 tools/arch/csky/include/uapi/asm/perf_regs.h |  51 ++
 tools/perf/Makefile.config   |   6 +-
 tools/perf/arch/csky/Build   |   1 +
 tools/perf/arch/csky/Makefile|   3 +
 tools/perf/arch/csky/include/perf_regs.h | 100 +++
 tools/perf/arch/csky/util/Build  |   2 +
 tools/perf/arch/csky/util/dwarf-regs.c   |  49 +
 tools/perf/arch/csky/util/unwind-libdw.c |  77 +
 8 files changed, 288 insertions(+), 1 deletion(-)
 create mode 100644 tools/arch/csky/include/uapi/asm/perf_regs.h
 create mode 100644 tools/perf/arch/csky/Build
 create mode 100644 tools/perf/arch/csky/Makefile
 create mode 100644 tools/perf/arch/csky/include/perf_regs.h
 create mode 100644 tools/perf/arch/csky/util/Build
 create mode 100644 tools/perf/arch/csky/util/dwarf-regs.c
 create mode 100644 tools/perf/arch/csky/util/unwind-libdw.c

diff --git a/tools/arch/csky/include/uapi/asm/perf_regs.h 
b/tools/arch/csky/include/uapi/asm/perf_regs.h
new file mode 100644
index 000..ee323d8
--- /dev/null
+++ b/tools/arch/csky/include/uapi/asm/perf_regs.h
@@ -0,0 +1,51 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+// Copyright (C) 2019 Hangzhou C-SKY Microsystems co.,ltd.
+
+#ifndef _ASM_CSKY_PERF_REGS_H
+#define _ASM_CSKY_PERF_REGS_H
+
+/* Index of struct pt_regs */
+enum perf_event_csky_regs {
+   PERF_REG_CSKY_TLS,
+   PERF_REG_CSKY_LR,
+   PERF_REG_CSKY_PC,
+   PERF_REG_CSKY_SR,
+   PERF_REG_CSKY_SP,
+   PERF_REG_CSKY_ORIG_A0,
+   PERF_REG_CSKY_A0,
+   PERF_REG_CSKY_A1,
+   PERF_REG_CSKY_A2,
+   PERF_REG_CSKY_A3,
+   PERF_REG_CSKY_REGS0,
+   PERF_REG_CSKY_REGS1,
+   PERF_REG_CSKY_REGS2,
+   PERF_REG_CSKY_REGS3,
+   PERF_REG_CSKY_REGS4,
+   PERF_REG_CSKY_REGS5,
+   PERF_REG_CSKY_REGS6,
+   PERF_REG_CSKY_REGS7,
+   PERF_REG_CSKY_REGS8,
+   PERF_REG_CSKY_REGS9,
+#if defined(__CSKYABIV2__)
+   PERF_REG_CSKY_EXREGS0,
+   PERF_REG_CSKY_EXREGS1,
+   PERF_REG_CSKY_EXREGS2,
+   PERF_REG_CSKY_EXREGS3,
+   PERF_REG_CSKY_EXREGS4,
+   PERF_REG_CSKY_EXREGS5,
+   PERF_REG_CSKY_EXREGS6,
+   PERF_REG_CSKY_EXREGS7,
+   PERF_REG_CSKY_EXREGS8,
+   PERF_REG_CSKY_EXREGS9,
+   PERF_REG_CSKY_EXREGS10,
+   PERF_REG_CSKY_EXREGS11,
+   PERF_REG_CSKY_EXREGS12,
+   PERF_REG_CSKY_EXREGS13,
+   PERF_REG_CSKY_EXREGS14,
+   PERF_REG_CSKY_HI,
+   PERF_REG_CSKY_LO,
+   PERF_REG_CSKY_DCSR,
+#endif
+   PERF_REG_CSKY_MAX,
+};
+#endif /* _ASM_CSKY_PERF_REGS_H */
diff --git a/tools/perf/Makefile.config b/tools/perf/Makefile.config
index fe3f97e..42985ae 100644
--- a/tools/perf/Makefile.config
+++ b/tools/perf/Makefile.config
@@ -59,6 +59,10 @@ ifeq ($(SRCARCH),arm64)
   LIBUNWIND_LIBS = -lunwind -lunwind-aarch64
 endif
 
+ifeq ($(SRCARCH),csky)
+  NO_PERF_REGS := 0
+endif
+
 ifeq ($(ARCH),s390)
   NO_PERF_REGS := 0
   NO_SYSCALL_TABLE := 0
@@ -77,7 +81,7 @@ endif
 # Disable it on all other architectures in case libdw unwind
 # support is detected in system. Add supported architectures
 # to the check.
-ifneq ($(SRCARCH),$(filter $(SRCARCH),x86 arm arm64 powerpc s390))
+ifneq ($(SRCARCH),$(filter $(SRCARCH),x86 arm arm64 powerpc s390 csky))
   NO_LIBDW_DWARF_UNWIND := 1
 endif
 
diff --git a/tools/perf/arch/csky/Build b/tools/perf/arch/csky/Build
new file mode 100644
index 000..e4e5f33
--- /dev/null
+++ b/tools/perf/arch/csky/Build
@@ -0,0 +1 @@
+perf-y += util/
diff --git a/tools/perf/arch/csky/Makefile b/tools/perf/arch/csky/Makefile
new file mode 100644
index 000..7fbca17
--- /dev/null
+++ b/tools/perf/arch/csky/Makefile
@@ -0,0 +1,3 @@
+ifndef NO_DWARF
+PERF_HAVE_DWARF_REGS := 1
+endif
diff --git a/tools/perf/arch/csky/include/perf_regs.h 
b/tools/perf/arch/csky/include/perf_regs.h
new file mode 100644
index 000..8f336ea
--- /dev/null
+++ b/tools/perf/arch/csky/include/perf_regs.h
@@ -0,0 +1,100 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+// Copyright (C) 2019 Hangzhou C-SKY Microsystems co.,ltd.
+
+#ifndef ARCH_PERF_REGS_H
+#define ARCH_PERF_REGS_H
+
+#include 
+#include 
+#include 
+
+#define PERF_REGS_MASK ((1ULL << PERF_REG_CSKY_MAX) - 1)
+#define PERF_REGS_MAX  PERF_REG_CSKY_MAX
+#define PERF_SAMPLE_REGS_ABI   PERF_SAMPLE_REGS_ABI_32
+
+#define PERF_REG_IPPERF_REG_CSKY_PC
+#define PERF_REG_SPPERF_REG_CSKY_SP
+
+static inline const char *perf_reg_name(int id)
+{
+   switch (id) {
+   case

Re: Reasons for oldconfig resetting config options to =m

2019-04-21 Thread Randy Dunlap

On 4/21/19 8:11 AM, Niklas Hambüchen wrote:
> When you set an option to `=y` but one of its dependencies is set to `=m`, 
> then `make oldconfig` will reset it back to `=m` as well.
> 
> That makes sense, but does there exist a feature somewhere that can tell me 
> explicitly when this happens, ideally with a reason like:
> 
> Resetting CONFIG_USB_STORAGE from =y to =m
> because its dependency CONFIG_... is set to =m
> Consequently also resetting the following dependent options from =y 
> to =m:
> CONFIG_USB_STORAGE_REALTEK
> CONFIG_USB_...
> 
> This would help tuning kernel configs in situations where menuconfig is not 
> appropriate, for example when working with diffs of configs.
> 
> Is there such tooling, or what do people use?

When using 'make menuconfig', enter /usb_storage and it says:

  │ Symbol: USB_STORAGE [=m]│  
  │ Type  : tristate│  
  │ Prompt: USB Mass Storage support│  
  │   Location: │  
  │ -> Device Drivers   │  
  │ (1)   -> USB support (USB_SUPPORT [=y]) │  
  │   Defined at drivers/usb/storage/Kconfig:9  │  
  │   Depends on: USB_SUPPORT [=y] && USB [=m] && SCSI [=m]

so it is limited to m (or n) by USB and SCSI.

Or you can just find the symbol of interest and use Help to get the same info.

The same can be done with 'make nconfig'.

With 'make xconfig', if you click on "USB Mass Storage support" (e.g.), it shows
the same info.  Or you can use Ctrl+F (or Edit/Find), enter a symbol (or
a partial symbol name), like USB_STORAGE, which gives you a list of symbols.
Click on one of them and it will give you the same info.

HTH.  There is nothing quite as explicit as your example output.

-- 
~Randy

[PATCH] x86_64: uninline TASK_SIZE

2019-04-21 Thread Alexey Dobriyan

TASK_SIZE macro is quite deceptive: it looks like a constant but in fact
compiles to 50+ bytes.

Space savings on x86_64 defconfig:

add/remove: 1/0 grow/shrink: 3/24 up/down: 77/-2247 (-2170)
Function old new   delta
_task_size -  52 +52
mpol_shared_policy_init  344 363 +19
shmem_get_unmapped_area   92  97  +5
__rseq_handle_notify_resume.cold  34  35  +1
copy_from_user_nmi   123 113 -10
mmap_address_hint_valid   92  56 -36
arch_get_unmapped_area_topdown   471 435 -36
tlb_gather_mmu   164 126 -38
hugetlb_get_unmapped_area774 736 -38
__create_xol_area497 458 -39
arch_tlb_gather_mmu  160 120 -40
setup_new_exec   380 336 -44
__x64_sys_mlockall   378 333 -45
__ia32_sys_mlockall  378 333 -45
tlb_flush_mmu235 189 -46
unmap_page_range20982048 -50
copy_mount_options   518 465 -53
__get_user_pages17371675 -62
get_unmapped_area270 204 -66
perf_prepare_sample 11761098 -78
perf_callchain_user  549 469 -80
mremap_to.isra   545 457 -88
arch_tlb_finish_mmu  394 305 -89
__do_munmap 1039 927-112
elf_map  527 409-118
prctl_set_mm15091335-174
__rseq_handle_notify_resume 1116 906-210
load_elf_binary11761   11111-650
Total: Before=14121337, After=14119167, chg -0.02%

Signed-off-by: Alexey Dobriyan 
---

 arch/x86/include/asm/processor.h |4 ++--
 arch/x86/kernel/Makefile |1 +
 arch/x86/kernel/task_size_64.c   |9 +
 3 files changed, 12 insertions(+), 2 deletions(-)

--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -887,8 +887,8 @@ static inline void spin_lock_prefetch(const void *x)
 
 #define TASK_SIZE_LOW  (test_thread_flag(TIF_ADDR32) ? \
IA32_PAGE_OFFSET : DEFAULT_MAP_WINDOW)
-#define TASK_SIZE  (test_thread_flag(TIF_ADDR32) ? \
-   IA32_PAGE_OFFSET : TASK_SIZE_MAX)
+unsigned long _task_size(void);
+#define TASK_SIZE  _task_size()
 #define TASK_SIZE_OF(child)((test_tsk_thread_flag(child, TIF_ADDR32)) ? \
IA32_PAGE_OFFSET : TASK_SIZE_MAX)
 
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -46,6 +46,7 @@ CFLAGS_irq.o := -I$(src)/../include/asm/trace
 
 obj-y  := process_$(BITS).o signal.o
 obj-$(CONFIG_COMPAT)   += signal_compat.o
+obj-$(CONFIG_X86_64)   += task_size_64.o
 obj-y  += traps.o idt.o irq.o irq_$(BITS).o dumpstack_$(BITS).o
 obj-y  += time.o ioport.o dumpstack.o nmi.o
 obj-$(CONFIG_MODIFY_LDT_SYSCALL)   += ldt.o
new file mode 100644
--- /dev/null
+++ b/arch/x86/kernel/task_size_64.c
@@ -0,0 +1,9 @@
+#include 
+#include 
+#include 
+
+unsigned long _task_size(void)
+{
+   return test_thread_flag(TIF_ADDR32) ? IA32_PAGE_OFFSET : TASK_SIZE_MAX;
+}
+EXPORT_SYMBOL(_task_size);

Re: Reasons for oldconfig resetting config options to =m

2019-04-21 Thread Niklas Hambüchen

On 2019-04-21 17:55, Randy Dunlap wrote:
> When using 'make menuconfig', enter /usb_storage and it says:
> ...
>   │   Depends on: USB_SUPPORT [=y] && USB [=m] && SCSI [=m]
> 
> HTH.  There is nothing quite as explicit as your example output.

Hmm OK.

The menuconfig approach is what I've used so far manually, but it does not work 
so well for batch / CI use.

Thanks nevertheless for the detailed explanation!

[PATCH] exec: move struct linux_binprm::buf

2019-04-21 Thread Alexey Dobriyan

struct linux_binprm::buf is the first field and it is exactly 128 bytes
in size. It means that on x86_64 all accesses to other fields will go
through [r64 + disp32] addressing mode which is 3 bytes bloatier than
[r64 + disp8] addressing mode. Given that accesses to other fields
outnumber accesses to ->buf, move it down.

Space savings (x86_64 defconfig):
more on distro configs because LSMs actively dereference "bprm"
but do not care about first 128 bytes of the executable itself.

add/remove: 0/0 grow/shrink: 0/24 up/down: 0/-492 (-492)
Function old new   delta
selinux_bprm_committing_creds552 549  -3
finalize_exec 94  91  -3
__audit_log_bprm_fcaps   283 280  -3
__audit_bprm  39  36  -3
perf_trace_sched_process_exec347 341  -6
install_exec_creds   105  99  -6
cap_bprm_set_creds.cold   60  54  -6
would_dump   137 128  -9
load_script  637 628  -9
bprm_change_interp61  52  -9
trace_event_raw_event_sched_process_exec 260 250 -10
search_binary_handler255 240 -15
remove_arg_zero  295 277 -18
free_bprm119 101 -18
prepare_binprm   379 360 -19
setup_new_exec   336 315 -21
flush_old_exec  16381617 -21
copy_strings.isra746 724 -22
setup_arg_pages  559 530 -29
load_misc_binary11511118 -33
selinux_bprm_set_creds   792 753 -39
load_elf_binary11111   11072 -39
cap_bprm_set_creds  14961454 -42
__do_execve_file.isra   23952286-109

Signed-off-by: Alexey Dobriyan 
---

 include/linux/binfmts.h |3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

--- a/include/linux/binfmts.h
+++ b/include/linux/binfmts.h
@@ -15,7 +15,6 @@ struct filename;
  * This structure is used to hold the arguments that are used when loading 
binaries.
  */
 struct linux_binprm {
-   char buf[BINPRM_BUF_SIZE];
 #ifdef CONFIG_MMU
struct vm_area_struct *vma;
unsigned long vma_pages;
@@ -64,6 +63,8 @@ struct linux_binprm {
unsigned long loader, exec;
 
struct rlimit rlim_stack; /* Saved RLIMIT_STACK used during exec. */
+
+   char buf[BINPRM_BUF_SIZE];
 } __randomize_layout;
 
 #define BINPRM_FLAGS_ENFORCE_NONDUMP_BIT 0

Re: linux-next: build failure after merge of the akpm-current tree

2019-04-21 Thread Alexey Dobriyan

On Thu, Apr 18, 2019 at 09:02:47AM +1000, Stephen Rothwell wrote:
> Hi Kees,
> 
> On Wed, 17 Apr 2019 17:28:39 -0500 Kees Cook  wrote:
> >
> > On Wed, Apr 17, 2019 at 5:22 PM Kees Cook  wrote:
> > >
> > > On Wed, Apr 17, 2019 at 1:53 AM Stephen Rothwell  
> > > wrote:  
> > > >
> > > > Hi Andrew,
> > > >
> > > > After merging the akpm-current tree, today's linux-next build (arm
> > > > multi_v7_defconfig) failed like this:
> > > >
> > > > fs/binfmt_elf.c: In function 'load_elf_binary':
> > > > fs/binfmt_elf.c:1140:7: error: 'elf_interpreter' undeclared (first use 
> > > > in this function); did you mean 'interpreter'?
> > > >   if (!elf_interpreter)
> > > >^~~
> > > >interpreter  
> > >
> > > static int load_elf_binary(struct linux_binprm *bprm)
> > > {
> > > ...
> > > char * elf_interpreter = NULL;
> > >
> > > This is _absolutely_ a valid variable.  
> 
> It was. However commit a34f642bccf1 from Andrew's tree changes its scope.
> 
> So there is nothing wrong with commit 3ebf0dd657ce, it is the incorrect
> rebase of it on top of a34f642bccf1 that causes the build problem.
> 
> > > > Caused by commit
> > > >
> > > >   3ebf0dd657ce ("fs/binfmt_elf.c: move brk out of mmap when doing 
> > > > direct loader exec")
> > > >
> > > > interacting with commit
> > > >
> > > >   a34f642bccf1 ("fs/binfmt_elf.c: free PT_INTERP filename ASAP")
> > > >
> > > > I have applied the following patch for today.
> > > >
> > > > From: Stephen Rothwell 
> > > > Date: Wed, 17 Apr 2019 16:48:29 +1000
> > > > Subject: [PATCH] fix "fs/binfmt_elf.c: move brk out of mmap when doing 
> > > > direct loader exec"
> > > >
> > > > Signed-off-by: Stephen Rothwell 
> > > > ---
> > > >  fs/binfmt_elf.c | 2 +-
> > > >  1 file changed, 1 insertion(+), 1 deletion(-)
> > > >
> > > > diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c
> > > > index b3bbe6bca499..fe5668a1bbaa 100644
> > > > --- a/fs/binfmt_elf.c
> > > > +++ b/fs/binfmt_elf.c
> > > > @@ -1137,7 +1137,7 @@ static int load_elf_binary(struct linux_binprm 
> > > > *bprm)
> > > >  * collide early with the stack growing down), and into the 
> > > > unused
> > > >  * ELF_ET_DYN_BASE region.
> > > >  */
> > > > -   if (!elf_interpreter)
> > > > +   if (!interpreter)  
> > >
> > > No, this is very wrong and will, I think, cause all PIE binaries to fail 
> > > to run.  
> > 
> > I may be wrong: I think this will cause all static binaries to see
> > their brk moved very unexpectedly. All static PIE binaries will fail?
> 
> Are you sure that elf_interpreter == NULL is not equivalent to
> interpreter == NULL by this point in the code?  Earlier if
> elf_interpreter is not NULL, we have set interpreter (using open_exec)
> and errored out if that fails.

My patch was done based on this very observation: if interpreter has been
opened then its filename has been allocated before otherwise how do you
know which interpreter to open? Just like with pathname resolution, once
lookup is done and inode has been fished out, pathname becomes irrelevant.

[PATCH] staging: rtl8188eu: cleanup indenting issue in odm.c

2019-04-21 Thread Michael Straube

Cleanup indenting issue reported by checkpatch.
WARNING: suspect code indent for conditional statements (8, 17)

Signed-off-by: Michael Straube 
---
 drivers/staging/rtl8188eu/hal/odm.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/staging/rtl8188eu/hal/odm.c 
b/drivers/staging/rtl8188eu/hal/odm.c
index 95a8fc23e62c..74f7c9c81bf6 100644
--- a/drivers/staging/rtl8188eu/hal/odm.c
+++ b/drivers/staging/rtl8188eu/hal/odm.c
@@ -1096,7 +1096,7 @@ void odm_EdcaTurboCheckCE(struct odm_dm_struct *pDM_Odm)
} else {
/*  Turn Off EDCA turbo here. */
/*  Restore original EDCA according to the declaration of AP. */
-if (pDM_Odm->DM_EDCA_Table.bCurrentTurboEDCA) {
+   if (pDM_Odm->DM_EDCA_Table.bCurrentTurboEDCA) {
usb_write32(Adapter, REG_EDCA_BE_PARAM,
Adapter->HalData->AcParam_BE);
pDM_Odm->DM_EDCA_Table.bCurrentTurboEDCA = false;
-- 
2.21.0

Re: [PATCH] reiserfs: Force type conversion in xattr_hash

2019-04-21 Thread Al Viro

On Thu, Apr 18, 2019 at 03:50:19PM -0700, Andrew Morton wrote:
> On Wed, 17 Apr 2019 17:22:00 +0530 Bharath Vedartham  
> wrote:
> 
> > This patch fixes the sparse warning:
> > 
> > fs/reiserfs//xattr.c:453:28: warning: incorrect type in return
> > expression (different base types)
> > fs/reiserfs//xattr.c:453:28:expected unsigned int
> > fs/reiserfs//xattr.c:453:28:got restricted __wsum
> > fs/reiserfs//xattr.c:453:28: warning: incorrect type in return
> > expression (different base types)
> > fs/reiserfs//xattr.c:453:28:expected unsigned int
> > fs/reiserfs//xattr.c:453:28:got restricted __wsum
> > 
> > csum_partial returns restricted integer __wsum whereas xattr_hash
> > expects a return type of __u32.
> > 
> > ...
> >
> > --- a/fs/reiserfs/xattr.c
> > +++ b/fs/reiserfs/xattr.c
> > @@ -450,7 +450,7 @@ static struct page *reiserfs_get_page(struct inode 
> > *dir, size_t n)
> >  
> >  static inline __u32 xattr_hash(const char *msg, int len)
> >  {
> > -   return csum_partial(msg, len, 0);
> > +   return (__force __u32)csum_partial(msg, len, 0);
> >  }
> >  
> >  int reiserfs_commit_write(struct file *f, struct page *page,
> 
> hm.  Conversion from int to __u32 should be OK - why is sparse being so
> picky here?

Because csum_partial() returns __wsum_t, not int.

> Why is the __force needed, btw?

So that accidental mixing of those csums (both 16bit and 32bit) with
host- or net-endian would be caught.

And I'm not at all sure reiserfs xattr_hash() doesn't bugger it up, actually.

Recall that 16bit inet csum is the sum of 16bit words (treated as host-endian)
modulo 0xffff, i.e. the entire buffer interpreted as host-endian integer
taken modulo 0xffff.  That has a lovely property - memory representation
of that value is the same whether we'd done calculations on b-e or l-e
host; the reason is that modulo 65535 byteswap is the same as multiplying
by 256, so the sum of byteswapped 16bit values modulo 65535 is byteswapped
sum of original values.

csum_partial() is sum of 32bit words (treated as host-endian) modulo 0xffffffff,
i.e. the entire buffer treated as host-endian number modulo 0xffffffff.
It is convenient when we want to calculate the 16bit csum - 0xffffffff is
a multiple of 0xffff, so residue modulo 0xffffffff determines the residue
modulo 0xffff; that's what csum_fold() is.

However, result of csum_partial() on big- and little-endian hosts
does *not* have the same property.  Consider e.g. an array {0, 0, 0, 128,
0, 0, 0, 128}.  csum_partial of that on l-e will be (2^31 + 2^31)mod(2^32 - 1),
i.e. 1, with {1, 0, 0, 0} as memory representation.  16bit csum will
again be 1, with {1, 0} as memory representation.  On big-endian we
get (128 + 128)mod(2^32 - 1), i.e. 256, with {0, 0, 1, 0} as memory
representation.  16bit csum is again 256, stored as {1, 0}, i.e.
the same as if we'd done everything on l-e; however, raw csum_partial()
values have different memory representations.  They certainly are
different as host-endian (and so are 16bit csums).

Reiserfs takes csum_partial() on buffer, interprets it as host-endian
and stores it little-endian on disk.  When fetching those it does
the same calculation and fails on mismatch.  However, if the
store had been done on little-endian host and load - on big-endian
one we *will* get mismatch almost all the time.  Treating ->rx_hash
as __wsum_t (and not doing that cpu_to_le32()) would lower the
frequency of mismatches, but still would be broken.  Storing
a 16bit csum (declared as __sum16_t, again, without cpu_to_le...())
would be endian-safe, but that's not what reiserfs folks wanted
(16 bits of csum instead of 32, for starters).

IOW, what sparse has caught here is a genuine endianness bug; images
created on little-endian host and mounted on big-endian (or vice
versa) will see csum mismatches when trying to fetch xattrs.
Broken since
commit 0b1a6a8ca8a78c2e068b04acf97479ee89a024ac
Author: Andrew Morton 
Date:   Sun May 9 23:59:13 2004 -0700

[PATCH] reiserfs: xattr support

From: Chris Mason 

From: je...@suse.com

reiserfs support for xattrs

ISTR some discussions of reiserfs layout endianness problems, but
that had been many years ago and I could be wrong; I _think_
the conclusion had been "it sucks, but we can't do anything
without breaking existing filesystem images".  Not sure if that
was the same bug or something different, though.

Build regressions/improvements in v5.1-rc4

2019-04-21 Thread Geert Uytterhoeven

Below is the list of build error/warning regressions/improvements in
v5.1-rc4[1] compared to v5.0[2].

Summarized:
  - build errors: +1/-1
  - build warnings: +117/-109

JFYI, when comparing v5.1-rc4[1] to v5.1-rc3[3], the summaries are:
  - build errors: +1/-0
  - build warnings: +60/-85

Happy fixing! ;-)

Thanks to the linux-next team for providing the build service.

[1] 
http://kisskb.ellerman.id.au/kisskb/branch/linus/head/15ade5d2e7775667cf191cf2f94327a4889f8b9d/
 (all 236 configs)
[2] 
http://kisskb.ellerman.id.au/kisskb/branch/linus/head/1c163f4c7b3f621efff9b28a47abb36f7378d783/
 (all 236 configs)
[3] 
http://kisskb.ellerman.id.au/kisskb/branch/linus/head/79a3aaa7b82e3106be97842dedfd8429248896e6/
 (all 236 configs)


*** ERRORS ***

1 error regressions:
  + error: arch/sh/kernel/cpu/sh2/clock-sh7619.o: undefined reference to 
`followparent_recalc':  => .data+0x70)

1 error improvements:
  - error: ene_ub6250.c: relocation truncated to fit: R_NDS32_9_PCREL_RELA 
against `.text': (.text+0x348) => 


*** WARNINGS ***

117 warning regressions:
  + /kisskb/src/arch/arm/include/asm/uaccess.h: warning: 'old_fs' may be used 
uninitialized in this function [-Wuninitialized]:  => 70:36
  + /kisskb/src/arch/arm/mm/init.c: warning: unused variable 'dtcm_end' 
[-Wunused-variable]:  => 470:13
  + /kisskb/src/arch/arm/mm/init.c: warning: unused variable 'itcm_end' 
[-Wunused-variable]:  => 471:13
  + /kisskb/src/arch/s390/kernel/perf_cpum_cf_diag.c: warning: 
'cf_diag_push_sample' uses dynamic stack allocation [enabled by default]:  => 
514:1
  + /kisskb/src/arch/xtensa/mm/mmu.c: warning: format '%zu' expects argument of 
type 'size_t', but argument 3 has type 'long unsigned int' [-Wformat=]:  => 36:9
  + /kisskb/src/drivers/clk/ti/clk.c: warning: 'ret' may be used uninitialized 
in this function [-Wuninitialized]:  => 578:2
  + /kisskb/src/drivers/clocksource/timer-ti-dm.c: warning: 
'omap_dm_timer_set_load_start' defined but not used [-Wunused-function]:  => 
589:12
  + /kisskb/src/drivers/gpu/drm/arm/display/komeda/komeda_dev.c: warning: 'ret' 
may be used uninitialized in this function [-Wuninitialized]:  => 145:5
  + /kisskb/src/drivers/gpu/drm/sun4i/sun6i_mipi_dsi.c: warning: (near 
initialization for 'opts.mipi_dphy') [-Wmissing-braces]:  => 620:8
  + /kisskb/src/drivers/gpu/drm/sun4i/sun6i_mipi_dsi.c: warning: missing braces 
around initializer [-Wmissing-braces]:  => 620:8
  + /kisskb/src/drivers/i2c/busses/i2c-sh_mobile.c: warning: 'data' may be used 
uninitialized in this function [-Wmaybe-uninitialized]:  => 399:26
  + /kisskb/src/drivers/i2c/busses/i2c-sh_mobile.c: warning: 'data' may be used 
uninitialized in this function [-Wuninitialized]:  => 399:26
  + /kisskb/src/drivers/iio/adc/ad7606_par.c: warning: unused variable 'st' 
[-Wunused-variable]:  => 36:23, 21:23
  + /kisskb/src/drivers/iio/imu/bmi160/bmi160_core.c: warning: 'int_latch_mask' 
may be used uninitialized in this function [-Wuninitialized]:  => 599:29
  + /kisskb/src/drivers/iio/imu/bmi160/bmi160_core.c: warning: 'int_map_mask' 
may be used uninitialized in this function [-Wuninitialized]:  => 606:29
  + /kisskb/src/drivers/iio/imu/bmi160/bmi160_core.c: warning: 
'int_out_ctrl_shift' may be used uninitialized in this function 
[-Wuninitialized]:  => 577:47
  + /kisskb/src/drivers/iio/imu/bmi160/bmi160_core.c: warning: 'pin_name' may 
be used uninitialized in this function [-Wuninitialized]:  => 618:3
  + /kisskb/src/drivers/mtd/ubi/wl.c: warning: 'err' may be used uninitialized 
in this function [-Wuninitialized]:  => 1520:19
  + /kisskb/src/fs/ocfs2/alloc.c: warning: 'first_bit' may be used 
uninitialized in this function [-Wuninitialized]:  => 7604:17
  + /kisskb/src/init/main.c: warning: format '%zu' expects argument of type 
'size_t', but argument 3 has type '__kernel_size_t {aka unsigned int}' 
[-Wformat=]:  => 787:37
  + /kisskb/src/init/main.c: warning: format '%zu' expects argument of type 
'size_t', but argument 3 has type 'unsigned int' [-Wformat=]:  => 384:35, 
388:35, 380:35
  + /kisskb/src/kernel/events/core.c: warning: 'perf_event_bpf_output' uses 
dynamic stack allocation [enabled by default]:  => 7825:1
  + /kisskb/src/kernel/events/core.c: warning: 'perf_event_ksymbol_output' uses 
dynamic stack allocation [enabled by default]:  => 7736:1
  + /kisskb/src/kernel/rcu/srcutree.c: warning: 'levelspread[]' may be 
used uninitialized in this function [-Wuninitialized]:  => 140:32
  + /kisskb/src/kernel/rcu/srcutree.c: warning: 'levelspread[]' may be 
used uninitialized in this function [-Wuninitialized]:  => 121:34
  + /kisskb/src/kernel/rcu/srcutree.c: warning: 'levelspread[]' may be 
used uninitialized in this function [-Wuninitialized]:  => 121:34
  + /kisskb/src/kernel/rcu/srcutree.c: warning: 'levelspread[]' may be 
used uninitialized in this function [-Wuninitialized]:  => 121:34
  + /kisskb/src/kernel/rcu/srcutree.c: warning: 'levelspread[]' may be 
used uninitialized in this function [-Wuninitialized]:  => 121:34

Re: [PATCH 2/3] ARM: mvebu: drop return from void function

2019-04-21 Thread Gregory CLEMENT

Hi Nicholas,

> The return statement is unnecessary here - so drop it.
>
> Signed-off-by: Nicholas Mc Guire 

Applied on mvebu/arm

Thanks,

Gregory

> ---
>
> Problem reported by checkpatch
>
> WARNING: void function return statements are not generally useful
> #141: FILE: arch/arm/mach-mvebu/board-v7.c:141:
> +   return;
> +}
>
> Patch was compile-tested: mvebu_v7_defconfig (implies MACH_MVEBU_ANY=y)
> (with some unrelated sparse warnings about missing syscalls)
>
> Patch is against 5.1-rc4 (localversion-next is 20190412)
>
>  arch/arm/mach-mvebu/board-v7.c | 1 -
>  1 file changed, 1 deletion(-)
>
> diff --git a/arch/arm/mach-mvebu/board-v7.c b/arch/arm/mach-mvebu/board-v7.c
> index 37f8cb6..28fd256 100644
> --- a/arch/arm/mach-mvebu/board-v7.c
> +++ b/arch/arm/mach-mvebu/board-v7.c
> @@ -137,7 +137,6 @@ static void __init i2c_quirk(void)
>  
>   of_update_property(np, new_compat);
>   }
> - return;
>  }
>  
>  static void __init mvebu_dt_init(void)
> -- 
> 2.1.4
>

-- 
Gregory Clement, Bootlin
Embedded Linux and Kernel engineering
http://bootlin.com

Re: [PATCH v3 4/4] ARM: mvebu: prefix coprocessor operand with p

2019-04-21 Thread Gregory CLEMENT

Hi Stefan,

> In every other instance where mrc is used the coprocessor operand
> is prefixed with p (e.g. p15). Use the p prefix in this case too.
> This fixes a build issue when using LLVM's integrated assembler:
>   arch/arm/mach-mvebu/coherency_ll.S:69:6: error: invalid operand for 
> instruction
>mrc 15, 0, r3, cr0, cr0, 5
>^
>   arch/arm/mach-mvebu/pmsu_ll.S:19:6: error: invalid operand for instruction
>mrc 15, 0, r0, cr0, cr0, 5 @ get the CPU ID
>^
>
> Signed-off-by: Stefan Agner 
> Acked-by: Nicolas Pitre 

Applied on mvebu/arm, as well as the previous patch.

Thanks,

Gregory

> ---
>  arch/arm/mach-mvebu/coherency_ll.S | 2 +-
>  arch/arm/mach-mvebu/pmsu_ll.S  | 2 +-
>  2 files changed, 2 insertions(+), 2 deletions(-)
>
> diff --git a/arch/arm/mach-mvebu/coherency_ll.S 
> b/arch/arm/mach-mvebu/coherency_ll.S
> index 8b2fbc8b6bc6..2d962fe48821 100644
> --- a/arch/arm/mach-mvebu/coherency_ll.S
> +++ b/arch/arm/mach-mvebu/coherency_ll.S
> @@ -66,7 +66,7 @@ ENDPROC(ll_get_coherency_base)
>   * fabric registers
>   */
>  ENTRY(ll_get_coherency_cpumask)
> - mrc 15, 0, r3, cr0, cr0, 5
> + mrc p15, 0, r3, cr0, cr0, 5
>   and r3, r3, #15
>   mov r2, #(1 << 24)
>   lsl r3, r2, r3
> diff --git a/arch/arm/mach-mvebu/pmsu_ll.S b/arch/arm/mach-mvebu/pmsu_ll.S
> index c1fb713e9306..7aae9a25cfeb 100644
> --- a/arch/arm/mach-mvebu/pmsu_ll.S
> +++ b/arch/arm/mach-mvebu/pmsu_ll.S
> @@ -16,7 +16,7 @@
>  ENTRY(armada_38x_scu_power_up)
>   mrc p15, 4, r1, c15, c0 @ get SCU base address
>   orr r1, r1, #0x8@ SCU CPU Power Status Register
> - mrc 15, 0, r0, cr0, cr0, 5  @ get the CPU ID
> + mrc p15, 0, r0, cr0, cr0, 5 @ get the CPU ID
>   and r0, r0, #15
>   add r1, r1, r0
>   mov r0, #0x0
> -- 
> 2.21.0
>

-- 
Gregory Clement, Bootlin
Embedded Linux and Kernel engineering
http://bootlin.com

Re: [PATCH v2 1/3] ARM: dts: armada-38x: add interrupts for watchdog

2019-04-21 Thread Gregory CLEMENT

Hi Chris,

> The first interrupt is for the regular watchdog timeout. Normally the
> RSTOUT line will trigger a reset before this interrupt fires but on
> systems with a non-standard reset it may still trigger.
>
> The second interrupt is for a timer1 which is used as a pre-timeout for
> the watchdog.
>
> Signed-off-by: Chris Packham 
> ---
> Changes in v2:
> - new, split out from "watchdog: orion_wdt: use timer1 as a pretimeout"
>
>  arch/arm/boot/dts/armada-38x.dtsi | 2 ++
>  1 file changed, 2 insertions(+)
>

Applied on mvebu/dt

Thanks,

Gregory


> diff --git a/arch/arm/boot/dts/armada-38x.dtsi 
> b/arch/arm/boot/dts/armada-38x.dtsi
> index 929459c42760..fc550c640ca8 100644
> --- a/arch/arm/boot/dts/armada-38x.dtsi
> +++ b/arch/arm/boot/dts/armada-38x.dtsi
> @@ -376,6 +376,8 @@
>   reg = <0x20300 0x34>, <0x20704 0x4>, <0x18260 
> 0x4>;
>   clocks = < 2>, <>;
>   clock-names = "nbclk", "fixed";
> + interrupts-extended = < GIC_SPI 64 
> IRQ_TYPE_LEVEL_HIGH>,
> +   < GIC_SPI  9 
> IRQ_TYPE_LEVEL_HIGH>;
>   };
>  
>   cpurst: cpurst@20800 {
> -- 
> 2.21.0
>

-- 
Gregory Clement, Bootlin
Embedded Linux and Kernel engineering
http://bootlin.com

Re: [PATCH 2/4] ARM: mvebu: fix a leaked reference by adding missing of_node_put

2019-04-21 Thread Gregory CLEMENT

Hi Wen Yang,

> The call to of_get_next_child returns a node pointer with refcount
> incremented thus it must be explicitly decremented after the last
> usage.
>
> Detected by coccinelle with the following warnings:
> ./arch/arm/mach-mvebu/pm-board.c:135:2-8: ERROR: missing of_node_put; 
> acquired a node pointer with refcount incremented on line 88, but without a 
> corresponding object release within this function
>
> Signed-off-by: Wen Yang 
> Cc: Jason Cooper 
> Cc: Andrew Lunn 
> Cc: Gregory Clement 
> Cc: Sebastian Hesselbarth 
> Cc: Russell King 
> Cc: linux-arm-ker...@lists.infradead.org
> Cc: linux-kernel@vger.kernel.org

Applied on mvebu/arm

Thanks,

Gregory

> ---
>  arch/arm/mach-mvebu/pm-board.c | 11 ---
>  1 file changed, 8 insertions(+), 3 deletions(-)
>
> diff --git a/arch/arm/mach-mvebu/pm-board.c b/arch/arm/mach-mvebu/pm-board.c
> index db17121..1e2c17c 100644
> --- a/arch/arm/mach-mvebu/pm-board.c
> +++ b/arch/arm/mach-mvebu/pm-board.c
> @@ -79,7 +79,7 @@ static void mvebu_armada_pm_enter(void __iomem *sdram_reg, 
> u32 srcmd)
>  static int __init mvebu_armada_pm_init(void)
>  {
>   struct device_node *np;
> - struct device_node *gpio_ctrl_np;
> + struct device_node *gpio_ctrl_np = NULL;
>   int ret = 0, i;
>  
>   if (!of_machine_is_compatible("marvell,axp-gp"))
> @@ -126,18 +126,23 @@ static int __init mvebu_armada_pm_init(void)
>   goto out;
>   }
>  
> + if(gpio_ctrl_np)
> + of_node_put(gpio_ctrl_np);
>   gpio_ctrl_np = args.np;
>   pic_raw_gpios[i] = args.args[0];
>   }
>  
>   gpio_ctrl = of_iomap(gpio_ctrl_np, 0);
> - if (!gpio_ctrl)
> - return -ENOMEM;
> + if (!gpio_ctrl) {
> + ret = -ENOMEM;
> + goto out;
> + }
>  
>   mvebu_pm_suspend_init(mvebu_armada_pm_enter);
>  
>  out:
>   of_node_put(np);
> + of_node_put(gpio_ctrl_np);
>   return ret;
>  }
>  
> -- 
> 2.9.5
>

-- 
Gregory Clement, Bootlin
Embedded Linux and Kernel engineering
http://bootlin.com

Re: [PATCH] ARM: mvebu: kirkwood: remove error message when retrieving mac address

2019-04-21 Thread Gregory CLEMENT

Hi Chris,

> On 20/02/19 4:14 AM, Gregory CLEMENT wrote:
>> Hi Chris,
>>   
>>   On lun., févr. 18 2019, Chris Packham  
>> wrote:
>> 
>>> Kirkwood has always had the ability to retrieve the local-mac-address
>>> from the hardware (usually this was configured by the bootloader). This
>>> is particularly useful when dealing with a legacy non-DT aware
>>> bootloader.
>>>
>>> The "error" message just indicated that the board used an old bootloader
>>> and in many cases users can't do anything about this. The message
>>> probably should have been pr_info() to inform the user that the kernel
>>> has been helpful but rather than that let's remove it entirely to make
>>> the kernel less noisy.
>>>
>>> Signed-off-by: Chris Packham 
>> 
>> I'm OK with this patch, however as it is not a fix, it's too late for
>> 5.1. I will apply it on mvebu/arm for 5.2 once 5.1-rc1 will be released.
>> 
>
> No problem with that. We have a local fork I'll cherry pick it into once 
> it hits linux-mvebu.
>

Applied on mvebu/arm

Thanks,

Gregory


>> Thanks,
>> 
>> gregory
>> 
>>> ---
>>>   arch/arm/mach-mvebu/kirkwood.c | 2 --
>>>   1 file changed, 2 deletions(-)
>>>
>>> diff --git a/arch/arm/mach-mvebu/kirkwood.c b/arch/arm/mach-mvebu/kirkwood.c
>>> index 0aa88105d46e..bf3ff0f580c2 100644
>>> --- a/arch/arm/mach-mvebu/kirkwood.c
>>> +++ b/arch/arm/mach-mvebu/kirkwood.c
>>> @@ -107,8 +107,6 @@ static void __init kirkwood_dt_eth_fixup(void)
>>> clk_prepare_enable(clk);
>>>   
>>> /* store MAC address register contents in local-mac-address */
>>> -   pr_err(FW_INFO "%pOF: local-mac-address is not set\n", np);
>>> -
>>> pmac = kzalloc(sizeof(*pmac) + 6, GFP_KERNEL);
>>> if (!pmac)
>>> goto eth_fixup_no_mem;
>>> -- 
>>> 2.20.1
>>>
>> 
>

-- 
Gregory Clement, Bootlin
Embedded Linux and Kernel engineering
http://bootlin.com

Re: Build regressions/improvements in v5.1-rc4

2019-04-21 Thread Geert Uytterhoeven

On Sun, Apr 21, 2019 at 7:15 PM Geert Uytterhoeven  wrote:
> JFYI, when comparing v5.1-rc4[1] to v5.1-rc3[3], the summaries are:
>   - build errors: +1/-0

  + error: arch/sh/kernel/cpu/sh2/clock-sh7619.o: undefined reference
to `followparent_recalc':  => .data+0x70)

sh4/sh-all{mod,yes}config (patch available)

> [1] 
> http://kisskb.ellerman.id.au/kisskb/branch/linus/head/15ade5d2e7775667cf191cf2f94327a4889f8b9d/
>  (all 236 configs)
> [3] 
> http://kisskb.ellerman.id.au/kisskb/branch/linus/head/79a3aaa7b82e3106be97842dedfd8429248896e6/
>  (all 236 configs)

Gr{oetje,eeting}s,

Geert

-- 
Geert Uytterhoeven -- There's lots of Linux beyond ia32 -- ge...@linux-m68k.org

In personal conversations with technical people, I call myself a hacker. But
when I'm talking to journalists I just say "programmer" or something like that.
-- Linus Torvalds

Build regressions/improvements in v5.1-rc5

2019-04-21 Thread Geert Uytterhoeven

Below is the list of build error/warning regressions/improvements in
v5.1-rc5[1] compared to v5.0[2].

Summarized:
  - build errors: +1/-1
  - build warnings: +139/-107

JFYI, when comparing v5.1-rc5[1] to v5.1-rc4[3], the summaries are:
  - build errors: +0/-0
  - build warnings: +86/-62

Happy fixing! ;-)

Thanks to the linux-next team for providing the build service.

[1] 
http://kisskb.ellerman.id.au/kisskb/branch/linus/head/dc4060a5dc2557e6b5aa813bf5b73677299d62d2/
 (all 236 configs)
[2] 
http://kisskb.ellerman.id.au/kisskb/branch/linus/head/1c163f4c7b3f621efff9b28a47abb36f7378d783/
 (all 236 configs)
[3] 
http://kisskb.ellerman.id.au/kisskb/branch/linus/head/15ade5d2e7775667cf191cf2f94327a4889f8b9d/
 (all 236 configs)


*** ERRORS ***

1 error regressions:
  + error: arch/sh/kernel/cpu/sh2/clock-sh7619.o: undefined reference to 
`followparent_recalc':  => .data+0x70)

1 error improvements:
  - error: ene_ub6250.c: relocation truncated to fit: R_NDS32_9_PCREL_RELA 
against `.text': (.text+0x348) => 


*** WARNINGS ***

139 warning regressions:
  + /kisskb/src/arch/arm/include/asm/uaccess.h: warning: 'old_fs' may be used 
uninitialized in this function [-Wuninitialized]:  => 70:36
  + /kisskb/src/arch/arm/mm/init.c: warning: unused variable 'dtcm_end' 
[-Wunused-variable]:  => 470:13
  + /kisskb/src/arch/arm/mm/init.c: warning: unused variable 'itcm_end' 
[-Wunused-variable]:  => 471:13
  + /kisskb/src/arch/s390/kernel/perf_cpum_cf_diag.c: warning: 
'cf_diag_push_sample' uses dynamic stack allocation [enabled by default]:  => 
514:1
  + /kisskb/src/drivers/clk/ti/clk.c: warning: 'ret' may be used uninitialized 
in this function [-Wuninitialized]:  => 578:2
  + /kisskb/src/drivers/clocksource/timer-ti-dm.c: warning: 
'omap_dm_timer_set_load_start' defined but not used [-Wunused-function]:  => 
589:12
  + /kisskb/src/drivers/gpu/drm/arm/display/komeda/komeda_dev.c: warning: 'ret' 
may be used uninitialized in this function [-Wuninitialized]:  => 145:5
  + /kisskb/src/drivers/gpu/drm/sun4i/sun6i_mipi_dsi.c: warning: (near 
initialization for 'opts.mipi_dphy') [-Wmissing-braces]:  => 620:8
  + /kisskb/src/drivers/gpu/drm/sun4i/sun6i_mipi_dsi.c: warning: missing braces 
around initializer [-Wmissing-braces]:  => 620:8
  + /kisskb/src/drivers/i2c/busses/i2c-sh_mobile.c: warning: 'data' may be used 
uninitialized in this function [-Wmaybe-uninitialized]:  => 399:26
  + /kisskb/src/drivers/i2c/busses/i2c-sh_mobile.c: warning: 'data' may be used 
uninitialized in this function [-Wuninitialized]:  => 399:26
  + /kisskb/src/drivers/iio/adc/ad7606_par.c: warning: unused variable 'st' 
[-Wunused-variable]:  => 21:23, 36:23
  + /kisskb/src/drivers/iio/imu/bmi160/bmi160_core.c: warning: 'int_latch_mask' 
may be used uninitialized in this function [-Wuninitialized]:  => 599:29
  + /kisskb/src/drivers/iio/imu/bmi160/bmi160_core.c: warning: 'int_map_mask' 
may be used uninitialized in this function [-Wuninitialized]:  => 606:29
  + /kisskb/src/drivers/iio/imu/bmi160/bmi160_core.c: warning: 
'int_out_ctrl_shift' may be used uninitialized in this function 
[-Wuninitialized]:  => 577:47
  + /kisskb/src/drivers/iio/imu/bmi160/bmi160_core.c: warning: 'pin_name' may 
be used uninitialized in this function [-Wuninitialized]:  => 618:3
  + /kisskb/src/drivers/mtd/ubi/wl.c: warning: 'err' may be used uninitialized 
in this function [-Wuninitialized]:  => 1520:19
  + /kisskb/src/fs/ocfs2/alloc.c: warning: 'first_bit' may be used 
uninitialized in this function [-Wuninitialized]:  => 7604:17
  + /kisskb/src/init/main.c: warning: format '%zu' expects argument of type 
'size_t', but argument 3 has type '__kernel_size_t {aka unsigned int}' 
[-Wformat=]:  => 787:37
  + /kisskb/src/init/main.c: warning: format '%zu' expects argument of type 
'size_t', but argument 3 has type 'unsigned int' [-Wformat=]:  => 388:35, 
380:35, 384:35
  + /kisskb/src/kernel/events/core.c: warning: 'perf_event_bpf_output' uses 
dynamic stack allocation [enabled by default]:  => 7858:1
  + /kisskb/src/kernel/events/core.c: warning: 'perf_event_ksymbol_output' uses 
dynamic stack allocation [enabled by default]:  => 7769:1
  + /kisskb/src/kernel/rcu/srcutree.c: warning: 'levelspread[]' may be 
used uninitialized in this function [-Wuninitialized]:  => 140:32
  + /kisskb/src/kernel/rcu/srcutree.c: warning: 'levelspread[]' may be 
used uninitialized in this function [-Wuninitialized]:  => 121:34
  + /kisskb/src/kernel/rcu/srcutree.c: warning: 'levelspread[]' may be 
used uninitialized in this function [-Wuninitialized]:  => 121:34
  + /kisskb/src/kernel/rcu/srcutree.c: warning: 'levelspread[]' may be 
used uninitialized in this function [-Wuninitialized]:  => 121:34
  + /kisskb/src/kernel/rcu/srcutree.c: warning: 'levelspread[]' may be 
used uninitialized in this function [-Wuninitialized]:  => 140:32
  + /kisskb/src/kernel/rcu/srcutree.c: warning: 'levelspread[]' may be 
used uninitialized in this function [-Wuninitialized]:  => 121:34
  +

[PATCH v1 2/5] mfd: max77620: Support Maxim 77663

2019-04-21 Thread Dmitry Osipenko

Add support for Maxim 77663 using the Max77620 driver. The hardware
is very similar to Max77620/20024, although there are a couple of minor
differences.

Signed-off-by: Dmitry Osipenko 
---
 drivers/mfd/max77620.c   | 68 +++-
 include/linux/mfd/max77620.h |  1 +
 2 files changed, 68 insertions(+), 1 deletion(-)

diff --git a/drivers/mfd/max77620.c b/drivers/mfd/max77620.c
index d8ddd1a6f304..3b6dded0595c 100644
--- a/drivers/mfd/max77620.c
+++ b/drivers/mfd/max77620.c
@@ -111,6 +111,25 @@ static const struct mfd_cell max20024_children[] = {
},
 };
 
+static const struct mfd_cell max77663_children[] = {
+   { .name = "max77620-clock", },
+   { .name = "max77663-pmic", },
+   { .name = "max77620-watchdog", },
+   {
+   .name = "max77620-gpio",
+   .resources = gpio_resources,
+   .num_resources = ARRAY_SIZE(gpio_resources),
+   }, {
+   .name = "max77620-rtc",
+   .resources = rtc_resources,
+   .num_resources = ARRAY_SIZE(rtc_resources),
+   }, {
+   .name = "max77663-power",
+   .resources = power_resources,
+   .num_resources = ARRAY_SIZE(power_resources),
+   },
+};
+
 static const struct regmap_range max77620_readable_ranges[] = {
regmap_reg_range(MAX77620_REG_CNFGGLBL1, MAX77620_REG_DVSSD4),
 };
@@ -171,6 +190,35 @@ static const struct regmap_config max20024_regmap_config = 
{
.volatile_table = _volatile_table,
 };
 
+static const struct regmap_range max77663_readable_ranges[] = {
+   regmap_reg_range(MAX77620_REG_CNFGGLBL1, MAX77620_REG_CID5),
+};
+
+static const struct regmap_access_table max77663_readable_table = {
+   .yes_ranges = max77663_readable_ranges,
+   .n_yes_ranges = ARRAY_SIZE(max77663_readable_ranges),
+};
+
+static const struct regmap_range max77663_writable_ranges[] = {
+   regmap_reg_range(MAX77620_REG_CNFGGLBL1, MAX77620_REG_CID5),
+};
+
+static const struct regmap_access_table max77663_writable_table = {
+   .yes_ranges = max77663_writable_ranges,
+   .n_yes_ranges = ARRAY_SIZE(max77663_writable_ranges),
+};
+
+static const struct regmap_config max77663_regmap_config = {
+   .name = "power-slave",
+   .reg_bits = 8,
+   .val_bits = 8,
+   .max_register = MAX77620_REG_CID5 + 1,
+   .cache_type = REGCACHE_RBTREE,
+   .rd_table = &max77663_readable_table,
+   .wr_table = &max77663_writable_table,
+   .volatile_table = _volatile_table,
+};
+
 /*
  * MAX77620 and MAX20024 has the following steps of the interrupt handling
  * for TOP interrupts:
@@ -237,6 +285,9 @@ static int max77620_get_fps_period_reg_value(struct 
max77620_chip *chip,
case MAX20024:
fps_min_period = MAX20024_FPS_PERIOD_MIN_US;
break;
+   case MAX77663:
+   fps_min_period = MAX20024_FPS_PERIOD_MIN_US;
+   break;
case MAX77620:
fps_min_period = MAX77620_FPS_PERIOD_MIN_US;
break;
@@ -274,6 +325,9 @@ static int max77620_config_fps(struct max77620_chip *chip,
case MAX77620:
fps_max_period = MAX77620_FPS_PERIOD_MAX_US;
break;
+   case MAX77663:
+   fps_max_period = MAX20024_FPS_PERIOD_MAX_US;
+   break;
default:
return -EINVAL;
}
@@ -375,6 +429,9 @@ static int max77620_initialise_fps(struct max77620_chip 
*chip)
}
 
 skip_fps:
+   if (chip->chip_id == MAX77663)
+   return 0;
+
/* Enable wake on EN0 pin */
ret = regmap_update_bits(chip->rmap, MAX77620_REG_ONOFFCNFG2,
 MAX77620_ONOFFCNFG2_WK_EN0,
@@ -453,6 +510,11 @@ static int max77620_probe(struct i2c_client *client,
n_mfd_cells = ARRAY_SIZE(max20024_children);
rmap_config = _regmap_config;
break;
+   case MAX77663:
+   mfd_cells = max77663_children;
+   n_mfd_cells = ARRAY_SIZE(max77663_children);
+   rmap_config = &max77663_regmap_config;
+   break;
default:
dev_err(chip->dev, "ChipID is invalid %d\n", chip->chip_id);
return -EINVAL;
@@ -546,6 +608,9 @@ static int max77620_i2c_suspend(struct device *dev)
return ret;
}
 
+   if (chip->chip_id == MAX77663)
+   goto out;
+
/* Disable WK_EN0 */
ret = regmap_update_bits(chip->rmap, MAX77620_REG_ONOFFCNFG2,
 MAX77620_ONOFFCNFG2_WK_EN0, 0);
@@ -581,7 +646,7 @@ static int max77620_i2c_resume(struct device *dev)
 * For MAX20024: No need to configure WKEN0 on resume as
 * it is configured on Init.
 */
-   if (chip->chip_id == MAX20024)
+   if (chip->chip_id == MAX20024 || chip->chip_id == MAX77663)
goto out;
 
/* Enable WK_EN0 */
@@ -603,6 +668,7 @@ static int

[PATCH v1 4/5] dt-bindings: mfd: max77620: Add compatible for Maxim 77663

2019-04-21 Thread Dmitry Osipenko

Maxim 77663 has only a few minor differences in hardware interface and
available capabilities compared with the 77620 and 20024 models, hence
re-use the 77620 device-tree binding for the 77663.

Signed-off-by: Dmitry Osipenko 
---
 Documentation/devicetree/bindings/mfd/max77620.txt | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/Documentation/devicetree/bindings/mfd/max77620.txt 
b/Documentation/devicetree/bindings/mfd/max77620.txt
index 9c16d51cc15b..9058499051e0 100644
--- a/Documentation/devicetree/bindings/mfd/max77620.txt
+++ b/Documentation/devicetree/bindings/mfd/max77620.txt
@@ -4,7 +4,8 @@ Required properties:
 ---
 - compatible: Must be one of
"maxim,max77620"
-   "maxim,max20024".
+   "maxim,max20024"
+   "maxim,max77663".
 - reg: I2C device address.
 
 Optional properties:
@@ -105,6 +106,7 @@ Optional properties:
 Here supported time periods by device in microseconds are as follows:
 MAX77620 supports 40, 80, 160, 320, 640, 1280, 2560 and 5120 microseconds.
 MAX20024 supports 20, 40, 80, 160, 320, 640, 1280 and 2540 microseconds.
+MAX77663 supports 20, 40, 80, 160, 320, 640, 1280 and 2540 microseconds.
 
 -maxim,power-ok-control: configure map power ok bit
1: Enables POK(Power OK) to control nRST_IO and GPIO1
-- 
2.21.0

[PATCH v1 1/5] mfd: max77620: Fix swapped FPS_PERIOD_MAX_US values

2019-04-21 Thread Dmitry Osipenko

The FPS_PERIOD_MAX_US definitions are swapped for MAX20024 and MAX77620,
fix it.

Cc: stable 
Signed-off-by: Dmitry Osipenko 
---
 include/linux/mfd/max77620.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/include/linux/mfd/max77620.h b/include/linux/mfd/max77620.h
index ad2a9a852aea..b4fd5a7c2aaa 100644
--- a/include/linux/mfd/max77620.h
+++ b/include/linux/mfd/max77620.h
@@ -136,8 +136,8 @@
 #define MAX77620_FPS_PERIOD_MIN_US 40
 #define MAX20024_FPS_PERIOD_MIN_US 20
 
-#define MAX77620_FPS_PERIOD_MAX_US 2560
-#define MAX20024_FPS_PERIOD_MAX_US 5120
+#define MAX20024_FPS_PERIOD_MAX_US 2560
+#define MAX77620_FPS_PERIOD_MAX_US 5120
 
 #define MAX77620_REG_FPS_GPIO1 0x54
 #define MAX77620_REG_FPS_GPIO2 0x55
-- 
2.21.0

[PATCH v1 0/5] Add support for Maxim 77663 MFD

2019-04-21 Thread Dmitry Osipenko

Hello,

This series adds support for the Maxim 77663 chip that provides PMIC, RTC,
GPIO and watchdog timer functionality. The hardware is very similar to the
Maxim 77620/20024 hardware units that are already supported by the kernel,
hence we will reuse the existing drivers for 77663. The GPIO, regulator,
RTC and watchdog timer functionality was tested on a Nexus 7 tablet that
has the Max77663 chip, everything is working perfectly fine. I'm looking
at upstreaming support for that tablet device and Max77663 is one of the
core components that are currently missing in the upstream kernel.

Dmitry Osipenko (5):
  mfd: max77620: Fix swapped FPS_PERIOD_MAX_US values
  mfd: max77620: Support Maxim 77663
  regulator: max77620: Support Maxim 77663
  dt-bindings: mfd: max77620: Add compatible for Maxim 77663
  mfd: max77620: Support device-tree properly

 .../devicetree/bindings/mfd/max77620.txt  |  4 +-
 drivers/mfd/max77620.c| 99 +--
 drivers/regulator/max77620-regulator.c| 26 -
 include/linux/mfd/max77620.h  |  5 +-
 4 files changed, 123 insertions(+), 11 deletions(-)

-- 
2.21.0

[PATCH v1 3/5] regulator: max77620: Support Maxim 77663

2019-04-21 Thread Dmitry Osipenko

Add support for Maxim 77663.

Signed-off-by: Dmitry Osipenko 
---
 drivers/regulator/max77620-regulator.c | 26 +-
 1 file changed, 25 insertions(+), 1 deletion(-)

diff --git a/drivers/regulator/max77620-regulator.c 
b/drivers/regulator/max77620-regulator.c
index 1607ac673e44..7d6b53828056 100644
--- a/drivers/regulator/max77620-regulator.c
+++ b/drivers/regulator/max77620-regulator.c
@@ -761,6 +761,24 @@ static struct max77620_regulator_info 
max20024_regs_info[MAX77620_NUM_REGS] = {
RAIL_LDO(LDO8, ldo8, "in-ldo7-8", N, 80, 395, 5),
 };
 
+static struct max77620_regulator_info max77663_regs_info[MAX77620_NUM_REGS] = {
+   RAIL_SD(SD0, sd0, "in-sd0", SD0, 60, 3387500, 12500, 0xFF, NONE),
+   RAIL_SD(SD1, sd1, "in-sd1", SD1, 80, 1587500, 12500, 0xFF, NONE),
+   RAIL_SD(SD2, sd2, "in-sd2", SDX, 60, 3787500, 12500, 0xFF, NONE),
+   RAIL_SD(SD3, sd3, "in-sd3", SDX, 60, 3787500, 12500, 0xFF, NONE),
+   RAIL_SD(SD4, sd4, "in-sd4", SDX, 60, 3787500, 12500, 0xFF, NONE),
+
+   RAIL_LDO(LDO0, ldo0, "in-ldo0-1", N, 80, 2375000, 25000),
+   RAIL_LDO(LDO1, ldo1, "in-ldo0-1", N, 80, 2375000, 25000),
+   RAIL_LDO(LDO2, ldo2, "in-ldo2",   P, 80, 395, 5),
+   RAIL_LDO(LDO3, ldo3, "in-ldo3-5", P, 80, 395, 5),
+   RAIL_LDO(LDO4, ldo4, "in-ldo4-6", P, 80, 1587500, 12500),
+   RAIL_LDO(LDO5, ldo5, "in-ldo3-5", P, 80, 395, 5),
+   RAIL_LDO(LDO6, ldo6, "in-ldo4-6", P, 80, 395, 5),
+   RAIL_LDO(LDO7, ldo7, "in-ldo7-8", N, 80, 395, 5),
+   RAIL_LDO(LDO8, ldo8, "in-ldo7-8", N, 80, 395, 5),
+};
+
 static int max77620_regulator_probe(struct platform_device *pdev)
 {
struct max77620_chip *max77620_chip = dev_get_drvdata(pdev->dev.parent);
@@ -785,9 +803,14 @@ static int max77620_regulator_probe(struct platform_device 
*pdev)
case MAX77620:
rinfo = max77620_regs_info;
break;
-   default:
+   case MAX20024:
rinfo = max20024_regs_info;
break;
+   case MAX77663:
+   rinfo = max77663_regs_info;
+   break;
+   default:
+   return -EINVAL;
}
 
config.regmap = pmic->rmap;
@@ -881,6 +904,7 @@ static const struct dev_pm_ops max77620_regulator_pm_ops = {
 static const struct platform_device_id max77620_regulator_devtype[] = {
{ .name = "max77620-pmic", },
{ .name = "max20024-pmic", },
+   { .name = "max77663-pmic", },
{},
 };
 MODULE_DEVICE_TABLE(platform, max77620_regulator_devtype);
-- 
2.21.0

[PATCH v1 5/5] mfd: max77620: Support device-tree properly

2019-04-21 Thread Dmitry Osipenko

For some unknown reason the driver for Max77620 doesn't wire up the
device-tree support properly and nothing in kernel creates I2C device
for the driver (and never did), moreover device-tree files for NVIDIA
Tegra210/186/194 boards already have nodes for Max77620. Hence add the
missing of_match_table to make driver actually usable.

Signed-off-by: Dmitry Osipenko 
---
 drivers/mfd/max77620.c | 33 ++---
 1 file changed, 26 insertions(+), 7 deletions(-)

diff --git a/drivers/mfd/max77620.c b/drivers/mfd/max77620.c
index 3b6dded0595c..ef313604ca47 100644
--- a/drivers/mfd/max77620.c
+++ b/drivers/mfd/max77620.c
@@ -37,6 +37,10 @@
 #include 
 #include 
 
+struct max77620_desc {
+   enum max77620_chip_id chip_id;
+};
+
 static const struct resource gpio_resources[] = {
DEFINE_RES_IRQ(MAX77620_IRQ_TOP_GPIO),
 };
@@ -486,6 +490,7 @@ static int max77620_probe(struct i2c_client *client,
const struct regmap_config *rmap_config;
struct max77620_chip *chip;
const struct mfd_cell *mfd_cells;
+   const struct max77620_desc *desc;
int n_mfd_cells;
int ret;
 
@@ -493,11 +498,13 @@ static int max77620_probe(struct i2c_client *client,
if (!chip)
return -ENOMEM;
 
+   desc = of_device_get_match_data(>dev);
+
i2c_set_clientdata(client, chip);
chip->dev = >dev;
chip->irq_base = -1;
chip->chip_irq = client->irq;
-   chip->chip_id = (enum max77620_chip_id)id->driver_data;
+   chip->chip_id = desc->chip_id;
 
switch (chip->chip_id) {
case MAX77620:
@@ -665,11 +672,23 @@ static int max77620_i2c_resume(struct device *dev)
 }
 #endif
 
-static const struct i2c_device_id max77620_id[] = {
-   {"max77620", MAX77620},
-   {"max20024", MAX20024},
-   {"max77663", MAX77663},
-   {},
+static const struct max77620_desc max77620_desc = {
+   .chip_id = MAX77620,
+};
+
+static const struct max77620_desc max20024_desc = {
+   .chip_id = MAX20024,
+};
+
+static const struct max77620_desc max77663_desc = {
+   .chip_id = MAX77663,
+};
+
+static const struct of_device_id max77620_of_match[] = {
+   { .compatible = "maxim,max77620", .data = &max77620_desc },
+   { .compatible = "maxim,max20024", .data = &max20024_desc },
+   { .compatible = "maxim,max77663", .data = &max77663_desc },
+   { },
 };
 
 static const struct dev_pm_ops max77620_pm_ops = {
@@ -680,8 +699,8 @@ static struct i2c_driver max77620_driver = {
.driver = {
.name = "max77620",
.pm = _pm_ops,
+   .of_match_table = of_match_ptr(max77620_of_match),
},
.probe = max77620_probe,
-   .id_table = max77620_id,
 };
 builtin_i2c_driver(max77620_driver);
-- 
2.21.0

Re: [PATCH] i2c: mux: demux-pinctrl: use struct_size() in devm_kzalloc()

2019-04-21 Thread Peter Rosin

On 2019-01-04 19:06, Wolfram Sang wrote:
> 
>> Instead of leaving these open-coded and prone to type mistakes, we can
>> now use the new struct_size() helper:
> 
> I am okay with this patch in general. However, I think the name of the
> helper 'struct_size' is a bit unfortunate. I really wondered on the
> first read why it needs 3 arguments to find out a struct size. I only
> understood on the second read. I think 'trailing_array_struct_size'
> would make it much clearer what it does.

I read that as an Acked-by... :-)

Cheers,
Peter

Linux v5.1-rc6

2019-04-21 Thread Linus Torvalds

It's Easter Sunday here, but I don't let little things like random
major religious holidays interrupt my kernel development workflow. The
occasional scuba trip? Sure. But everybody sitting around eating
traditional foods? No. You have to have priorities. There's only so
much memma you can eat even if your wife had to make it from scratch
because nobody eats that stuff in the US.

Anyway, rc6 is actually larger than I would have liked, which made me
go back and look at history, and for some reason that's not all that
unusual. We recently had similar rc6 bumps in both 4.18 and 5.0.

So I'm not going to worry about it. I think it's just random timing of
pull requests, and almost certainly at least partly due to the
networking pull request in here (with just over a third of the changes
being networking-related, either in drivers or core networking).

Aside from the networking stuff, we've got the usual other driver
updates (nvdimm, iio, gpu stands out), arch updates (mainly x86 - kvm
fixes stand out), and tooling (selftests and perf).

And then we have various random collection of other changes: some core
mm, some filesystem fixes, scheduler and tracing stuff.

But despite rc6 being a bit larger than I'd hope for, all of it is
pretty small, and I don't think there's anything really worrisome in
here. In fact, a lot of it is truly trivial stuff, some of it just
spelling fixes and the like.

Have a gander at the appended shortlog for details if you care, but
more importantly, give it a whirl and kick the tires...

   Linus

---

Aditya Pakki (1):
  libnvdimm/btt: Fix a kmemdup failure check

Adrian Hunter (1):
  perf scripts python: export-to-sqlite.py: Fix use of parent_id
in calls_view

Alex Deucher (1):
  drm/amdgpu/gmc9: fix VM_L2_CNTL3 programming

Alexander Shishkin (2):
  perf/core: Fix the address filtering fix
  perf/ring_buffer: Fix AUX record suppression

Alexander Wetzel (1):
  mac80211: Honor SW_CRYPTO_CONTROL for unicast keys in AP VLAN mode

Alexey Dobriyan (2):
  proc: fix map_files test on F29
  proc: fixup proc-pid-vm test

Andi Kleen (2):
  afs: Avoid section confusion in CM_NAME
  x86/cpu/bugs: Use __initconst for 'const' init data

Andrea Arcangeli (1):
  coredump: fix race condition between
mmget_not_zero()/get_task_mm() and core dumping

Andrei Otcheretianski (1):
  mac80211: Increase MAX_MSG_LEN

Andrew Morton (1):
  locking/atomics: Don't assume that scripts are executable

Andy Duan (1):
  net: fec: manage ahb clock in runtime pm

Aneesh Kumar K.V (1):
  fs/dax: Deposit pagetable even when installing zero page

Anson Huang (1):
  Input: snvs_pwrkey - initialize necessary driver data before enabling IRQ

Anup Patel (2):
  RISC-V: Add separate defconfig for 32bit systems
  RISC-V: Fix Maximum Physical Memory 2GiB option for 64bit systems

Ard Biesheuvel (1):
  net/core: work around section mismatch warning for ptp_classifier

Arnaldo Carvalho de Melo (1):
  tools include uapi: Sync sound/asound.h copy

Arnd Bergmann (7):
  iio: pms7003: select IIO_TRIGGERED_BUFFER
  extcon: ptn5150: fix COMPILE_TEST dependencies
  gpu: host1x: Program stream ID to bypass without SMMU
  clocksource/drivers/npcm: select TIMER_OF
  afs: avoid deprecated get_seconds()
  socket: fix compat SO_RCVTIMEO_NEW/SO_SNDTIMEO_NEW
  mm/kmemleak.c: fix unused-function warning

Aurelien Aptel (1):
  CIFS: keep FileInfo handle live during oplock break

Baoquan He (1):
  x86/mm/KASLR: Fix the size of the direct mapping section

Bart Van Assche (1):
  locking/lockdep: Make lockdep_unregister_key() honor 'debug_locks' again

Ben Gardon (1):
  kvm: mmu: Fix overflow on kvm mmu page limit calculation

Bo YU (1):
  misc: fastrpc: add checked value for dma_set_mask

Boris Brezillon (1):
  MAINTAINERS: Fix the I3C entry

Chang-An Chen (1):
  timers/sched_clock: Prevent generic sched_clock wrap caused by
tick_freeze()

Christian Brauner (1):
  signal: use fdget() since we don't allow O_PATH

Christian Gromm (1):
  staging: most: core: use device description as name

Christian König (3):
  drm/ttm: fix out-of-bounds read in ttm_put_pages() v2
  drm/ttm: fix start page for huge page check in ttm_put_pages()
  drm/ttm: fix incrementing the page pointer for huge pages

Christoph Hellwig (1):
  scsi: aic7xxx: fix EISA support

Claudiu Manoil (2):
  ocelot: Don't sleep in atomic context (irqs_disabled())
  ocelot: Clean up stats update deferred work

Colin Ian King (5):
  vxge: fix return of a free'd memblock on a failed dma mapping
  qede: fix write to free'd pointer error and double free of ptp
  bnx2x: fix spelling mistake "dicline" -> "decline"
  x86/Kconfig: Fix spelling mistake "effectivness" -> "effectiveness"
  qed: fix spelling mistake "faspath" -> "fastpath"

Corey Minyard (2):
  ipmi: Fix failure on SMBIOS specified devices

Re: [GIT PULL] locking fixes

2019-04-21 Thread Ingo Molnar



* Linus Torvalds  wrote:

> On Sat, Apr 20, 2019 at 12:30 AM Ingo Molnar  wrote:
> >
> > A lockdep warning fix and a script execution fix when atomics are
> > generated.
> 
> Hmm. I've pulled this, but looking at it, I think it's worth noting 
> something...
> 
> > diff --git a/scripts/atomic/gen-atomics.sh b/scripts/atomic/gen-atomics.sh
> > index 27400b0cd732..000dc6437893 100644
> > --- a/scripts/atomic/gen-atomics.sh
> > +++ b/scripts/atomic/gen-atomics.sh
> > -   ${ATOMICDIR}/${script} ${ATOMICTBL} > ${LINUXDIR}/include/${header}
> > +   /bin/sh ${ATOMICDIR}/${script} ${ATOMICTBL} > 
> > ${LINUXDIR}/include/${header}
> 
> /bin/sh ?
> 
> Yes, that's what the hash-bang line says in the scripts themselves,
> and thus what we used to do with the whole direct execution thing, so
> it's clearly not _wrong_, but every single time when we manually do
> the "run with shell" normally, we use $(CONFIG_SHELL)".
> 
> So I get the feeling that we should likely do that here too.
> 
> Of course, the gen-atomics script is (now) outside the normal build,
> so maybe people just go "this is special, doesn't go through the
> normal build process anyway, and thus might as well not follow the
> common rules".

Yeah, agreed that this is all a bit weird. The status quo right now is:

 - scripts/atomic/gen-atomics.sh is a completely standalone, external 
   script which isn't even tied into any Makefile mechanism to build the 
   kernel.

 - To generate the headers one has to explicitly call 
   scripts/atomic/gen-atomics.sh, and it's not even executable, so the 
   incantation is even more weird:

 $ . scripts/atomic/gen-atomics.sh

So I agree that the UI of all this should be improved, I suspect we 
should do the following improvements:

   - make gen-atomics.sh executable

   - add a "make headers_gen_atomics" target to the main Makefile

   - call gen-atomics.sh via the build system and thus have access to 
 $CONFIG_SHELL and such and don't have assumptions about the shell 
 environment.

Arguably /bin/sh tends to exist during the build, everywhere. What 
usually results in the use of CONFIG_SHELL isn't /bin/sh per se but 
specific shell variant assumptions such as /bin/bash and the resulting 
occasional Bashism in the scripts - there are systems with non-bash 
shells by default and so on.

Thanks,

Ingo

Re: [PATCH 1/2] X86/kdump: move crashkernel=X to reserve under 4G by default

2019-04-21 Thread Ingo Molnar



* Dave Young  wrote:

> The kdump crashkernel low reservation is limited to under 896M even for
> X86_64. This obscure and miserable limitation exists for old kexec-tools
> compatibility, but the reason is not documented anywhere.
> 
> Some more tests/investigations about the background:
> a) Previously old kexec-tools can only load purgatory to memory under 2G,
>    Eric removed that limitation in 2012 in kexec-tools:
>Commit b4f9f8599679 ("kexec x86_64: Make purgatory relocatable anywhere
>in the 64bit address space.")
> 
> b) back in 2013 Yinghai removed all the limitations in new kexec-tools,
>bzImage64 can be loaded to anywhere.
>Commit 82c3dd2280d2 ("kexec, x86_64: Load bzImage64 above 4G")
> 
> c) test results with old kexec-tools with old and latest kernels.
>   1. old kexec-tools can not build with modern toolchain anymore,
>  I built it in a RHEL6 vm
>   2. 2.0.0 kexec-tools does not work with latest kernel even with
>  memory under 896M and give an error:
>  "ELF core (kcore) parse failed", it needs below kexec-tools fix 
>  Commit ed15ba1b9977 ("build_mem_phdrs(): check if p_paddr is invalid")
>   3. even with patched kexec-tools which fixes 2),  it still needs some
>  other fixes to work correctly for kaslr enabled kernels.
> 
> So the situation is:
> * old kexec-tools is already broken with latest kernels
> * we can not keep these limitations forever just for compatibility of very
>   old kexec-tools.
> * If one must use old tools then he/she can choose crashkernel=X@Y
> * people have reported bugs crashkernel=384M failed because kaslr makes
>   the 0-896M space sparse, 
> * crashkernel can reserve in low or high area, it is natural to understand 
>   low as memory under 4G
> 
> Hence drop the 896M limitation, and change crashkernel low reservation to
> reserve under 4G by default.
> 
> Signed-off-by: Dave Young 
> ---
>  arch/x86/kernel/setup.c |   10 +-
>  1 file changed, 5 insertions(+), 5 deletions(-)
> 
> --- linux-x86.orig/arch/x86/kernel/setup.c
> +++ linux-x86/arch/x86/kernel/setup.c
> @@ -71,6 +71,7 @@
>  #include 
>  #include 
>  #include 
> +#include 
>  
>  #include 
>  #include 
> @@ -448,18 +449,17 @@ static void __init memblock_x86_reserve_
>  #ifdef CONFIG_KEXEC_CORE
>  
>  /* 16M alignment for crash kernel regions */
> -#define CRASH_ALIGN  (16 << 20)
> +#define CRASH_ALIGN  SZ_16M
>  
>  /*
>   * Keep the crash kernel below this limit.  On 32 bits earlier kernels
>   * would limit the kernel to the low 512 MiB due to mapping restrictions.
> - * On 64bit, old kexec-tools need to under 896MiB.
>   */
>  #ifdef CONFIG_X86_32
> -# define CRASH_ADDR_LOW_MAX  (512 << 20)
> -# define CRASH_ADDR_HIGH_MAX (512 << 20)
> +# define CRASH_ADDR_LOW_MAX  SZ_512M
> +# define CRASH_ADDR_HIGH_MAX SZ_512M
>  #else
> -# define CRASH_ADDR_LOW_MAX  (896UL << 20)
> +# define CRASH_ADDR_LOW_MAX  SZ_4G
>  # define CRASH_ADDR_HIGH_MAX MAXMEM
>  #endif

Reviewed-by: Ingo Molnar 

Thanks,

Ingo

Re: [PATCH] Documentation: decnet: remove reference to CONFIG_DECNET_ROUTE_FWMARK

2019-04-21 Thread David Miller

From: Corentin Labbe 
Date: Sat, 20 Apr 2019 16:43:01 +

> CONFIG_DECNET_ROUTE_FWMARK was removed in commit 47dcf0cb1005 ("[NET]: 
> Rethink mark field in struct flowi")
> Since nothing replaces it (and nothing needs to replace it), simply remove
> it from documentation.
> 
> Signed-off-by: Corentin Labbe 

Applied.

Re: [PATCH 2/2] X86/kdump: fall back to reserve high crashkernel memory

2019-04-21 Thread Ingo Molnar



* Dave Young  wrote:

> crashkernel=xM tries to reserve crashkernel memory under 4G, which
> is enough for usual cases.  But this could fail sometimes, for example
> one tries to reserve a big chunk like 2G, it is possible to fail.
> 
> So let the crashkernel=xM just fall back to use high memory in case it
> fails to find a suitable low range.  Do not set the ,high as default
> because it allocs extra low memory for DMA buffers and swiotlb, this is
> not always necessary for all machines. Typically like crashkernel=128M
> usually work with low reservation under 4G, so still keep <4G as default.
> 
> Signed-off-by: Dave Young 
> ---
>  Documentation/admin-guide/kernel-parameters.txt |7 +--
>  arch/x86/kernel/setup.c |   22 ++
>  2 files changed, 19 insertions(+), 10 deletions(-)
> 
> --- linux-x86.orig/arch/x86/kernel/setup.c
> +++ linux-x86/arch/x86/kernel/setup.c
> @@ -541,21 +541,27 @@ static void __init reserve_crashkernel(v
>   }
>  
>   /* 0 means: find the address automatically */
> - if (crash_base <= 0) {
> + if (!crash_base) {
>   /*
>* Set CRASH_ADDR_LOW_MAX upper bound for crash memory,
> -  * as old kexec-tools loads bzImage below that, unless
> -  * "crashkernel=size[KMG],high" is specified.
> +  * as crashkernel=x,high allocs memory over 4G, also allocs

s/allocs
 /allocates

> +  * 256M extra low memory for DMA buffers and swiotlb.
> +  * but the extra memory is not required for all machines.
> +  * So prefer low memory first, and fallback to high memory

s/fallback
 /fall back

> +  * unless "crashkernel=size[KMG],high" is specified.
>*/
> - crash_base = memblock_find_in_range(CRASH_ALIGN,
> - high ? CRASH_ADDR_HIGH_MAX
> -  : CRASH_ADDR_LOW_MAX,
> - crash_size, CRASH_ALIGN);
> + if (!high)
> + crash_base = memblock_find_in_range(CRASH_ALIGN,
> + CRASH_ADDR_LOW_MAX,
> + crash_size, CRASH_ALIGN);
> + if (!crash_base)
> + crash_base = memblock_find_in_range(CRASH_ALIGN,
> + CRASH_ADDR_HIGH_MAX,
> + crash_size, CRASH_ALIGN);
>   if (!crash_base) {
>   pr_info("crashkernel reservation failed - No suitable 
> area found.\n");
>   return;
>   }
> -
>   } else {
>   unsigned long long start;
>  
> --- linux-x86.orig/Documentation/admin-guide/kernel-parameters.txt
> +++ linux-x86/Documentation/admin-guide/kernel-parameters.txt
> @@ -704,8 +704,11 @@
>   upon panic. This parameter reserves the physical
>   memory region [offset, offset + size] for that kernel
>   image. If '@offset' is omitted, then a suitable offset
> - is selected automatically. Check
> - Documentation/kdump/kdump.txt for further details.
> + is selected automatically.
> + [KNL, x86_64] select a region under 4G first, and
> + fallback to reserve region above 4G in case without

s/fallback
 /fall back

> + '@offset'.
> + See Documentation/kdump/kdump.txt for further details.
>  
>   crashkernel=range1:size1[,range2:size2,...][@offset]
>   [KNL] Same as above, but depends on the memory

With the nits fixed:

Reviewed-by: Ingo Molnar 

Thanks,

Ingo

Re: [PATCH] x86_64: uninline TASK_SIZE

2019-04-21 Thread Ingo Molnar



* Alexey Dobriyan  wrote:

> TASK_SIZE macro is quite deceptive: it looks like a constant but in fact
> compiles to 50+ bytes.
> 
> Space savings on x86_64 defconfig:
> 
> add/remove: 1/0 grow/shrink: 3/24 up/down: 77/-2247 (-2170)
> Function old new   delta
> _task_size -  52 +52
> mpol_shared_policy_init  344 363 +19
> shmem_get_unmapped_area   92  97  +5
> __rseq_handle_notify_resume.cold  34  35  +1
> copy_from_user_nmi   123 113 -10
> mmap_address_hint_valid   92  56 -36
> arch_get_unmapped_area_topdown   471 435 -36
> tlb_gather_mmu   164 126 -38
> hugetlb_get_unmapped_area774 736 -38
> __create_xol_area497 458 -39
> arch_tlb_gather_mmu  160 120 -40
> setup_new_exec   380 336 -44
> __x64_sys_mlockall   378 333 -45
> __ia32_sys_mlockall  378 333 -45
> tlb_flush_mmu235 189 -46
> unmap_page_range20982048 -50
> copy_mount_options   518 465 -53
> __get_user_pages17371675 -62
> get_unmapped_area270 204 -66
> perf_prepare_sample 11761098 -78
> perf_callchain_user  549 469 -80
> mremap_to.isra   545 457 -88
> arch_tlb_finish_mmu  394 305 -89
> __do_munmap 1039 927-112
> elf_map  527 409-118
> prctl_set_mm15091335-174
> __rseq_handle_notify_resume 1116 906-210
> load_elf_binary                  11761   11111    -650
> Total: Before=14121337, After=14119167, chg -0.02%
> 
> Signed-off-by: Alexey Dobriyan 
> ---
> 
>  arch/x86/include/asm/processor.h |4 ++--
>  arch/x86/kernel/Makefile |1 +
>  arch/x86/kernel/task_size_64.c   |9 +
>  3 files changed, 12 insertions(+), 2 deletions(-)
> 
> --- a/arch/x86/include/asm/processor.h
> +++ b/arch/x86/include/asm/processor.h
> @@ -887,8 +887,8 @@ static inline void spin_lock_prefetch(const void *x)
>  
>  #define TASK_SIZE_LOW   (test_thread_flag(TIF_ADDR32) ? \
>   IA32_PAGE_OFFSET : DEFAULT_MAP_WINDOW)
> -#define TASK_SIZE   (test_thread_flag(TIF_ADDR32) ? \
> - IA32_PAGE_OFFSET : TASK_SIZE_MAX)
> +unsigned long _task_size(void);
> +#define TASK_SIZE   _task_size()
>  #define TASK_SIZE_OF(child)  ((test_tsk_thread_flag(child, TIF_ADDR32)) ? \
>   IA32_PAGE_OFFSET : TASK_SIZE_MAX)
>  
> --- a/arch/x86/kernel/Makefile
> +++ b/arch/x86/kernel/Makefile
> @@ -46,6 +46,7 @@ CFLAGS_irq.o := -I$(src)/../include/asm/trace
>  
>  obj-y:= process_$(BITS).o signal.o
>  obj-$(CONFIG_COMPAT) += signal_compat.o
> +obj-$(CONFIG_X86_64) += task_size_64.o
>  obj-y+= traps.o idt.o irq.o irq_$(BITS).o 
> dumpstack_$(BITS).o
>  obj-y+= time.o ioport.o dumpstack.o nmi.o
>  obj-$(CONFIG_MODIFY_LDT_SYSCALL) += ldt.o
> new file mode 100644
> --- /dev/null
> +++ b/arch/x86/kernel/task_size_64.c
> @@ -0,0 +1,9 @@
> +#include 
> +#include 
> +#include 
> +
> +unsigned long _task_size(void)
> +{
> + return test_thread_flag(TIF_ADDR32) ? IA32_PAGE_OFFSET : TASK_SIZE_MAX;
> +}
> +EXPORT_SYMBOL(_task_size);

Good idea - but instead of adding yet another compilation unit, why not 
stick _task_size() into arch/x86/kernel/process_64.c, which is the 
canonical place for process management related arch functions?

Thanks,

Ingo

Re: [tip:x86/mm] x86/fault: Decode and print #PF oops in human readable form

2019-04-21 Thread Borislav Petkov

On Fri, Apr 19, 2019 at 11:35:51AM -0700, tip-bot for Sean Christopherson wrote:
> BUG: kernel NULL pointer dereference, address = 0008
> #PF: supervisor-privileged instruction fetch from kernel code
> #PF: error_code(0x0010) - not-present page
> 
> BUG: unable to handle page fault for address = beef
> #PF: supervisor-privileged instruction fetch from kernel code
> #PF: error_code(0x0010) - not-present page
> 
> BUG: unable to handle page fault for address = c923
> #PF: supervisor-privileged write access from kernel code
> #PF: error_code(0x000b) - reserved bit violation

Writing those in human-readable form is nice. May I suggest making those
messages more succinct, though - we'll be staring at them for years,
after all.

---
From: Borislav Petkov 
Date: Sun, 21 Apr 2019 20:24:08 +0200
Subject: [PATCH] x86/fault: Make fault messages more succinct

So we are going to be staring at those in the next years, let's make
them more succinct. In particular:

 - change "address = " to "address: "
 - "-privileged" reads funny. It should be simply "kernel" or "user"
 - "from kernel code" reads funny too. "kernel mode" or "user mode" is
   more natural.

An actual example says more than 1000 words, of course:

  [0.248370] BUG: kernel NULL pointer dereference, address: 05b8
  [0.249120] #PF: supervisor write access in kernel mode
  [0.249717] #PF: error_code(0x0002) - not-present page

Signed-off-by: Borislav Petkov 
---
 arch/x86/mm/fault.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index 74c9204c5751..a0df19b0897d 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -630,13 +630,13 @@ show_fault_oops(struct pt_regs *regs, unsigned long 
error_code, unsigned long ad
}
 
if (address < PAGE_SIZE && !user_mode(regs))
-   pr_alert("BUG: kernel NULL pointer dereference, address = 
%px\n",
+   pr_alert("BUG: kernel NULL pointer dereference, address: %px\n",
(void *)address);
else
-   pr_alert("BUG: unable to handle page fault for address = %px\n",
+   pr_alert("BUG: unable to handle page fault for address: %px\n",
(void *)address);
 
-   pr_alert("#PF: %s-privileged %s from %s code\n",
+   pr_alert("#PF: %s %s in %s mode\n",
 (error_code & X86_PF_USER)  ? "user" : "supervisor",
 (error_code & X86_PF_INSTR) ? "instruction fetch" :
 (error_code & X86_PF_WRITE) ? "write access" :
-- 
2.21.0

-- 
Regards/Gruss,
Boris.

Good mailing practices for 400: avoid top-posting and trim the reply.

[tip:x86/mm] x86/fault: Make fault messages more succinct

2019-04-21 Thread tip-bot for Borislav Petkov

Commit-ID:  ea2f8d60603efbd1cb4e193a593945a2fe24d264
Gitweb: https://git.kernel.org/tip/ea2f8d60603efbd1cb4e193a593945a2fe24d264
Author: Borislav Petkov 
AuthorDate: Sun, 21 Apr 2019 20:35:24 +0200
Committer:  Ingo Molnar 
CommitDate: Sun, 21 Apr 2019 20:48:51 +0200

x86/fault: Make fault messages more succinct

So we are going to be staring at those in the next years, let's make
them more succinct. In particular:

 - change "address = " to "address: "

 - "-privileged" reads funny. It should be simply "kernel" or "user"

 - "from kernel code" reads funny too. "kernel mode" or "user mode" is
   more natural.

An actual example says more than 1000 words, of course:

  [0.248370] BUG: kernel NULL pointer dereference, address: 05b8
  [0.249120] #PF: supervisor write access in kernel mode
  [0.249717] #PF: error_code(0x0002) - not-present page

Signed-off-by: Borislav Petkov 
Cc: Linus Torvalds 
Cc: Peter Zijlstra 
Cc: Thomas Gleixner 
Cc: dave.han...@linux.intel.com
Cc: l...@kernel.org
Cc: r...@surriel.com
Cc: sean.j.christopher...@intel.com
Cc: yu-cheng...@intel.com
Link: http://lkml.kernel.org/r/20190421183524.gc6...@zn.tnic
Signed-off-by: Ingo Molnar 
---
 arch/x86/mm/fault.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index 74c9204c5751..a0df19b0897d 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -630,13 +630,13 @@ show_fault_oops(struct pt_regs *regs, unsigned long 
error_code, unsigned long ad
}
 
if (address < PAGE_SIZE && !user_mode(regs))
-   pr_alert("BUG: kernel NULL pointer dereference, address = 
%px\n",
+   pr_alert("BUG: kernel NULL pointer dereference, address: %px\n",
(void *)address);
else
-   pr_alert("BUG: unable to handle page fault for address = %px\n",
+   pr_alert("BUG: unable to handle page fault for address: %px\n",
(void *)address);
 
-   pr_alert("#PF: %s-privileged %s from %s code\n",
+   pr_alert("#PF: %s %s in %s mode\n",
 (error_code & X86_PF_USER)  ? "user" : "supervisor",
 (error_code & X86_PF_INSTR) ? "instruction fetch" :
 (error_code & X86_PF_WRITE) ? "write access" :

[PATCH v3 1/2] power: reset: nvmem-reboot-mode: use NVMEM as reboot mode write interface

2019-04-21 Thread Han Nandor

Add a new reboot mode write interface that is using an NVMEM cell
to store the reboot mode magic.

Signed-off-by: Nandor Han 
---
 drivers/power/reset/Kconfig |  9 +++
 drivers/power/reset/Makefile|  1 +
 drivers/power/reset/nvmem-reboot-mode.c | 76 +
 3 files changed, 86 insertions(+)
 create mode 100644 drivers/power/reset/nvmem-reboot-mode.c

diff --git a/drivers/power/reset/Kconfig b/drivers/power/reset/Kconfig
index 6533aa560aa1..bb4a4e854f96 100644
--- a/drivers/power/reset/Kconfig
+++ b/drivers/power/reset/Kconfig
@@ -245,5 +245,14 @@ config POWER_RESET_SC27XX
  PMICs includes the SC2720, SC2721, SC2723, SC2730
  and SC2731 chips.
 
+config NVMEM_REBOOT_MODE
+   tristate "Generic NVMEM reboot mode driver"
+   select REBOOT_MODE
+   help
+ Say y here will enable reboot mode driver. This will
+ get reboot mode arguments and store it in a NVMEM cell,
+ then the bootloader can read it and take different
+ action according to the mode.
+
 endif
 
diff --git a/drivers/power/reset/Makefile b/drivers/power/reset/Makefile
index 0aebee954ac1..85da3198e4e0 100644
--- a/drivers/power/reset/Makefile
+++ b/drivers/power/reset/Makefile
@@ -29,3 +29,4 @@ obj-$(CONFIG_POWER_RESET_ZX) += zx-reboot.o
 obj-$(CONFIG_REBOOT_MODE) += reboot-mode.o
 obj-$(CONFIG_SYSCON_REBOOT_MODE) += syscon-reboot-mode.o
 obj-$(CONFIG_POWER_RESET_SC27XX) += sc27xx-poweroff.o
+obj-$(CONFIG_NVMEM_REBOOT_MODE) += nvmem-reboot-mode.o
diff --git a/drivers/power/reset/nvmem-reboot-mode.c 
b/drivers/power/reset/nvmem-reboot-mode.c
new file mode 100644
index ..e229308d43e2
--- /dev/null
+++ b/drivers/power/reset/nvmem-reboot-mode.c
@@ -0,0 +1,76 @@
+// SPDX-License-Identifier: GPL-2.0+
+/*
+ * Copyright (c) Vaisala Oyj. All rights reserved.
+ */
+
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+
+struct nvmem_reboot_mode {
+   struct reboot_mode_driver reboot;
+   struct nvmem_cell *cell;
+};
+
+static int nvmem_reboot_mode_write(struct reboot_mode_driver *reboot,
+   unsigned int magic)
+{
+   int ret;
+   struct nvmem_reboot_mode *nvmem_rbm;
+
+   nvmem_rbm = container_of(reboot, struct nvmem_reboot_mode, reboot);
+
+   ret = nvmem_cell_write(nvmem_rbm->cell, &magic, sizeof(magic));
+   if (ret < 0)
+   dev_err(reboot->dev, "update reboot mode bits failed\n");
+
+   return ret;
+}
+
+static int nvmem_reboot_mode_probe(struct platform_device *pdev)
+{
+   int ret;
+   struct nvmem_reboot_mode *nvmem_rbm;
+
+   nvmem_rbm = devm_kzalloc(&pdev->dev, sizeof(*nvmem_rbm), GFP_KERNEL);
+   if (!nvmem_rbm)
+   return -ENOMEM;
+
+   nvmem_rbm->reboot.dev = &pdev->dev;
+   nvmem_rbm->reboot.write = nvmem_reboot_mode_write;
+
+   nvmem_rbm->cell = devm_nvmem_cell_get(&pdev->dev, "reboot-mode");
+   if (IS_ERR(nvmem_rbm->cell)) {
+   dev_err(&pdev->dev, "failed to get the nvmem cell 
reboot-mode\n");
+   return PTR_ERR(nvmem_rbm->cell);
+   }
+
+   ret = devm_reboot_mode_register(&pdev->dev, &nvmem_rbm->reboot);
+   if (ret)
+   dev_err(&pdev->dev, "can't register reboot mode\n");
+
+   return ret;
+}
+
+static const struct of_device_id nvmem_reboot_mode_of_match[] = {
+   { .compatible = "nvmem-reboot-mode" },
+   {}
+};
+MODULE_DEVICE_TABLE(of, nvmem_reboot_mode_of_match);
+
+static struct platform_driver nvmem_reboot_mode_driver = {
+   .probe = nvmem_reboot_mode_probe,
+   .driver = {
+   .name = "nvmem-reboot-mode",
+   .of_match_table = nvmem_reboot_mode_of_match,
+   },
+};
+module_platform_driver(nvmem_reboot_mode_driver);
+
+MODULE_AUTHOR("Nandor Han ");
+MODULE_DESCRIPTION("NVMEM reboot mode driver");
+MODULE_LICENSE("GPL");
-- 
2.17.2

[PATCH v3 0/2] Use NVMEM as reboot-mode write interface

2019-04-21 Thread Han Nandor

Description
---
Extend the reboot mode driver to use a NVMEM cell as writing interface.

Testing
---
The testing is done by configuring DT from a custom board.
The NVMEM cell is configured in an RTC non-volatile memory.
Kernel: 4.14.60 (the patchset was rebased on kernel master)

DT configurations:
`
...
reboot-mode-nvmem@0 {
compatible = "simple-mfd";
reboot-mode {
compatible = "nvmem-reboot-mode";
nvmem-cells = <&reboot_mode>;
nvmem-cell-names = "reboot-mode";

mode-test   = <0x21969147>;
};
};
...
reboot_mode: nvmem_reboot_mode@0 {
reg = <0x00 0x4>;
};
...
`

1. Reboot the system using the command `reboot test`

2. Verify that kernel logs show that reboot was done in mode `test`:
PASS
`[  413.957172] reboot: Restarting system with command 'test' `

3. Stop in U-Boot and verify that mode `test` magic value is present
in RTCs non-volatile memory: PASS

Kernel: 5.1.0-rc3

1. Configure `arch/arm/configs/imx_v6_v7_defconfig` to contain 
`CONFIG_NVMEM_REBOOT_MODE=y`
2. Verify that the kernel compiles successfully: PASS
`
make ARCH=arm CROSS_COMPILE=arm-linux-gnu- imx_v6_v7_defconfig zImage
...
CC  drivers/power/reset/nvmem-reboot-mode.o
...
Kernel: arch/arm/boot/zImage is ready
`
Changes since v1:
-
 - split the documentation on a separate patch
 - add a missing header

Changes since v2:

 - change the module license to GPL since GPL v2 is deprecated

Nandor Han (2):
  power: reset: nvmem-reboot-mode: use NVMEM as reboot mode write
interface
  dt-bindings: power: reset: add document for NVMEM based reboot-mode

 .../power/reset/nvmem-reboot-mode.txt | 32 
 drivers/power/reset/Kconfig   |  9 +++
 drivers/power/reset/Makefile  |  1 +
 drivers/power/reset/nvmem-reboot-mode.c   | 76 +++
 4 files changed, 118 insertions(+)
 create mode 100644 
Documentation/devicetree/bindings/power/reset/nvmem-reboot-mode.txt
 create mode 100644 drivers/power/reset/nvmem-reboot-mode.c

-- 
2.17.2

[PATCH v3 2/2] dt-bindings: power: reset: add document for NVMEM based reboot-mode

2019-04-21 Thread Han Nandor

Add the device tree bindings document for the NVMEM based reboot-mode
driver.

Signed-off-by: Nandor Han 
---
 .../power/reset/nvmem-reboot-mode.txt | 32 +++
 1 file changed, 32 insertions(+)
 create mode 100644 
Documentation/devicetree/bindings/power/reset/nvmem-reboot-mode.txt

diff --git 
a/Documentation/devicetree/bindings/power/reset/nvmem-reboot-mode.txt 
b/Documentation/devicetree/bindings/power/reset/nvmem-reboot-mode.txt
new file mode 100644
index ..2e1b86c31cb3
--- /dev/null
+++ b/Documentation/devicetree/bindings/power/reset/nvmem-reboot-mode.txt
@@ -0,0 +1,32 @@
+NVMEM reboot mode driver
+
+This driver gets reboot mode magic value from reboot-mode driver
+and stores it in a NVMEM cell named "reboot-mode". Then the bootloader
+can read it and take different action according to the magic
+value stored.
+
+This DT node should be represented as a sub-node of a "simple-mfd"
+node.
+
+Required properties:
+- compatible: should be "nvmem-reboot-mode".
+- nvmem-cells: A phandle to the reboot mode provided by a nvmem device.
+- nvmem-cell-names: Should be "reboot-mode".
+
+The rest of the properties should follow the generic reboot-mode description
+found in reboot-mode.txt
+
+Example:
+   reboot-mode-nvmem@0 {
+   compatible = "simple-mfd";
+   reboot-mode {
+   compatible = "nvmem-reboot-mode";
+   nvmem-cells = <&reboot_mode>;
+   nvmem-cell-names = "reboot-mode";
+
+   mode-normal = <0x5501>;
+   mode-bootloader = <0x5500>;
+   mode-recovery   = <0x5502>;
+   mode-test   = <0x5503>;
+   };
+   };
-- 
2.17.2

Re: [PATCH v2 0/2] Intel Cherry Trail Whiskey Cove LEDs support

2019-04-21 Thread Hans de Goede


Hi Yauhen,

On 12-02-19 21:58, Yauhen Kharuzhy wrote:

This patch series introduces new driver for controlling LEDs connected
to Intel Cherry Trail Whiskey Cove PMIC (general-purpose LED and charger
status led). Only simple 'always on' and blinking modes are supported
for now, no breathing.

Driver was tested only with Lenovo Yoga Book notebook, and I don't have
any documentation for the PMIC, so proposals and testing are welcome.

v2:
   - Fix comments and code style
   - Add mutex to protect led state
   - Add defaults triggers
   - Fix module license declaration

Yauhen Kharuzhy (2):
   leds: Add Intel Cherry Trail Whiskey Cove PMIC LEDs
   mfd: Add leds MFD cell for intel_soc_pmic_chtwc


I had a discussion with Jacek Anaszewski about this in another thread and
I believe we have come up with a solution for this which should work
nicely and should allow us to move forward with your driver (after
it is reworked to match the solution). So the solution we've come up with is:

1) After thinking a bit more about the primary use-case for this, I've come
to the conclusion that putting LED1 / the charging LED in software-controlled
mode is also the correct thing to do on the GPD win / pocket. The reason for
this is that ideally the LED would glow while charging and be simply solid
on when the battery is full, the hw control does not allow this, so the GPD
win/pocket can benefit from sw-control too.

2) To allow the desired behavior we need to define a new
"-charging-glow-full-solid" trigger in
drivers/power/supply/power_supply_leds.c; and this must be the default
trigger for the Intel Cherry Trail Whiskey Cove LED driver so that
everything will just work. Also we must restore the original hw control
setting on reboot/shut-down so that this is used on the GPD win/pocket when
Linux is not running.

3) To be able to actually implement this new trigger we first need 2 things
in the kernel internal LED APIs:

3a) An API for triggers to put the LED in glowing mode, we've come up
with the following prototype for this:

void led_trigger_glow(struct led_trigger *trigger, unsigned long *cycle_time);

Where cycle_time is the number of milliseconds for a full glow cycle (from off
to full-on to off again).  So if cycle_time is set to 1000 then the LED glows
at 1 Hz, if it is set to 500 then at 2 Hz, etc. Note that, as with
led_trigger_blink(), the time passed to led_trigger_glow() is passed by
reference, as the LED driver may round it to match what the hardware can do,
and the rounded value is returned to the caller through the reference.

3b) 3a) in turn will require adding a new optional glow_set callback to
struct led_classdev which will then get called by led_trigger_glow if available.

We've not discussed yet what to do if led_trigger_glow() gets called on
a led_classdev which does not implement the new glow_set callback; I guess
the most sensible thing to do then is to fall back to blinking with delay_on
and delay_off set to cycle_time / 2.

If you can make some time to work on this solution that would be great. Please
let me know if you have any questions about the solution outlined above.

Note that glowing is only exported as in kernel functionality, I see no
use-case for exporting this to userspace and keeping this in kernel allows
us to keep things nice and simple.

Regards,

Hans

[PATCH 2/2] uapi/habanalabs: add missing fields in bmon params

2019-04-21 Thread Oded Gabbay

This patch adds missing fields of start address 0 and 1 in the bmon
parameter structure that is received from the user in the debug IOCTL.

Without these fields, the functionality of the bmon trace is broken,
because there is no configuration of the base address of the filter of the
bus monitor.

Signed-off-by: Oded Gabbay 
---
 drivers/misc/habanalabs/goya/goya_coresight.c | 16 
 include/uapi/misc/habanalabs.h|  9 ++---
 2 files changed, 18 insertions(+), 7 deletions(-)

diff --git a/drivers/misc/habanalabs/goya/goya_coresight.c 
b/drivers/misc/habanalabs/goya/goya_coresight.c
index 68726fb4c56a..1ac951f52d1e 100644
--- a/drivers/misc/habanalabs/goya/goya_coresight.c
+++ b/drivers/misc/habanalabs/goya/goya_coresight.c
@@ -459,10 +459,14 @@ static int goya_config_bmon(struct hl_device *hdev,
if (!input)
return -EINVAL;
 
-   WREG32(base_reg + 0x208, lower_32_bits(input->addr_range0));
-   WREG32(base_reg + 0x20C, upper_32_bits(input->addr_range0));
-   WREG32(base_reg + 0x248, lower_32_bits(input->addr_range1));
-   WREG32(base_reg + 0x24C, upper_32_bits(input->addr_range1));
+   WREG32(base_reg + 0x200, lower_32_bits(input->start_addr0));
+   WREG32(base_reg + 0x204, upper_32_bits(input->start_addr0));
+   WREG32(base_reg + 0x208, lower_32_bits(input->addr_mask0));
+   WREG32(base_reg + 0x20C, upper_32_bits(input->addr_mask0));
+   WREG32(base_reg + 0x240, lower_32_bits(input->start_addr1));
+   WREG32(base_reg + 0x244, upper_32_bits(input->start_addr1));
+   WREG32(base_reg + 0x248, lower_32_bits(input->addr_mask1));
+   WREG32(base_reg + 0x24C, upper_32_bits(input->addr_mask1));
WREG32(base_reg + 0x224, 0);
WREG32(base_reg + 0x234, 0);
WREG32(base_reg + 0x30C, input->bw_win);
@@ -482,8 +486,12 @@ static int goya_config_bmon(struct hl_device *hdev,
WREG32(base_reg + 0x100, 0x11);
WREG32(base_reg + 0x304, 0x1);
} else {
+   WREG32(base_reg + 0x200, 0);
+   WREG32(base_reg + 0x204, 0);
WREG32(base_reg + 0x208, 0x);
WREG32(base_reg + 0x20C, 0x);
+   WREG32(base_reg + 0x240, 0);
+   WREG32(base_reg + 0x244, 0);
WREG32(base_reg + 0x248, 0x);
WREG32(base_reg + 0x24C, 0x);
WREG32(base_reg + 0x224, 0x);
diff --git a/include/uapi/misc/habanalabs.h b/include/uapi/misc/habanalabs.h
index 613d431da783..8ac292cf4d00 100644
--- a/include/uapi/misc/habanalabs.h
+++ b/include/uapi/misc/habanalabs.h
@@ -374,9 +374,12 @@ struct hl_debug_params_stm {
 };
 
 struct hl_debug_params_bmon {
-   /* Transaction address filter */
-   __u64 addr_range0;
-   __u64 addr_range1;
+   /* Two address ranges that the user can request to filter */
+   __u64 start_addr0;
+   __u64 addr_mask0;
+
+   __u64 start_addr1;
+   __u64 addr_mask1;
 
/* Capture window configuration */
__u32 bw_win;
-- 
2.17.1

[PATCH 1/2] habanalabs: re-factor goya_parse_cb_no_ext_queue()

2019-04-21 Thread Oded Gabbay

This patch re-factors goya_parse_cb_no_ext_queue() to make it more
readable by inverting the check inside the first if statement so the bulk
of the function won't be inside an if statement.

The patch also fixes a spelling error in the name of the function.

Signed-off-by: Oded Gabbay 
---
 drivers/misc/habanalabs/goya/goya.c | 43 ++---
 1 file changed, 21 insertions(+), 22 deletions(-)

diff --git a/drivers/misc/habanalabs/goya/goya.c 
b/drivers/misc/habanalabs/goya/goya.c
index 5100dfbf3acc..ed3c4b81aff1 100644
--- a/drivers/misc/habanalabs/goya/goya.c
+++ b/drivers/misc/habanalabs/goya/goya.c
@@ -3859,36 +3859,35 @@ static int goya_parse_cb_no_mmu(struct hl_device *hdev,
return rc;
 }
 
-static int goya_parse_cb_no_ext_quque(struct hl_device *hdev,
+static int goya_parse_cb_no_ext_queue(struct hl_device *hdev,
struct hl_cs_parser *parser)
 {
struct asic_fixed_properties *asic_prop = &hdev->asic_prop;
struct goya_device *goya = hdev->asic_specific;
 
-   if (!(goya->hw_cap_initialized & HW_CAP_MMU)) {
-   /* For internal queue jobs, just check if cb address is valid */
-   if (hl_mem_area_inside_range(
-   (u64) (uintptr_t) parser->user_cb,
-   parser->user_cb_size,
-   asic_prop->sram_user_base_address,
-   asic_prop->sram_end_address))
-   return 0;
+   if (goya->hw_cap_initialized & HW_CAP_MMU)
+   return 0;
 
-   if (hl_mem_area_inside_range(
-   (u64) (uintptr_t) parser->user_cb,
-   parser->user_cb_size,
-   asic_prop->dram_user_base_address,
-   asic_prop->dram_end_address))
-   return 0;
+   /* For internal queue jobs, just check if CB address is valid */
+   if (hl_mem_area_inside_range(
+   (u64) (uintptr_t) parser->user_cb,
+   parser->user_cb_size,
+   asic_prop->sram_user_base_address,
+   asic_prop->sram_end_address))
+   return 0;
 
-   dev_err(hdev->dev,
-   "Internal CB address %px + 0x%x is not in SRAM nor in 
DRAM\n",
-   parser->user_cb, parser->user_cb_size);
+   if (hl_mem_area_inside_range(
+   (u64) (uintptr_t) parser->user_cb,
+   parser->user_cb_size,
+   asic_prop->dram_user_base_address,
+   asic_prop->dram_end_address))
+   return 0;
 
-   return -EFAULT;
-   }
+   dev_err(hdev->dev,
+   "Internal CB address %px + 0x%x is not in SRAM nor in DRAM\n",
+   parser->user_cb, parser->user_cb_size);
 
-   return 0;
+   return -EFAULT;
 }
 
 int goya_cs_parser(struct hl_device *hdev, struct hl_cs_parser *parser)
@@ -3896,7 +3895,7 @@ int goya_cs_parser(struct hl_device *hdev, struct 
hl_cs_parser *parser)
struct goya_device *goya = hdev->asic_specific;
 
if (!parser->ext_queue)
-   return goya_parse_cb_no_ext_quque(hdev, parser);
+   return goya_parse_cb_no_ext_queue(hdev, parser);
 
if ((goya->hw_cap_initialized & HW_CAP_MMU) && parser->use_virt_addr)
return goya_parse_cb_mmu(hdev, parser);
-- 
2.17.1

Re: Adding plain accesses and detecting data races in the LKMM

2019-04-21 Thread Paul E. McKenney

On Sat, Apr 20, 2019 at 11:50:14PM +0900, Akira Yokosawa wrote:
> On Fri, 19 Apr 2019 11:06:41 -0700, Paul E. McKenney wrote:
> > On Sat, Apr 20, 2019 at 12:06:58AM +0900, Akira Yokosawa wrote:
> >> Hi Paul,
> >>
> [...]
> > 
> >>> + (1) The compiler can reorder the load from a to precede the
> >>> + atomic_dec(), (2) Because x86 smp_mb__before_atomic() is only a
> >>> + compiler barrier, the CPU can reorder the preceding store to
> >>> + obj->dead with the later load from a.
> >>> +
> >>> + This could be avoided by using READ_ONCE(), which would prevent the
> >>> + compiler from reordering due to both atomic_dec() and READ_ONCE()
> >>> + being volatile accesses, and is usually preferable for loads from
> >>> + shared variables.  However, weakly ordered CPUs would still be
> >>> + free to reorder the atomic_dec() with the load from a, so a more
> >>> + readable option is to also use smp_mb__after_atomic() as follows:
> >>
> >> The point here is not just "readability", but also the portability of the
> >> code, isn't it?
> > 
> > As Andrea noted, in this particular case, the guarantee that the
> > store to obj->dead precedes the load from x is portable.  Either the
> > smp_mb__before_atomic() or the atomic_dec() must provide the ordering.
> 
> I think I understood this. What I wanted to say was the code for x86 implied
> in the subjunctive sentence:
> 
>   obj->dead = 1;
>   smp_mb__before_atomic();
>   atomic_dec(&obj->ref_count);
>   r1 = READ_ONCE(x);
> 
> , which was not spelled out, is not portable if we expect the ordering of
> atomic_dec() with READ_ONCE().

I now understand that you understood.  ;-)

> > However, you are right that there is some non-portability.  But this
> > non-portability involves the order of the atomic_dec() and the store to x.
> 
> Yes, you've guessed it right.

Don't worry, it won't happen again!

> > So what I did was ...
> > 
> >> Thanks, Akira
> >>
> >>> +
> >>> + WRITE_ONCE(obj->dead, 1);
> >>> + smp_mb__before_atomic();
> >>> + atomic_dec(&obj->ref_count);
> >>> + smp_mb__after_atomic();
> >>> + r1 = READ_ONCE(a);
> >>> +
> >>> + This orders all three accesses against each other, and also makes
> >>> + the intent quite clear.
> > 
> > ... change the above paragraph to read as follows:
> > 
> >  In addition, the example without the smp_mb__after_atomic() does
> >  not necessarily order the atomic_dec() with the load from x.
> >  In contrast, the example with both smp_mb__before_atomic() and
> >  smp_mb__after_atomic() orders all three accesses against each other,
> >  and also makes the intent quite clear.
> > 
> > Does that help?
> 
> This looks a little bit redundant to me. The original one is clear
> enough.
> 
> How about editing the leading sentence above:
> 
> >>> + shared variables.  However, weakly ordered CPUs would still be
> >>> + free to reorder the atomic_dec() with the load from a, so a more
> >>> + readable option is to also use smp_mb__after_atomic() as follows:
> 
> to read as follows?
> 
>  shared variables.  However, weakly ordered CPUs would still be
>  free to reorder the atomic_dec() with the load from x, so a
>  portable and more readable option is to also use
>  smp_mb__after_atomic() as follows:

Adding "portable and", correct?  Makes sense, so I applied this change.

> Obviously, the interesting discussion going on in another thread will
> surely affect this patch.

Quite possibly!  ;-)


Thanx, Paul

> >>>   See Documentation/atomic_{t,bitops}.txt for more information.
> >>>  
> >>> diff --git a/tools/memory-model/linux-kernel.cat 
> >>> b/tools/memory-model/linux-kernel.cat
> >>> index 8dcb37835b61..b6866f93abb8 100644
> >>> --- a/tools/memory-model/linux-kernel.cat
> >>> +++ b/tools/memory-model/linux-kernel.cat
> >>> @@ -28,8 +28,8 @@ include "lock.cat"
> >>>  let rmb = [R \ Noreturn] ; fencerel(Rmb) ; [R \ Noreturn]
> >>>  let wmb = [W] ; fencerel(Wmb) ; [W]
> >>>  let mb = ([M] ; fencerel(Mb) ; [M]) |
> >>> - ([M] ; fencerel(Before-atomic) ; [RMW] ; po? ; [M]) |
> >>> - ([M] ; po? ; [RMW] ; fencerel(After-atomic) ; [M]) |
> >>> + ([M] ; fencerel(Before-atomic) ; [RMW]) |
> >>> + ([RMW] ; fencerel(After-atomic) ; [M]) |
> >>>   ([M] ; po? ; [LKW] ; fencerel(After-spinlock) ; [M]) |
> >>>   ([M] ; po ; [UL] ; (co | po) ; [LKW] ;
> >>>   fencerel(After-unlock-lock) ; [M])
> >>>
> >>
> > 
>

Re: [PATCH] x86_64: uninline TASK_SIZE

2019-04-21 Thread hpa

On April 21, 2019 11:28:42 AM PDT, Ingo Molnar  wrote:
>
>* Alexey Dobriyan  wrote:
>
>> TASK_SIZE macro is quite deceptive: it looks like a constant but in
>fact
>> compiles to 50+ bytes.
>> 
>> Space savings on x86_64 defconfig:
>> 
>> add/remove: 1/0 grow/shrink: 3/24 up/down: 77/-2247 (-2170)
>> Function old new   delta
>> _task_size -  52 +52
>> mpol_shared_policy_init  344 363 +19
>> shmem_get_unmapped_area   92  97  +5
>> __rseq_handle_notify_resume.cold  34  35  +1
>> copy_from_user_nmi   123 113 -10
>> mmap_address_hint_valid   92  56 -36
>> arch_get_unmapped_area_topdown   471 435 -36
>> tlb_gather_mmu   164 126 -38
>> hugetlb_get_unmapped_area774 736 -38
>> __create_xol_area497 458 -39
>> arch_tlb_gather_mmu  160 120 -40
>> setup_new_exec   380 336 -44
>> __x64_sys_mlockall   378 333 -45
>> __ia32_sys_mlockall  378 333 -45
>> tlb_flush_mmu235 189 -46
>> unmap_page_range20982048 -50
>> copy_mount_options   518 465 -53
>> __get_user_pages17371675 -62
>> get_unmapped_area270 204 -66
>> perf_prepare_sample 11761098 -78
>> perf_callchain_user  549 469 -80
>> mremap_to.isra   545 457 -88
>> arch_tlb_finish_mmu  394 305 -89
>> __do_munmap 1039 927-112
>> elf_map  527 409-118
>> prctl_set_mm15091335-174
>> __rseq_handle_notify_resume 1116 906-210
>> load_elf_binary                  11761   11111    -650
>> Total: Before=14121337, After=14119167, chg -0.02%
>> 
>> Signed-off-by: Alexey Dobriyan 
>> ---
>> 
>>  arch/x86/include/asm/processor.h |4 ++--
>>  arch/x86/kernel/Makefile |1 +
>>  arch/x86/kernel/task_size_64.c   |9 +
>>  3 files changed, 12 insertions(+), 2 deletions(-)
>> 
>> --- a/arch/x86/include/asm/processor.h
>> +++ b/arch/x86/include/asm/processor.h
>> @@ -887,8 +887,8 @@ static inline void spin_lock_prefetch(const void
>*x)
>>  
>>  #define TASK_SIZE_LOW   (test_thread_flag(TIF_ADDR32) ? \
>>  IA32_PAGE_OFFSET : DEFAULT_MAP_WINDOW)
>> -#define TASK_SIZE   (test_thread_flag(TIF_ADDR32) ? \
>> -IA32_PAGE_OFFSET : TASK_SIZE_MAX)
>> +unsigned long _task_size(void);
>> +#define TASK_SIZE   _task_size()
>>  #define TASK_SIZE_OF(child) ((test_tsk_thread_flag(child,
>TIF_ADDR32)) ? \
>>  IA32_PAGE_OFFSET : TASK_SIZE_MAX)
>>  
>> --- a/arch/x86/kernel/Makefile
>> +++ b/arch/x86/kernel/Makefile
>> @@ -46,6 +46,7 @@ CFLAGS_irq.o := -I$(src)/../include/asm/trace
>>  
>>  obj-y   := process_$(BITS).o signal.o
>>  obj-$(CONFIG_COMPAT)+= signal_compat.o
>> +obj-$(CONFIG_X86_64)+= task_size_64.o
>>  obj-y   += traps.o idt.o irq.o irq_$(BITS).o 
>> dumpstack_$(BITS).o
>>  obj-y   += time.o ioport.o dumpstack.o nmi.o
>>  obj-$(CONFIG_MODIFY_LDT_SYSCALL)+= ldt.o
>> new file mode 100644
>> --- /dev/null
>> +++ b/arch/x86/kernel/task_size_64.c
>> @@ -0,0 +1,9 @@
>> +#include 
>> +#include 
>> +#include 
>> +
>> +unsigned long _task_size(void)
>> +{
>> +return test_thread_flag(TIF_ADDR32) ? IA32_PAGE_OFFSET :
>TASK_SIZE_MAX;
>> +}
>> +EXPORT_SYMBOL(_task_size);
>
>Good idea - but instead of adding yet another compilation unit, why not
>
>stick _task_size() into arch/x86/kernel/process_64.c, which is the 
>canonical place for process management related arch functions?
>
>Thanks,
>
>   Ingo

Better yet... since TIF_ADDR32 isn't something that changes randomly, perhaps 
this should be a separate variable?
-- 
Sent from my Android device with K-9 Mail. Please excuse my brevity.

Re: [PATCH v4 14/16] locking/rwsem: Guard against making count negative

2019-04-21 Thread Waiman Long

On 4/19/19 3:39 PM, Waiman Long wrote:
> On 04/19/2019 09:15 AM, Peter Zijlstra wrote:
>> On Fri, Apr 19, 2019 at 03:03:04PM +0200, Peter Zijlstra wrote:
>>> On Fri, Apr 19, 2019 at 02:02:07PM +0200, Peter Zijlstra wrote:
 On Fri, Apr 19, 2019 at 12:26:47PM +0200, Peter Zijlstra wrote:
> I thought of a horrible horrible alternative:
 Hurm, that's broken as heck. Let me try again.
>>> So I can't make that scheme work, it all ends up wanting to have
>>> cmpxchg().
>>>
>>> Do we have a performance comparison somewhere of xadd vs cmpxchg
>>> readers? I tried looking in the old threads, but I can't seem to locate
>>> it.
>>>
>>> We need new instructions :/ Or more clever than I can muster just now.
>> In particular, an (unsigned) saturation arithmetic variant of XADD would
>> be very nice to have at this point.
> I just want to be clear about my current scheme. There will be 16 bits
> allocated for reader count. I use the MS bit for signaling that there
> are too many readers. So the fast path will fail and the readers will be
> put into the wait list. This effectively limits readers to 32k-1, but it
> doesn't mean the actual reader count cannot go over that. As long as the
> actual count is less than 64k, everything should still work perfectly.
> IOW, even though we have reached the limit of 32k, we need to pile on an
> additional 32k readers to really overflow the count and cause a problem.

How about the following chunks to disable preemption temporarily for the
increment-check-decrement sequence?

diff --git a/include/linux/preempt.h b/include/linux/preempt.h
index dd92b1a93919..4cc03ac66e13 100644
--- a/include/linux/preempt.h
+++ b/include/linux/preempt.h
@@ -250,6 +250,8 @@ do { \
 #define preempt_enable_notrace()   barrier()
 #define preemptible()  0
 
+#define __preempt_disable_nop  /* preempt_disable() is nop */
+
 #endif /* CONFIG_PREEMPT_COUNT */
 
 #ifdef MODULE
diff --git a/kernel/locking/rwsem.c b/kernel/locking/rwsem.c
index 043fd29b7534..54029e6af17b 100644
--- a/kernel/locking/rwsem.c
+++ b/kernel/locking/rwsem.c
@@ -256,11 +256,64 @@ static inline struct task_struct
*rwsem_get_owner(struct r
    return (struct task_struct *) (cowner
    ? cowner | (sowner & RWSEM_NONSPINNABLE) : sowner);
 }
+
+/*
+ * If __preempt_disable_nop is defined, calling preempt_disable() and
+ * preempt_enable() directly is the most efficient way. Otherwise, it may
+ * be more efficient to disable and enable interrupts instead for disabling
+ * preemption temporarily.
+ */
+#ifdef __preempt_disable_nop
+#define disable_preemption()   preempt_disable()
+#define enable_preemption()    preempt_enable()
+#else
+#define disable_preemption()   local_irq_disable()
+#define enable_preemption()    local_irq_enable()
+#endif
+
+/*
+ * When the owner task structure pointer is merged into count, fewer bits
+ * will be available for readers. Therefore, there is a very slight chance
+ * that the reader count may overflow. We try to prevent that from happening
+ * by checking for the MS bit of the count and failing the trylock attempt
+ * if this bit is set.
+ *
+ * With preemption enabled, there is a remote possibility that preemption
+ * can happen in the narrow timing window between incrementing and
+ * decrementing the reader count and the task is put to sleep for a
+ * considerable amount of time. If a sufficient number of such unfortunate
+ * sequences of events happen, we may still overflow the reader count.
+ * To avoid such possibility, we have to disable preemption for the
+ * whole increment-check-decrement sequence.
+ *
+ * The function returns true if there are too many readers and the count
+ * has already been properly decremented so the reader must go directly
+ * into the wait list.
+ */
+static inline bool rwsem_read_trylock(struct rw_semaphore *sem, long *cnt)
+{
+   bool wait = false;  /* Wait now flag */
+
+   disable_preemption();
+   *cnt = atomic_long_fetch_add_acquire(RWSEM_READER_BIAS, &sem->count);
+   if (unlikely(*cnt < 0)) {
+   atomic_long_add(-RWSEM_READER_BIAS, &sem->count);
+   wait = true;
+   }
+   enable_preemption();
+   return wait;
+}
 #else /* !CONFIG_RWSEM_OWNER_COUNT */
 static inline struct task_struct *rwsem_get_owner(struct rw_semaphore *sem)
 {
    return READ_ONCE(sem->owner);
 }
+
+static inline bool rwsem_read_trylock(struct rw_semaphore *sem, long *cnt)
+{
+   *cnt = atomic_long_fetch_add_acquire(RWSEM_READER_BIAS, &sem->count);
+   return false;
+}
 #endif /* CONFIG_RWSEM_OWNER_COUNT */
 
 /*
@@ -981,32 +1034,18 @@ static inline void clear_wr_nonspinnable(struct
rw_semaph
  * Wait for the read lock to be granted
  */
 static struct rw_semaphore __sched *
-rwsem_down_read_slowpath(struct rw_semaphore *sem, int state, long count)
+rwsem_down_read_slowpath(struct rw_semaphore *sem, int state, const
bool wait)
 {
-   long adjustment = -RWSEM_READER_BIAS;
+   long count,

Re: [PATCH] x86_64: uninline TASK_SIZE

2019-04-21 Thread Alexey Dobriyan

On Sun, Apr 21, 2019 at 01:07:08PM -0700, h...@zytor.com wrote:
> On April 21, 2019 11:28:42 AM PDT, Ingo Molnar  wrote:
> >
> >* Alexey Dobriyan  wrote:
> >
> >> TASK_SIZE macro is quite deceptive: it looks like a constant but in
> >fact
> >> compiles to 50+ bytes.
> >> 
> >> Space savings on x86_64 defconfig:
> >> 
> >> add/remove: 1/0 grow/shrink: 3/24 up/down: 77/-2247 (-2170)
> >> Function old new   delta
> >> _task_size -  52 +52
> >> mpol_shared_policy_init  344 363 +19
> >> shmem_get_unmapped_area   92  97  +5
> >> __rseq_handle_notify_resume.cold  34  35  +1
> >> copy_from_user_nmi   123 113 -10
> >> mmap_address_hint_valid   92  56 -36
> >> arch_get_unmapped_area_topdown   471 435 -36
> >> tlb_gather_mmu   164 126 -38
> >> hugetlb_get_unmapped_area774 736 -38
> >> __create_xol_area497 458 -39
> >> arch_tlb_gather_mmu  160 120 -40
> >> setup_new_exec   380 336 -44
> >> __x64_sys_mlockall   378 333 -45
> >> __ia32_sys_mlockall  378 333 -45
> >> tlb_flush_mmu235 189 -46
> >> unmap_page_range20982048 -50
> >> copy_mount_options   518 465 -53
> >> __get_user_pages17371675 -62
> >> get_unmapped_area270 204 -66
> >> perf_prepare_sample 11761098 -78
> >> perf_callchain_user  549 469 -80
> >> mremap_to.isra   545 457 -88
> >> arch_tlb_finish_mmu  394 305 -89
> >> __do_munmap 1039 927-112
> >> elf_map  527 409-118
> >> prctl_set_mm15091335-174
> >> __rseq_handle_notify_resume 1116 906-210
> >> load_elf_binary11761   1-650
> >> Total: Before=14121337, After=14119167, chg -0.02%
> >> 
> >> Signed-off-by: Alexey Dobriyan 
> >> ---
> >> 
> >>  arch/x86/include/asm/processor.h |4 ++--
> >>  arch/x86/kernel/Makefile |1 +
> >>  arch/x86/kernel/task_size_64.c   |9 +
> >>  3 files changed, 12 insertions(+), 2 deletions(-)
> >> 
> >> --- a/arch/x86/include/asm/processor.h
> >> +++ b/arch/x86/include/asm/processor.h
> >> @@ -887,8 +887,8 @@ static inline void spin_lock_prefetch(const void
> >*x)
> >>  
> >>  #define TASK_SIZE_LOW (test_thread_flag(TIF_ADDR32) ? \
> >>IA32_PAGE_OFFSET : DEFAULT_MAP_WINDOW)
> >> -#define TASK_SIZE (test_thread_flag(TIF_ADDR32) ? \
> >> -  IA32_PAGE_OFFSET : TASK_SIZE_MAX)
> >> +unsigned long _task_size(void);
> >> +#define TASK_SIZE _task_size()
> >>  #define TASK_SIZE_OF(child)   ((test_tsk_thread_flag(child,
> >TIF_ADDR32)) ? \
> >>IA32_PAGE_OFFSET : TASK_SIZE_MAX)
> >>  
> >> --- a/arch/x86/kernel/Makefile
> >> +++ b/arch/x86/kernel/Makefile
> >> @@ -46,6 +46,7 @@ CFLAGS_irq.o := -I$(src)/../include/asm/trace
> >>  
> >>  obj-y := process_$(BITS).o signal.o
> >>  obj-$(CONFIG_COMPAT)  += signal_compat.o
> >> +obj-$(CONFIG_X86_64)  += task_size_64.o
> >>  obj-y += traps.o idt.o irq.o irq_$(BITS).o 
> >> dumpstack_$(BITS).o
> >>  obj-y += time.o ioport.o dumpstack.o nmi.o
> >>  obj-$(CONFIG_MODIFY_LDT_SYSCALL)  += ldt.o
> >> new file mode 100644
> >> --- /dev/null
> >> +++ b/arch/x86/kernel/task_size_64.c
> >> @@ -0,0 +1,9 @@
> >> +#include 
> >> +#include 
> >> +#include 
> >> +
> >> +unsigned long _task_size(void)
> >> +{
> >> +  return test_thread_flag(TIF_ADDR32) ? IA32_PAGE_OFFSET :
> >TASK_SIZE_MAX;
> >> +}
> >> +EXPORT_SYMBOL(_task_size);
> >
> >Good idea - but instead of adding yet another compilation unit, why not
> >
> >stick _task_size() into arch/x86/kernel/process_64.c, which is the 
> >canonical place for process management related arch functions?
> >
> >Thanks,
> >
> > Ingo
> 
> Better yet... since TIF_ADDR32 isn't something that changes randomly, perhaps 
> this should be a separate variable?

Maybe. I only thought about putting every 32-bit related flag under
CONFIG_COMPAT to further eradicate bloat (and force everyone else
to keep an eye on it, ha-ha).

Re: DISCONTIGMEM is deprecated

2019-04-21 Thread Mel Gorman

On Sun, Apr 21, 2019 at 06:26:07AM -0700, Matthew Wilcox wrote:
> On Sun, Apr 21, 2019 at 09:38:59AM +0300, Mike Rapoport wrote:
> > On Fri, Apr 19, 2019 at 07:05:21AM -0700, Matthew Wilcox wrote:
> > > On Fri, Apr 19, 2019 at 10:43:35AM +0100, Mel Gorman wrote:
> > > > DISCONTIG is essentially deprecated and even parisc plans to move to
> > > > SPARSEMEM so there is no need to be fancy, this patch simply disables
> > > > watermark boosting by default on DISCONTIGMEM.
> > > 
> > > I don't think parisc is the only arch which uses DISCONTIGMEM for !NUMA
> > > scenarios.  Grepping the arch/ directories shows:
> > > 
> > > alpha (does support NUMA, but also non-NUMA DISCONTIGMEM)
> > > arc (for supporting more than 1GB of memory)
> > > ia64 (looks complicated ...)
> > > m68k (for multiple chunks of memory)
> > > mips (does support NUMA but also non-NUMA)
> > > parisc (both NUMA and non-NUMA)
> > 
> > i386 NUMA as well
> 
> I clearly over-trimmed.  The original assumption that Mel had was that
> DISCONTIGMEM => NUMA, and that's not true on the above six architectures.
> It is true on i386 ;-)

32-bit NUMA systems should be non-existent in practice. The last NUMA
system I'm aware of that was both NUMA and 32-bit only died somewhere
between 2004 and 2007. If someone is running a 64-bit capable system in
32-bit mode with NUMA, they really are just punishing themselves for fun.

-- 
Mel Gorman
SUSE Labs

[PATCH v2] x86_64: uninline TASK_SIZE

2019-04-21 Thread Alexey Dobriyan

TASK_SIZE macro is quite deceptive: it looks like a constant but in fact
compiles to 50+ bytes.

Space savings on x86_64 defconfig:

add/remove: 1/0 grow/shrink: 3/24 up/down: 77/-2247 (-2170)
Function old new   delta
_task_size -  52 +52
mpol_shared_policy_init  344 363 +19
shmem_get_unmapped_area   92  97  +5
__rseq_handle_notify_resume.cold  34  35  +1
copy_from_user_nmi   123 113 -10
mmap_address_hint_valid   92  56 -36
arch_get_unmapped_area_topdown   471 435 -36
tlb_gather_mmu   164 126 -38
hugetlb_get_unmapped_area774 736 -38
__create_xol_area497 458 -39
arch_tlb_gather_mmu  160 120 -40
setup_new_exec   380 336 -44
__x64_sys_mlockall   378 333 -45
__ia32_sys_mlockall  378 333 -45
tlb_flush_mmu235 189 -46
unmap_page_range20982048 -50
copy_mount_options   518 465 -53
__get_user_pages17371675 -62
get_unmapped_area270 204 -66
perf_prepare_sample 11761098 -78
perf_callchain_user  549 469 -80
mremap_to.isra   545 457 -88
arch_tlb_finish_mmu  394 305 -89
__do_munmap 1039 927-112
elf_map  527 409-118
prctl_set_mm15091335-174
__rseq_handle_notify_resume 1116 906-210
load_elf_binary11761   1-650  <===

Signed-off-by: Alexey Dobriyan 
---

 arch/x86/include/asm/processor.h |4 ++--
 arch/x86/kernel/process_64.c |6 ++
 2 files changed, 8 insertions(+), 2 deletions(-)

--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -887,8 +887,8 @@ static inline void spin_lock_prefetch(const void *x)
 
 #define TASK_SIZE_LOW  (test_thread_flag(TIF_ADDR32) ? \
IA32_PAGE_OFFSET : DEFAULT_MAP_WINDOW)
-#define TASK_SIZE  (test_thread_flag(TIF_ADDR32) ? \
-   IA32_PAGE_OFFSET : TASK_SIZE_MAX)
+unsigned long _task_size(void);
+#define TASK_SIZE  _task_size()
 #define TASK_SIZE_OF(child)((test_tsk_thread_flag(child, TIF_ADDR32)) ? \
IA32_PAGE_OFFSET : TASK_SIZE_MAX)
 
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -827,3 +827,9 @@ unsigned long KSTK_ESP(struct task_struct *task)
 {
return task_pt_regs(task)->sp;
 }
+
+unsigned long _task_size(void)
+{
+   return test_thread_flag(TIF_ADDR32) ? IA32_PAGE_OFFSET : TASK_SIZE_MAX;
+}
+EXPORT_SYMBOL(_task_size);

[no subject]

2019-04-21 Thread Gavin

Good day Beautiful, i hope this mail meets you well? I know this may seem 
inappropriate so i ask for your forgiveness but i wish to get to know you 
better, if I may be so bold. I consider myself an easy-going man, adventurous, 
honest and fun loving person but I am currently looking for a relationship in 
which I will feel loved. I promise to answer any question that you may want to 
ask me...all i need is just your attention and the chance to know you more.

Please tell me more about yourself, if you do not mind. Hope to hear back from 
you soon.

Gavin.

[locking/rwsem] 21471c203e: WARNING:at_kernel/locking/rwsem.c:#downgrade_write

2019-04-21 Thread kernel test robot

FYI, we noticed the following commit (built with gcc-7):

commit: 21471c203eb7f02124c15127f806f1b145e8d663 ("locking/rwsem: Disable 
reader optimistic spinning adaptively")
https://git.kernel.org/cgit/linux/kernel/git/tip/tip.git WIP.locking/core

in testcase: boot

on test machine: qemu-system-x86_64 -enable-kvm -cpu SandyBridge -smp 2 -m 2G

caused below changes (please refer to attached dmesg/kmsg for entire 
log/backtrace):


++++
|| ce474d0a73 | 21471c203e |
++++
| boot_successes | 35 | 19 |
| boot_failures  | 1  | 17 |
| IP-Config:Auto-configuration_of_network_failed | 1  ||
| WARNING:at_kernel/locking/rwsem.c:#downgrade_write | 0  | 16 |
| RIP:downgrade_write| 0  | 16 |
| WARNING:at_net/sched/sch_generic.c:#dev_watchdog   | 0  | 1  |
| RIP:dev_watchdog   | 0  | 1  |
++++


If you fix the issue, kindly add following tag
Reported-by: kernel test robot 


[  184.736582] WARNING: CPU: 0 PID: 260 at kernel/locking/rwsem.c:1453 
downgrade_write+0x161/0x2b9
[  184.739158] CPU: 0 PID: 260 Comm: in:imuxsock Not tainted 
5.1.0-rc4-00088-g21471c2 #1
[  184.741175] RIP: 0010:downgrade_write+0x161/0x2b9
[  184.742351] Code: 00 48 8b 13 4c 8b 4c 24 08 48 89 e9 4c 8b 04 24 48 c7 c6 
80 a2 c6 82 48 c7 c7 20 a1 c6 82 e8 7d 62 f5 ff 48 ff 05 a7 ba cd 04 <0f> 0b 48 
ff 05 a6 ba cd 04 e9 0a 01 00 00 48 c7 c7 98 e5 ac 83 48
[  184.747015] RSP: :88804fd4fd50 EFLAGS: 00010202
[  184.748361] RAX:  RBX: 8880557e32b8 RCX: 
[  184.750147] RDX: 00835cf46100 RSI: 811e19a8 RDI: ed1009fa9f9f
[  184.752181] RBP: 88805cf46102 R08: ed100d07ccd9 R09: 0007
[  184.754022] R10: ed100aafc658 R11: ed100d07ccd8 R12: 111009fa9fac
[  184.755826] R13: 8880557e3318 R14: 8880557e32c0 R15: 88804fe06aa8
[  184.757646] FS:  7fdfee986700() GS:88806820() 
knlGS:
[  184.759599] CS:  0010 DS:  ES:  CR0: 80050033
[  184.761147] CR2: 7f81d536b6e0 CR3: 4f1e5000 CR4: 06f0
[  184.762891] Call Trace:
[  184.763579]  ? up_write+0x1eb/0x1eb
[  184.764599]  ? vma_compute_subtree_gap+0x139/0x141
[  184.765784]  ? __vma_rb_erase+0x4c3/0x534
[  184.766955]  ? vma_compute_subtree_gap+0x139/0x141
[  184.768170]  __do_munmap+0x612/0x77b
[  184.769080]  __vm_munmap+0xd9/0x149
[  184.770009]  ? __do_munmap+0x77b/0x77b
[  184.771061]  ? mark_held_locks+0x92/0xa8
[  184.772061]  __se_sys_munmap+0x35/0x41
[  184.773037]  __x64_sys_munmap+0x33/0x3d
[  184.774136]  do_syscall_64+0xb2/0x3bd
[  184.775128]  entry_SYSCALL_64_after_hwframe+0x49/0xbe
[  184.776364] RIP: 0033:0x7fdfefcd94a7
[  184.777377] Code: c7 c0 ff ff ff ff eb 8d 48 8b 15 ec 49 2b 00 f7 d8 64 89 
02 e9 5b ff ff ff 66 2e 0f 1f 84 00 00 00 00 00 b8 0b 00 00 00 0f 05 <48> 3d 01 
f0 ff ff 73 01 c3 48 8b 0d c1 49 2b 00 f7 d8 64 89 01 48
[  184.781760] RSP: 002b:7fdfee985c78 EFLAGS: 0206 ORIG_RAX: 
000b
[  184.783625] RAX: ffda RBX: 7fdfe800 RCX: 7fdfefcd94a7
[  184.785364] RDX:  RSI: 01e7b000 RDI: 7fdfe6185000
[  184.787183] RBP: 00023000 R08:  R09: 
[  184.788862] R10: 4022 R11: 0206 R12: 01e7b000
[  184.790694] R13: 7fffab1ef6cf R14:  R15: 7fdff1213040
[  184.792370] irq event stamp: 132
[  184.793270] hardirqs last  enabled at (131): [] 
console_unlock+0xa36/0xa9c
[  184.795376] hardirqs last disabled at (132): [] 
trace_hardirqs_off_thunk+0x1a/0x1c
[  184.797645] softirqs last  enabled at (128): [] 
__do_softirq+0x735/0x7a7
[  184.799642] softirqs last disabled at (123): [] 
irq_exit+0xba/0xff
[  184.801532] ---[ end trace 230952bfbdc12d4d ]---


To reproduce:

# build kernel
cd linux
cp config-5.1.0-rc4-00088-g21471c2 .config
make HOSTCC=gcc-7 CC=gcc-7 ARCH=x86_64 olddefconfig
make HOSTCC=gcc-7 CC=gcc-7 ARCH=x86_64 prepare
make HOSTCC=gcc-7 CC=gcc-7 ARCH=x86_64 modules_prepare
make HOSTCC=gcc-7 CC=gcc-7 ARCH=x86_64 SHELL=/bin/bash
make HOSTCC=gcc-7 CC=gcc-7 ARCH=x86_64 bzImage


git clone https://github.com/intel/lkp-tests.git
cd lkp-tests
bin/lkp qemu -k  job-script # job-script is attached in this 
email



Thanks,
eywa

#
# Automatically generated file; DO NOT EDIT.
# Linux/x86_64 5.1.0-rc4 Kernel Configuration
#

#
# Compiler: gcc-7 (Debian 7.3.0-1)

RE: [PATCH 1/2] soc: imx-sc: add i.MX system controller soc driver support

2019-04-21 Thread Anson Huang

Hi, Shawn

Best Regards!
Anson Huang

> -Original Message-
> From: Shawn Guo [mailto:shawn...@kernel.org]
> Sent: Sunday, April 21, 2019 3:42 PM
> To: Anson Huang 
> Cc: stefan.wah...@i2se.com; enric.balle...@collabora.com; linux-
> ker...@vger.kernel.org; he...@sntech.de; marc.w.gonza...@free.fr;
> ezequ...@collabora.com; catalin.mari...@arm.com;
> s.ha...@pengutronix.de; will.dea...@arm.com; Abel Vesa
> ; bjorn.anders...@linaro.org; Andy Gross
> ; ja...@amarulasolutions.com;
> ker...@pengutronix.de; dl-linux-imx ; o...@lixom.net;
> horms+rene...@verge.net.au; feste...@gmail.com; r...@kernel.org;
> linux-arm-ker...@lists.infradead.org; l.st...@pengutronix.de
> Subject: Re: [PATCH 1/2] soc: imx-sc: add i.MX system controller soc driver
> support
> 
> On Sun, Apr 21, 2019 at 03:40:00PM +0800, Shawn Guo wrote:
> > On Thu, Apr 11, 2019 at 06:49:12AM +, Anson Huang wrote:
> > > i.MX8QXP is an ARMv8 SoC which has a Cortex-M4 system controller
> > > inside, the system controller is in charge of controlling power,
> > > clock and fuse etc..
> > >
> > > This patch adds i.MX system controller soc driver support, Linux
> > > kernel has to communicate with system controller via MU (message
> > > unit) IPC to get soc revision, uid etc..
> > >
> > > With this patch, soc info can be read from sysfs:
> > >
> > > i.mx8qxp-mek# cat /sys/devices/soc0/family Freescale i.MX
> > >
> > > i.mx8qxp-mek# cat /sys/devices/soc0/soc_id i.MX8QXP
> > >
> > > i.mx8qxp-mek# cat /sys/devices/soc0/machine Freescale i.MX8QXP MEK
> > >
> > > i.mx8qxp-mek# cat /sys/devices/soc0/revision
> > > 1.1
> > >
> > > i.mx8qxp-mek# cat /sys/devices/soc0/soc_uid
> > > 7B64280B57AC1898
> > >
> > > Signed-off-by: Anson Huang 
> > > ---
> > >  drivers/soc/imx/Kconfig  |   7 ++
> > >  drivers/soc/imx/Makefile |   1 +
> > >  drivers/soc/imx/soc-imx-sc.c | 220
> > > +++
> > >  3 files changed, 228 insertions(+)
> > >  create mode 100644 drivers/soc/imx/soc-imx-sc.c
> >
> > Rather than creating a new driver, please take a look at Abel's
> > generic
> > i.MX8 SoC driver, and see if it can be extended to cover i.MX8QXP.
> 
> Forgot to give pointer to Abel's driver.
> 
> https://eur01.safelinks.protection.outlook.com/?url=https%3A%2F%2Fgit.ker
> nel.org%2Fpub%2Fscm%2Flinux%2Fkernel%2Fgit%2Fshawnguo%2Flinux.git%
> 2Fcommit%2F%3Fh%3Dimx%2Fdrivers%26id%3Da7e26f356ca12906a164d83c
> 9e9f8527ee7da022data=02%7C01%7Canson.huang%40nxp.com%7C9
> e2705d7449b4c2e23ed08d6c62ce0bb%7C686ea1d3bc2b4c6fa92cd99c5c3016
> 35%7C0%7C0%7C636914293400307709sdata=6ySEs%2B4SE8bvcBCkfoi
> VBafseAYthTED9%2F5qcf25xds%3Dreserved=0
> 

Got it, I didn't notice that this patch has been accepted. I will redo the
patch based on it,
thanks.

Anson.

> Shawn

Re: [PATCH v3 3/4] dt-bindings: arm: fsl: Add support for ZII i.MX7 RPU2 board

2019-04-21 Thread Shawn Guo

On Sun, Apr 14, 2019 at 11:34:02AM -0700, Andrey Smirnov wrote:
> Add support for ZII i.MX7 RPU2 board.
> 
> Signed-off-by: Andrey Smirnov 
> Reviewed-by: Rob Herring 
> Cc: Shawn Guo 
> Cc: Chris Healy 
> Cc: Andrew Lunn 
> Cc: Fabio Estevam 
> Cc: Rob Herring 
> Cc: linux-kernel@vger.kernel.org
> Cc: devicet...@vger.kernel.org

Applied, thanks.

Re: [PATCH v3 4/4] ARM: dts: Add support for ZII i.MX7 RPU2 board

2019-04-21 Thread Shawn Guo

On Sun, Apr 14, 2019 at 11:34:03AM -0700, Andrey Smirnov wrote:
> Add support for ZII's i.MX7 based Remote Peripheral Unit 2 (RPU2)
> board.
> 
> Signed-off-by: Andrey Smirnov 
> Reviewed-by: Fabio Estevam 
> Cc: Shawn Guo 
> Cc: Chris Healy 
> Cc: Andrew Lunn 
> Cc: Fabio Estevam 
> Cc: Rob Herring 
> Cc: linux-kernel@vger.kernel.org
> Cc: devicet...@vger.kernel.org

Applied, thanks.

Re: Bug report: A commit about serial8250 cause the output disorderly at the phase of startup

2019-04-21 Thread Hongzhi, Song


Hi all,

Anyone notice this issue?


--Hongzhi


On 4/19/19 10:24 AM, Hongzhi, Song wrote:

1. Issue description:

Boot the kernel (>= linux-rt-devel-v5.0.3) with qemu.
qemu will then print the following out-of-order messages.

At the beginning, the messages are out of order. The output then becomes
normal starting from the line "[    0.00] 000: Linux version..."


--
[    0.019000] 000: tsc: Unable to calibrate against PIT
[    0.002583] 000: 6199.83 BogoMIPS (lpj=3099918)
[    0.521247] 000: Intel(R) Core(TM)2 Duo CPU T7700  @ 2.40GHz
[    0.521247] 000:  (family: 0x6, model: 0xf
[    0.521247] 000: , stepping: 0xb)
[    0.533126] 000: unsupported p6 CPU model 15
[    0.533318] 000: no PMU driver, software events only.
[    0.765082] 000: 1 ACPI AML tables successfully acquired and loaded
[    0.765274] 000:
[    0.785903] 000: Enabled 2 GPEs in block 00 to 0F
[    0.786128] 000:
[    0.835675] 000: acpi PNP0A03:00: fail to add MMCONFIG information, 
can't access extended PCI configuration space under this bridge.

[    0.892056] 000:  5
[    0.892289] 000:  *10
[    0.892416] 000:  11
[    0.892527] 000: )
[    0.892661] 000:

/* skip some repeated contents */

[    5.052149] 000: , 512kB Cache
[    0.00] 000: Linux version 5.0.3-yocto-preempt-rt+ 
(hsong@pek-lpggp1) (gcc version 8.3.0 (GCC)) #24 SMP PREEMPT Thu Apr 
18 03:29:58 EDT 2019
[    0.00] 000: Command line: root=/dev/vda rw highres=off 
console=ttyS0 mem=256M ip=192.168.7.4::192.168.7.3:255.255.255.0 vga=0 
uvesafb.mode_opti0

[    0.00] 000: x86/fpu: x87 FPU will use FXSAVE
[    0.00] 000: BIOS-provided physical RAM map:
[    0.00] 000: BIOS-e820: [mem 
0x-0x0009fbff]
[    0.00] 000: BIOS-e820: [mem 
0x0009fc00-0x0009]
[    0.00] 000: BIOS-e820: [mem 
0x000f-0x000f]
[    0.00] 000: BIOS-e820: [mem 
0x0010-0x0ffdbfff]


--

2. Reproduce:
(1)build kernel: (Attachment is my .config)
make ARCH=x86_64 
CROSS_COMPILE=[path-to-my-cross-toolchain]/x86_64-wrs-linux-


(2)boot kernel with qemu:

qemu-system-x86_64 \
-drive file=qemux86-64.rootfs.ext4,if=virtio,format=raw \
-nographic \
-kernel arch/x86/boot/bzImage \
-append 'root=/dev/vda rw highres=off  console=ttyS0 mem=256M ip=dhcp'

3. Analysis:
I found that the following commit, present in >= linux-rt-devel-v5.0.3,
causes the issue.


b9d460e serial: 8250: implement write_atomic

Re: [PATCH 1/2] ARM: dts: vf610-zii-dev: Mark i2c0 SCL as GPIO_OPEN_DRAIN

2019-04-21 Thread Shawn Guo

On Sun, Apr 14, 2019 at 11:35:57AM -0700, Andrey Smirnov wrote:
> Mark i2c0 SCL as GPIO_OPEN_DRAIN to fix the following warning:
> 
> gpio-36 (scl): enforced open drain please flag it properly in DT/ACPI 
> DSDT/board file
> 
> Signed-off-by: Andrey Smirnov 
> Cc: Shawn Guo 
> Cc: Chris Healy 
> Cc: Andrew Lunn 
> Cc: Fabio Estevam 
> Cc: linux-kernel@vger.kernel.org

Applied both, thanks.

[PATCH v17 3/3] Documentation/filesystems/proc.txt: add arch_status file

2019-04-21 Thread Aubrey Li

Added /proc/<pid>/arch_status file, and added AVX512_elapsed_ms in
/proc/<pid>/arch_status. Report it in Documentation/filesystems/proc.txt

Signed-off-by: Aubrey Li 
Cc: Thomas Gleixner 
Cc: Peter Zijlstra 
Cc: Andi Kleen 
Cc: Tim Chen 
Cc: Dave Hansen 
Cc: Arjan van de Ven 
Cc: Alexey Dobriyan 
Cc: Andrew Morton 
Cc: Andy Lutomirski 
Cc: Linux API 
---
 Documentation/filesystems/proc.txt | 37 ++
 1 file changed, 37 insertions(+)

diff --git a/Documentation/filesystems/proc.txt 
b/Documentation/filesystems/proc.txt
index 66cad5c86171..cf5114a8fb13 100644
--- a/Documentation/filesystems/proc.txt
+++ b/Documentation/filesystems/proc.txt
@@ -45,6 +45,7 @@ Table of Contents
   3.9   /proc/<pid>/map_files - Information about memory mapped files
   3.10  /proc/<pid>/timerslack_ns - Task timerslack value
   3.11 /proc/<pid>/patch_state - Livepatch patch operation state
+  3.12 /proc/<pid>/arch_status - Task architecture specific information
 
   4Configuring procfs
   4.1  Mount options
@@ -1948,6 +1949,42 @@ patched.  If the patch is being enabled, then the task 
has already been
 patched.  If the patch is being disabled, then the task hasn't been
 unpatched yet.
 
+3.12 /proc/<pid>/arch_status - task architecture specific status
+---
+When CONFIG_PROC_PID_ARCH_STATUS is enabled, this file displays the
+architecture specific status of the task.
+
+Example
+---
+ $ cat /proc/6753/arch_status
+ AVX512_elapsed_ms:  8
+
+Description
+---
+
+ AVX512_elapsed_ms:
+ --
+  If AVX512 is supported on the machine, this entry shows the milliseconds
+  elapsed since the last time AVX512 usage was recorded. The recording
+  happens on a best effort basis when a task is scheduled out. This means
+  that the value depends on two factors:
+
+1) The time which the task spent on the CPU without being scheduled
+   out. With CPU isolation and a single runnable task this can take
+   several seconds.
+
+2) The time since the task was scheduled out last. Depending on the
+   reason for being scheduled out (time slice exhausted, syscall ...)
+   this can be an arbitrarily long time.
+
+  As a consequence the value cannot be considered precise and authoritative
+  information. The application which uses this information has to be aware
+  of the overall scenario on the system in order to determine whether a
+  task is a real AVX512 user or not.
+
+  A special value of '-1' indicates that no AVX512 usage was recorded, thus
+  the task is unlikely to be an AVX512 user. However, depending on the
+  workload and the scheduling scenario, it could also be a false negative
+  as mentioned above.
 
 --
 Configuring procfs
-- 
2.17.1

[PATCH v17 1/3] proc: add /proc/<pid>/arch_status

2019-04-21 Thread Aubrey Li

The architecture specific information of the running processes
could be useful to the userland. Add /proc/<pid>/arch_status
interface support to examine process architecture specific
information externally.

Signed-off-by: Aubrey Li 
Cc: Thomas Gleixner 
Cc: Peter Zijlstra 
Cc: Andi Kleen 
Cc: Tim Chen 
Cc: Dave Hansen 
Cc: Arjan van de Ven 
Cc: Alexey Dobriyan 
Cc: Andrew Morton 
Cc: Andy Lutomirski 
Cc: Linux API 
---
 arch/x86/Kconfig |  1 +
 fs/proc/Kconfig  | 10 ++
 fs/proc/base.c   | 23 +++
 3 files changed, 34 insertions(+)

diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 5ad92419be19..d5a9c5ddd453 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -208,6 +208,7 @@ config X86
select USER_STACKTRACE_SUPPORT
select VIRT_TO_BUS
select X86_FEATURE_NAMESif PROC_FS
+   select PROC_PID_ARCH_STATUS if PROC_FS
 
 config INSTRUCTION_DECODER
def_bool y
diff --git a/fs/proc/Kconfig b/fs/proc/Kconfig
index 817c02b13b1d..101bf5054e81 100644
--- a/fs/proc/Kconfig
+++ b/fs/proc/Kconfig
@@ -97,3 +97,13 @@ config PROC_CHILDREN
 
  Say Y if you are running any user-space software which takes benefit 
from
  this interface. For example, rkt is such a piece of software.
+
+config PROC_PID_ARCH_STATUS
+   bool "Enable /proc/<pid>/arch_status file"
+   default n
+   help
+ Provides a way to examine process architecture specific information.
+ See <file:Documentation/filesystems/proc.txt> for more information.
+
+ Say Y if you are running any user-space software which wants to obtain
+ process architecture specific information from this interface.
diff --git a/fs/proc/base.c b/fs/proc/base.c
index 6a803a0b75df..a890d9f12851 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -94,6 +94,7 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 #include "internal.h"
 #include "fd.h"
@@ -2957,6 +2958,22 @@ static int proc_stack_depth(struct seq_file *m, struct 
pid_namespace *ns,
 }
 #endif /* CONFIG_STACKLEAK_METRICS */
 
+/*
+ * Add support for task architecture specific output in /proc/pid/arch_status.
+ * task_arch_status() must be defined in asm/processor.h
+ */
+#ifdef CONFIG_PROC_PID_ARCH_STATUS
+# ifndef task_arch_status
+# define task_arch_status(m, task)
+# endif
+static int proc_pid_arch_status(struct seq_file *m, struct pid_namespace *ns,
+   struct pid *pid, struct task_struct *task)
+{
+   task_arch_status(m, task);
+   return 0;
+}
+#endif /* CONFIG_PROC_PID_ARCH_STATUS */
+
 /*
  * Thread groups
  */
@@ -3061,6 +3078,9 @@ static const struct pid_entry tgid_base_stuff[] = {
 #ifdef CONFIG_STACKLEAK_METRICS
ONE("stack_depth", S_IRUGO, proc_stack_depth),
 #endif
+#ifdef CONFIG_PROC_PID_ARCH_STATUS
+   ONE("arch_status", S_IRUGO, proc_pid_arch_status),
+#endif
 };
 
 static int proc_tgid_base_readdir(struct file *file, struct dir_context *ctx)
@@ -3449,6 +3469,9 @@ static const struct pid_entry tid_base_stuff[] = {
 #ifdef CONFIG_LIVEPATCH
ONE("patch_state",  S_IRUSR, proc_pid_patch_state),
 #endif
+#ifdef CONFIG_PROC_PID_ARCH_STATUS
+   ONE("arch_status", S_IRUGO, proc_pid_arch_status),
+#endif
 };
 
 static int proc_tid_base_readdir(struct file *file, struct dir_context *ctx)
-- 
2.17.1

[PATCH v17 2/3] /proc/pid/arch_status: Add AVX-512 usage elapsed time

2019-04-21 Thread Aubrey Li

The use of AVX-512 components could cause a core turbo frequency drop. So
it's useful to expose AVX-512 usage elapsed time as a heuristic hint
for the user space job scheduler to cluster the AVX-512 using tasks
together.

Tensorflow example:
$ while [ 1 ]; do cat /proc/tid/arch_status | grep AVX512; sleep 1; done
AVX512_elapsed_ms:  4
AVX512_elapsed_ms:  8
AVX512_elapsed_ms:  4

This means that 4 milliseconds have elapsed since the AVX512 usage
of tensorflow task was detected when the task was scheduled out.

Or:
$ cat /proc/tid/arch_status | grep AVX512
AVX512_elapsed_ms:  -1

The number '-1' indicates that no AVX512 usage has been recorded so far,
thus the task is unlikely to have the frequency drop issue.

User space tools may want to further check by:

$ perf stat --pid <pid> -e core_power.lvl2_turbo_license -- sleep 1

 Performance counter stats for process id '3558':

 3,251,565,961  core_power.lvl2_turbo_license

   1.004031387 seconds time elapsed

Non-zero counter value confirms that the task causes frequency drop.

Signed-off-by: Aubrey Li 
Cc: Thomas Gleixner 
Cc: Peter Zijlstra 
Cc: Andi Kleen 
Cc: Tim Chen 
Cc: Dave Hansen 
Cc: Arjan van de Ven 
Cc: Alexey Dobriyan 
Cc: Andrew Morton 
Cc: Andy Lutomirski 
Cc: Linux API 
---
 arch/x86/include/asm/processor.h |  6 +
 arch/x86/kernel/fpu/xstate.c | 43 
 2 files changed, 49 insertions(+)

diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index 2bb3a648fc12..0728848473a2 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -991,4 +991,10 @@ enum l1tf_mitigations {
 
 extern enum l1tf_mitigations l1tf_mitigation;
 
+#ifdef CONFIG_PROC_PID_ARCH_STATUS
+/* Add support for task architecture specific output in /proc/pid/arch_status 
*/
+void task_arch_status(struct seq_file *m, struct task_struct *task);
+#define task_arch_status task_arch_status
+#endif /* CONFIG_PROC_PID_ARCH_STATUS */
+
 #endif /* _ASM_X86_PROCESSOR_H */
diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c
index d7432c2b1051..a0dda11ab72e 100644
--- a/arch/x86/kernel/fpu/xstate.c
+++ b/arch/x86/kernel/fpu/xstate.c
@@ -7,6 +7,7 @@
 #include 
 #include 
 #include 
+#include 
 
 #include 
 #include 
@@ -1243,3 +1244,45 @@ int copy_user_to_xstate(struct xregs_state *xsave, const 
void __user *ubuf)
 
return 0;
 }
+
+#ifdef CONFIG_PROC_PID_ARCH_STATUS
+/*
+ * Report the amount of time elapsed in millisecond since last AVX512
+ * use in the task.
+ */
+static void avx512_status(struct seq_file *m, struct task_struct *task)
+{
+   unsigned long timestamp = READ_ONCE(task->thread.fpu.avx512_timestamp);
+   long delta;
+
+   if (!timestamp) {
+   /*
+* Report -1 if no AVX512 usage
+*/
+   delta = -1;
+   } else {
+   delta = (long)(jiffies - timestamp);
+   /*
+* Cap to LONG_MAX if time difference > LONG_MAX
+*/
+   if (delta < 0)
+   delta = LONG_MAX;
+   delta = jiffies_to_msecs(delta);
+   }
+
+   seq_put_decimal_ll(m, "AVX512_elapsed_ms:\t", delta);
+   seq_putc(m, '\n');
+}
+
+/*
+ * Report architecture specific information
+ */
+void task_arch_status(struct seq_file *m, struct task_struct *task)
+{
+   /*
+* Report AVX512 state if the processor and build option supported.
+*/
+   if (cpu_feature_enabled(X86_FEATURE_AVX512F))
+   avx512_status(m, task);
+}
+#endif /* CONFIG_PROC_PID_ARCH_STATUS */
-- 
2.17.1

[RFC PATCH 0/5] NUMA Balancer Suite

2019-04-21 Thread 王贇

We have the NUMA Balancing feature, which always tries to move the pages
of a task to the node it executes on most, but there are still issues:

* page cache can't be handled
* no cgroup level balancing

Suppose we have a box with 4 CPUs and two cgroups, A & B, each running 4 tasks;
the scenario below can easily be observed:

NODE0   |   NODE1
|
CPU0CPU1|   CPU2CPU3
task_A0 task_A1 |   task_A2 task_A3
task_B0 task_B1 |   task_B2 task_B3

and usually with the equal memory consumption on each node, when tasks have
similar behavior.

In this case numa balancing tries to move the pages of task_A0,1 & task_B0,1 to
node 0 and the pages of task_A2,3 & task_B2,3 to node 1, but the page cache will
be located randomly, depending on the CPU location of the first read/write.

Let's suppose another scenario:

NODE0   |   NODE1
|
CPU0CPU1|   CPU2CPU3
task_A0 task_A1 |   task_B0 task_B1
task_A2 task_A3 |   task_B2 task_B3

By switching the cpu & memory resources of task_A0,1 and task_B0,1, the
workloads of cgroup A are now all on node 0 and those of cgroup B all on node 1.
Resource consumption is the same, but related tasks can share a closer cpu
cache, while the page cache is still randomly located.

Now what if the workloads generate lots of page cache, and most of the memory
accesses are page cache writes?

Page cache generated by task_A0 on NODE1 won't follow it to NODE0, but if
task_A0 was already on NODE0 before it read or wrote the files, the caches will
be there. So how do we make sure this happens?

Usually we could solve this problem by binding workloads to a single node: if
cgroup A were bound to CPU0,1, then all the caches it generates would be on
NODE0 and the numa bonus would be maximal.

However, this requires very careful administration of the specific workloads.
Suppose in our case A & B have CPU requirements that change from 0% to 400%;
then binding to a single node would be a bad idea.

So what we need is a way to detect the memory topology at the cgroup level, and
to try to migrate cpu/mem resources to the node holding most of the caches, as
long as resources are plentiful on that node.

This patch set introduced:
  * advanced per-cgroup numa statistic
  * numa preferred node feature
  * Numa Balancer module

These help to achieve an easy and flexible numa resource assignment, to gain as
much numa bonus as possible.

Michael Wang (5):
  numa: introduce per-cgroup numa balancing locality statistic
  numa: append per-node execution info in memory.numa_stat
  numa: introduce per-cgroup preferred numa node
  numa: introduce numa balancer infrastructure
  numa: numa balancer

 drivers/Makefile |   1 +
 drivers/numa/Makefile|   1 +
 drivers/numa/numa_balancer.c | 715 +++
 include/linux/memcontrol.h   |  99 ++
 include/linux/sched.h|   9 +-
 kernel/sched/debug.c |   8 +
 kernel/sched/fair.c  |  41 +++
 mm/huge_memory.c |   7 +-
 mm/memcontrol.c  | 246 +++
 mm/memory.c  |   9 +-
 mm/mempolicy.c   |   4 +
 11 files changed, 1133 insertions(+), 7 deletions(-)
 create mode 100644 drivers/numa/Makefile
 create mode 100644 drivers/numa/numa_balancer.c

-- 
2.14.4.44.g2045bb6

[RFC PATCH 1/5] numa: introduce per-cgroup numa balancing locality statistic

2019-04-21 Thread 王贇

This patch introduces a numa locality statistic, which tries to indicate
the numa balancing efficiency per memory cgroup.

By doing 'cat /sys/fs/cgroup/memory/CGROUP_PATH/memory.numa_stat', we
see a new output line starting with 'locality'; the format is:

  locality 0~9% 10%~19% 20%~29% 30%~39% 40%~49% 50%~59% 60%~69% 70%~79%
80%~89% 90%~100%

Each interval is the percentage of local page accesses observed during a
task's last numa balancing, which we call the numa balancing locality.

Each number is how many ticks tasks with that locality were found running
inside the cgroup, for example:

  locality 7260278 54860 90493 209327 295801 462784 558897 667242
2786324 7399308

the 7260278 means that this cgroup has some tasks with 0~9% locality
that executed for 7260278 ticks.

By monitoring the increment, we can check whether the workload of a particular
cgroup is doing well with numa; when most of the tasks are running with
locality 0~9%, something is wrong with your numa policy.

Signed-off-by: Michael Wang 
---
 include/linux/memcontrol.h | 38 +++
 include/linux/sched.h  |  8 +++-
 kernel/sched/debug.c   |  7 +++
 kernel/sched/fair.c|  8 
 mm/huge_memory.c   |  4 +---
 mm/memcontrol.c| 50 ++
 mm/memory.c|  5 ++---
 7 files changed, 113 insertions(+), 7 deletions(-)

diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index 534267947664..bb62e6294484 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -179,6 +179,27 @@ enum memcg_kmem_state {
KMEM_ONLINE,
 };

+#ifdef CONFIG_NUMA_BALANCING
+
+enum memcg_numa_locality_interval {
+   PERCENT_0_9,
+   PERCENT_10_19,
+   PERCENT_20_29,
+   PERCENT_30_39,
+   PERCENT_40_49,
+   PERCENT_50_59,
+   PERCENT_60_69,
+   PERCENT_70_79,
+   PERCENT_80_89,
+   PERCENT_90_100,
+   NR_NL_INTERVAL,
+};
+
+struct memcg_stat_numa {
+   u64 locality[NR_NL_INTERVAL];
+};
+
+#endif
 #if defined(CONFIG_SMP)
 struct memcg_padding {
char x[0];
@@ -311,6 +332,10 @@ struct mem_cgroup {
struct list_head event_list;
spinlock_t event_list_lock;

+#ifdef CONFIG_NUMA_BALANCING
+   struct memcg_stat_numa __percpu *stat_numa;
+#endif
+
struct mem_cgroup_per_node *nodeinfo[0];
/* WARNING: nodeinfo must be the last member here */
 };
@@ -818,6 +843,14 @@ static inline void memcg_memory_event_mm(struct mm_struct 
*mm,
 void mem_cgroup_split_huge_fixup(struct page *head);
 #endif

+#ifdef CONFIG_NUMA_BALANCING
+extern void memcg_stat_numa_update(struct task_struct *p);
+#else
+static inline void memcg_stat_numa_update(struct task_struct *p)
+{
+}
+#endif
+
 #else /* CONFIG_MEMCG */

 #define MEM_CGROUP_ID_SHIFT0
@@ -1156,6 +1189,11 @@ static inline
 void count_memcg_event_mm(struct mm_struct *mm, enum vm_event_item idx)
 {
 }
+
+static inline void memcg_stat_numa_update(struct task_struct *p)
+{
+}
+
 #endif /* CONFIG_MEMCG */

 /* idx can be of type enum memcg_stat_item or node_stat_item */
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 1a3c28d997d4..0b01262d110d 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1049,8 +1049,14 @@ struct task_struct {
 * scan window were remote/local or failed to migrate. The task scan
 * period is adapted based on the locality of the faults with different
 * weights depending on whether they were shared or private faults
+*
+* 0 -- remote faults
+* 1 -- local faults
+* 2 -- page migration failure
+* 3 -- remote page accessing after page migration
+* 4 -- local page accessing after page migration
 */
-   unsigned long   numa_faults_locality[3];
+   unsigned long   numa_faults_locality[5];

unsigned long   numa_pages_migrated;
 #endif /* CONFIG_NUMA_BALANCING */
diff --git a/kernel/sched/debug.c b/kernel/sched/debug.c
index 8039d62ae36e..2898f5fa4fba 100644
--- a/kernel/sched/debug.c
+++ b/kernel/sched/debug.c
@@ -873,6 +873,13 @@ static void sched_show_numa(struct task_struct *p, struct 
seq_file *m)
SEQ_printf(m, "current_node=%d, numa_group_id=%d\n",
task_node(p), task_numa_group_id(p));
show_numa_stats(p, m);
+   SEQ_printf(m, "faults_locality local=%lu remote=%lu failed=%lu ",
+   p->numa_faults_locality[1],
+   p->numa_faults_locality[0],
+   p->numa_faults_locality[2]);
+   SEQ_printf(m, "lhit=%lu rhit=%lu\n",
+   p->numa_faults_locality[4],
+   p->numa_faults_locality[3]);
mpol_put(pol);
 #endif
 }
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index fdab7eb6f351..ba5a67139d57 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -23,6 +23,7

[RFC PATCH 2/5] numa: append per-node execution info in memory.numa_stat

2019-04-21 Thread 王贇

This patch introduces numa execution information, to indicate the numa
efficiency.

By doing 'cat /sys/fs/cgroup/memory/CGROUP_PATH/memory.numa_stat', we
see a new output line starting with 'exectime', like:

  exectime 24399843 27865444

which means the tasks of this cgroup executed 24399843 ticks on node 0,
and 27865444 ticks on node 1.

Combined with the memory node info, we can estimate the numa efficiency,
for example the memory.numa_stat show:

  total=4613257 N0=6849 N1=3928327
  ...
  exectime 24399843 27865444

there could be unmovable or cache pages on N1, in which case good locality
could mean nothing since we are not tracing these types of pages; thus binding
the workloads to the cpus of N1 is worth a try, in order to achieve the maximum
performance bonus.

Signed-off-by: Michael Wang 
---
 include/linux/memcontrol.h |  1 +
 mm/memcontrol.c| 13 +
 2 files changed, 14 insertions(+)

diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index bb62e6294484..e784d6252d5e 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -197,6 +197,7 @@ enum memcg_numa_locality_interval {

 struct memcg_stat_numa {
u64 locality[NR_NL_INTERVAL];
+   u64 exectime;
 };

 #endif
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index b810d4e9c906..91bcd71fc38a 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -3409,6 +3409,18 @@ static int memcg_numa_stat_show(struct seq_file *m, void 
*v)
seq_printf(m, " %llu", sum);
}
seq_putc(m, '\n');
+
+   seq_puts(m, "exectime");
+   for_each_online_node(nr) {
+   int cpu;
+   u64 sum = 0;
+
+   for_each_cpu(cpu, cpumask_of_node(nr))
+   sum += per_cpu(memcg->stat_numa->exectime, cpu);
+
+   seq_printf(m, " %llu", sum);
+   }
+   seq_putc(m, '\n');
 #endif

return 0;
@@ -3437,6 +3449,7 @@ void memcg_stat_numa_update(struct task_struct *p)
memcg = mem_cgroup_from_task(p);
if (idx != -1)
this_cpu_inc(memcg->stat_numa->locality[idx]);
+   this_cpu_inc(memcg->stat_numa->exectime);
rcu_read_unlock();
 }
 #endif
-- 
2.14.4.44.g2045bb6

[RFC PATCH 3/5] numa: introduce per-cgroup preferred numa node

2019-04-21 Thread 王贇

This patch adds a new entry 'numa_preferred' for each memory cgroup,
by which we can override the memory policy of the tasks inside
a particular cgroup. Combined with numa balancing, we are now able to
migrate the workloads of a cgroup to the specified numa node in a gentle
way.

Load balancing and the numa preference work against each other on CPU placement,
which leads to the situation that, although a particular node is capable
enough to hold all the workloads, tasks will still spread.

In order to acquire the numa benefit in this situation, load balancing
should respect the preference as long as the balancing won't be
broken.

This patch tries to forbid workloads from leaving the memcg preferred node, when
and only when a numa preferred node is configured; if load balancing
can't find other tasks to move and keeps failing, we then give up
and allow the migration to happen.

Signed-off-by: Michael Wang 
---
 include/linux/memcontrol.h | 34 +++
 include/linux/sched.h  |  1 +
 kernel/sched/debug.c   |  1 +
 kernel/sched/fair.c| 33 +++
 mm/huge_memory.c   |  3 ++
 mm/memcontrol.c| 82 ++
 mm/memory.c|  4 +++
 mm/mempolicy.c |  4 +++
 8 files changed, 162 insertions(+)

diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index e784d6252d5e..0fd5eeb27c4f 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -335,6 +335,8 @@ struct mem_cgroup {

 #ifdef CONFIG_NUMA_BALANCING
struct memcg_stat_numa __percpu *stat_numa;
+   s64 numa_preferred;
+   struct mutex numa_mutex;
 #endif

struct mem_cgroup_per_node *nodeinfo[0];
@@ -846,10 +848,26 @@ void mem_cgroup_split_huge_fixup(struct page *head);

 #ifdef CONFIG_NUMA_BALANCING
 extern void memcg_stat_numa_update(struct task_struct *p);
+extern int memcg_migrate_prep(int target_nid, int page_nid);
+extern int memcg_preferred_nid(struct task_struct *p, gfp_t gfp);
+extern struct page *alloc_page_numa_preferred(gfp_t gfp, unsigned int order);
 #else
 static inline void memcg_stat_numa_update(struct task_struct *p)
 {
 }
+static inline int memcg_migrate_prep(int target_nid, int page_nid)
+{
+   return target_nid;
+}
+static inline int memcg_preferred_nid(struct task_struct *p, gfp_t gfp)
+{
+   return -1;
+}
+static inline struct page *alloc_page_numa_preferred(gfp_t gfp,
+unsigned int order)
+{
+   return NULL;
+}
 #endif

 #else /* CONFIG_MEMCG */
@@ -1195,6 +1213,22 @@ static inline void memcg_stat_numa_update(struct 
task_struct *p)
 {
 }

+static inline int memcg_migrate_prep(int target_nid, int page_nid)
+{
+   return target_nid;
+}
+
+static inline int memcg_preferred_nid(struct task_struct *p, gfp_t gfp)
+{
+   return -1;
+}
+
+static inline struct page *alloc_page_numa_preferred(gfp_t gfp,
+unsigned int order)
+{
+   return NULL;
+}
+
 #endif /* CONFIG_MEMCG */

 /* idx can be of type enum memcg_stat_item or node_stat_item */
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 0b01262d110d..9f931db1d31f 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -422,6 +422,7 @@ struct sched_statistics {
u64 nr_migrations_cold;
u64 nr_failed_migrations_affine;
u64 nr_failed_migrations_running;
+   u64 nr_failed_migrations_memcg;
u64 nr_failed_migrations_hot;
u64 nr_forced_migrations;

diff --git a/kernel/sched/debug.c b/kernel/sched/debug.c
index 2898f5fa4fba..32f5fd66f0fe 100644
--- a/kernel/sched/debug.c
+++ b/kernel/sched/debug.c
@@ -934,6 +934,7 @@ void proc_sched_show_task(struct task_struct *p, struct 
pid_namespace *ns,
P_SCHEDSTAT(se.statistics.nr_migrations_cold);
P_SCHEDSTAT(se.statistics.nr_failed_migrations_affine);
P_SCHEDSTAT(se.statistics.nr_failed_migrations_running);
+   P_SCHEDSTAT(se.statistics.nr_failed_migrations_memcg);
P_SCHEDSTAT(se.statistics.nr_failed_migrations_hot);
P_SCHEDSTAT(se.statistics.nr_forced_migrations);
P_SCHEDSTAT(se.statistics.nr_wakeups);
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index ba5a67139d57..5d0758e78b96 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -6701,6 +6701,10 @@ select_task_rq_fair(struct task_struct *p, int prev_cpu, 
int sd_flag, int wake_f
new_cpu = find_idlest_cpu(sd, p, cpu, prev_cpu, sd_flag);
} else if (sd_flag & SD_BALANCE_WAKE) { /* XXX always ? */
/* Fast path */
+   int pnid = memcg_preferred_nid(p, 0);
+
+   if (pnid != NUMA_NO_NODE && pnid !=

[RFC PATCH 4/5] numa: introduce numa balancer infrastructure

2019-04-21 Thread 王贇

Now that we have a way to estimate and adjust the numa preferred node for each
memcg, the next problem is how to use it.

Usually one will bind workloads with cpuset.cpus, combined with cpuset.mems
or, maybe better, a memory policy, to achieve the numa bonus. However, in
complicated scenarios like mixed types of workloads or cpushare-style isolation,
this kind of administration could drive one crazy. What we need is a way to gain
the numa bonus automatically, maybe not the maximum but as much as possible.

This patch introduces a basic API for kernel modules to do numa adjustment;
the numa balancer module coming later uses it to try to gain as much numa
bonus as possible, automatically.

API including:
  * numa preferred control
  * memcg callback hook
  * memcg per-node page number acquire

Signed-off-by: Michael Wang 
---
 include/linux/memcontrol.h |  26 
 mm/memcontrol.c| 101 +
 2 files changed, 127 insertions(+)

diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index 0fd5eeb27c4f..7456b862d5a9 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -200,6 +200,11 @@ struct memcg_stat_numa {
u64 exectime;
 };

+struct memcg_callback {
+   void (*init)(struct mem_cgroup *memcg);
+   void (*exit)(struct mem_cgroup *memcg);
+};
+
 #endif
 #if defined(CONFIG_SMP)
 struct memcg_padding {
@@ -337,6 +342,8 @@ struct mem_cgroup {
struct memcg_stat_numa __percpu *stat_numa;
s64 numa_preferred;
struct mutex numa_mutex;
+   void *numa_private;
+   struct list_head numa_list;
 #endif

struct mem_cgroup_per_node *nodeinfo[0];
@@ -851,6 +858,10 @@ extern void memcg_stat_numa_update(struct task_struct *p);
 extern int memcg_migrate_prep(int target_nid, int page_nid);
 extern int memcg_preferred_nid(struct task_struct *p, gfp_t gfp);
 extern struct page *alloc_page_numa_preferred(gfp_t gfp, unsigned int order);
+extern int register_memcg_callback(void *cb);
+extern int unregister_memcg_callback(void *cb);
+extern void config_numa_preferred(struct mem_cgroup *memcg, int nid);
+extern u64 memcg_numa_pages(struct mem_cgroup *memcg, int nid, u32 mask);
 #else
 static inline void memcg_stat_numa_update(struct task_struct *p)
 {
@@ -868,6 +879,21 @@ static inline struct page *alloc_page_numa_preferred(gfp_t 
gfp,
 {
return NULL;
 }
+static inline int register_memcg_callback(void *cb)
+{
+   return -EINVAL;
+}
+static inline int unregister_memcg_callback(void *cb)
+{
+   return -EINVAL;
+}
+static inline void config_numa_preferred(struct mem_cgroup *memcg, int nid)
+{
+}
+static inline u64 memcg_numa_pages(struct mem_cgroup *memcg, int nid, u32 mask)
+{
+   return 0;
+}
 #endif

 #else /* CONFIG_MEMCG */
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index f1cb1e726430..dc232ecc904f 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -3525,6 +3525,102 @@ struct page *alloc_page_numa_preferred(gfp_t gfp, 
unsigned int order)
return __alloc_pages_node(pnid, gfp, order);
 }

+static struct memcg_callback *memcg_cb;
+
+static LIST_HEAD(memcg_cb_list);
+static DEFINE_MUTEX(memcg_cb_mutex);
+
+int register_memcg_callback(void *cb)
+{
+   int ret = 0;
+
+   mutex_lock(_cb_mutex);
+   if (memcg_cb || !cb) {
+   ret = -EINVAL;
+   goto out;
+   }
+
+   memcg_cb = (struct memcg_callback *)cb;
+   if (memcg_cb->init) {
+   struct mem_cgroup *memcg;
+
+   list_for_each_entry(memcg, _cb_list, numa_list)
+   memcg_cb->init(memcg);
+   }
+
+out:
+   mutex_unlock(_cb_mutex);
+   return ret;
+}
+EXPORT_SYMBOL(register_memcg_callback);
+
+int unregister_memcg_callback(void *cb)
+{
+   int ret = 0;
+
+   mutex_lock(_cb_mutex);
+   if (!memcg_cb || memcg_cb != cb) {
+   ret = -EINVAL;
+   goto out;
+   }
+
+   if (memcg_cb->exit) {
+   struct mem_cgroup *memcg;
+
+   list_for_each_entry(memcg, _cb_list, numa_list)
+   memcg_cb->exit(memcg);
+   }
+   memcg_cb = NULL;
+
+out:
+   mutex_unlock(_cb_mutex);
+   return ret;
+}
+EXPORT_SYMBOL(unregister_memcg_callback);
+
+void config_numa_preferred(struct mem_cgroup *memcg, int nid)
+{
+   mutex_lock(>numa_mutex);
+   memcg->numa_preferred = nid;
+   mutex_unlock(>numa_mutex);
+}
+EXPORT_SYMBOL(config_numa_preferred);
+
+u64 memcg_numa_pages(struct mem_cgroup *memcg, int nid, u32 mask)
+{
+   if (nid == NUMA_NO_NODE)
+   return mem_cgroup_nr_lru_pages(memcg, mask);
+   else
+   return mem_cgroup_node_nr_lru_pages(memcg, nid, mask);
+}
+EXPORT_SYMBOL(memcg_numa_pages);
+
+static void memcg_online_callback(struct mem_cgroup *memcg)
+{
+   mutex_lock(_cb_mutex);
+   list_add_tail(>numa_list, _cb_list);
+   if (memcg_cb && memcg_cb->init)
+

[RFC PATCH 5/5] numa: numa balancer

2019-04-21 Thread 王贇

The numa balancer is a module which tries to automatically adjust numa
balancing to gain as much numa bonus as possible.

For each memory cgroup, we process the work in two stages:

In stage 1 we check the cgroup's exectime and memory topology to see
whether there is a candidate node to settle down on; if we get one, we
move on to stage 2.

In stage 2 we try to settle down as much as possible by preferring the
candidate node; if the node is no longer suitable or locality keeps
declining, we reset things and a new round begins.

The decisions are made by find_candidate_nid(), should_prefer() and
keep_prefer(), which pick a candidate node, check whether we are allowed to
prefer it, and decide whether to keep preferring it.

Tested on a box with 96 cpus using sysbench-mysql-oltp_read_write:
4 mysqld instances were created and attached to 4 cgroups, then 4
sysbench instances were created and attached to the corresponding cgroups
to test mysql with the oltp_read_write script. The average eps shows:

origin  balancer
4 instances each 12 threads 5241.08 5375.59 +2.50%
4 instances each 24 threads 7497.29 7820.73 +4.13%
4 instances each 36 threads 8985.44 9317.04 +3.55%
4 instances each 48 threads 9716.50 9982.60 +2.66%

Other benchmarks like dbench, pgbench and perf bench numa were also tested,
with different parameters and numbers of instances/threads; most of
the cases show a bonus, some show an acceptable regression, and some show no
change.

TODO:
  * improve the logic to address the regression cases
  * find a way, maybe, to handle the page cache left on remote nodes
  * find more scenarios which could benefit

Signed-off-by: Michael Wang 
---
 drivers/Makefile |   1 +
 drivers/numa/Makefile|   1 +
 drivers/numa/numa_balancer.c | 715 +++
 3 files changed, 717 insertions(+)
 create mode 100644 drivers/numa/Makefile
 create mode 100644 drivers/numa/numa_balancer.c

diff --git a/drivers/Makefile b/drivers/Makefile
index c61cde554340..f07936b03870 100644
--- a/drivers/Makefile
+++ b/drivers/Makefile
@@ -187,3 +187,4 @@ obj-$(CONFIG_UNISYS_VISORBUS)   += visorbus/
 obj-$(CONFIG_SIOX) += siox/
 obj-$(CONFIG_GNSS) += gnss/
 obj-$(CONFIG_INTERCONNECT) += interconnect/
+obj-$(CONFIG_NUMA_BALANCING)   += numa/
diff --git a/drivers/numa/Makefile b/drivers/numa/Makefile
new file mode 100644
index ..acf8a408
--- /dev/null
+++ b/drivers/numa/Makefile
@@ -0,0 +1 @@
+obj-m  += numa_balancer.o
diff --git a/drivers/numa/numa_balancer.c b/drivers/numa/numa_balancer.c
new file mode 100644
index ..25bbe08c82a2
--- /dev/null
+++ b/drivers/numa/numa_balancer.c
@@ -0,0 +1,715 @@
+/*
+ * NUMA Balancer
+ *
+ *  Copyright (C) 2019 Alibaba Group Holding Limited.
+ *  Author: Michael Wang 
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+
+static unsigned int debug_level;
+module_param(debug_level, uint, 0644);
+MODULE_PARM_DESC(debug_level, "1 to print decisions, 2 to print both decisions 
and node info");
+
+static int prefer_level = 10;
+module_param(prefer_level, int, 0644);
+MODULE_PARM_DESC(prefer_level, "stop numa prefer when reach this much 
continuous downturn, 0 means no prefer");
+
+static unsigned int locality_level = PERCENT_70_79;
+module_param(locality_level, uint, 0644);
+MODULE_PARM_DESC(locality_level, "consider locality as good when above this 
sector");
+
+static unsigned long period_max = (600 * HZ);
+module_param(period_max, ulong, 0644);
+MODULE_PARM_DESC(period_max, "maximum period between each stage");
+
+static unsigned long period_min = (5 * HZ);
+module_param(period_min, ulong, 0644);
+MODULE_PARM_DESC(period_min, "minimum period between each stage");
+
+static unsigned int cpu_high_wmark = 100;
+module_param(cpu_high_wmark, uint, 0644);
+MODULE_PARM_DESC(cpu_high_wmark, "respect the execution percent rather than 
memory percent when above this cpu usage");
+
+static unsigned int cpu_low_wmark = 10;
+module_param(cpu_low_wmark, uint, 0644);
+MODULE_PARM_DESC(cpu_low_wmark, "consider cgroup as active when above this cpu 
usage");
+
+static unsigned int free_low_wmark = 10;
+module_param(free_low_wmark, uint, 0644);
+MODULE_PARM_DESC(free_low_wmark, "consider node as consumed out when below 
this free percent");
+
+static unsigned int candidate_wmark = 60;
+module_param(candidate_wmark, uint, 0644);
+MODULE_PARM_DESC(candidate_wmark, "consider node as candidate when above this 
execution time or memory percent");
+
+static unsigned int settled_wmark = 90;
+module_param(settled_wmark, uint, 0644);
+MODULE_PARM_DESC(settled_wmark, "consider cgroup settle down on node when 
above this execution time and memory percent,

Re: [PATCH 2/2] soc: sprd: Add Spreadtrum multi-channel data transfer support

2019-04-21 Thread Baolin Wang

Hi Mark,

On Fri, 19 Apr 2019 at 22:50, Mark Brown  wrote:
>
> On Fri, Apr 19, 2019 at 06:54:32PM +0800, Baolin Wang wrote:
> > On Spreadtrum platform, the audio subsystem will use the multi-channel
> > data transfer controller to transfer sound stream between audio subsystem
> > and other AP/CP subsystem.
> >
> > It can support 10 DAC channel and 10 ADC channel, and each channel has
> > 512 bytes depth data fifo. Moreover each channel can be used DMA mode
> > or interrupt mode to transfer data.
>
> Acked-by: Mark Brown 

Thanks for your review.

>
> However if there's no non-audio users of this DSP then it's probably
> better to just move the driver into sound/soc.

Yes, only audio will use this driver. OK, I will move it into
sound/soc if there are no other objections. Thanks.

-- 
Baolin Wang
Best Regards

RE: [PATCH] signal: trace_signal_deliver when signal_group_exit

2019-04-21 Thread weizhenliang

On 04/20, Oleg Nesterov wrote:
>On 04/20, Zhenliang Wei wrote:
>>
>> --- a/kernel/signal.c
>> +++ b/kernel/signal.c
>> @@ -2441,6 +2441,8 @@ bool get_signal(struct ksignal *ksig)
>>  if (signal_group_exit(signal)) {
>>  ksig->info.si_signo = signr = SIGKILL;
>>  sigdelset(>pending.signal, SIGKILL);
>> +trace_signal_deliver(signr, >info,
>> +>action[signr - 1]);
>
>Well, in this case ksig->info is not fully initialized for TP_STORE_SIGINFO() 
>which reads si_errno/si_code...
>
>How about
>
>   trace_signal_deliver(SIGKILL, SEND_SIG_NOINFO, SIG_DFL)
>
>?
>
>We know that action[SIGKILL] must be SIG_DFL.
>
>And SEND_SIG_NOINFO matches the fact that SIGKILL doesn't have any info,
>collect_signal() sets SI_USER and clears si_errno in this case.
>
>Oleg.

Thank you for your review, I agree with your suggestion, and I will recommit 
the patch later.

Zhenliang Wei.

Re: [PATCH] arm64: dts: lx2160a: add cpu idle support

2019-04-21 Thread Shawn Guo

On Thu, Apr 18, 2019 at 03:42:25AM +, Ran Wang wrote:
> lx2160a supports pw20, which could help save more power while the cpu is
> idle. It needs system firmware support via PSCI.
> 
> Signed-off-by: Ran Wang 

Applied, thanks.

Re: [PATCH 1/2] arm64: dts: imx8mm: Add SAI nodes

2019-04-21 Thread Shawn Guo

On Fri, Apr 19, 2019 at 08:20:39PM +, Daniel Baluta wrote:
> i.MX8MM has 5 SAI instances with the following base
> addresses according to RM.
> 
> SAI1 base address: 3001_h
> SAI2 base address: 3002_h
> SAI3 base address: 3003_h
> SAI5 base address: 3005_h
> SAI6 base address: 3006_h
> 
> Signed-off-by: Bai Ping 
> Signed-off-by: Daniel Baluta 
> ---
>  arch/arm64/boot/dts/freescale/imx8mm.dtsi | 68 +++
>  1 file changed, 68 insertions(+)
> 
> diff --git a/arch/arm64/boot/dts/freescale/imx8mm.dtsi 
> b/arch/arm64/boot/dts/freescale/imx8mm.dtsi
> index de3498c2dd44..e9a0b2b6063a 100644
> --- a/arch/arm64/boot/dts/freescale/imx8mm.dtsi
> +++ b/arch/arm64/boot/dts/freescale/imx8mm.dtsi
> @@ -171,6 +171,74 @@
>   #size-cells = <1>;
>   ranges;
>  
> + sai1: sai@3001 {
> + compatible = "fsl,imx8mq-sai",

Should be "fsl,imx8mm-sai"?

Shawn

> +  "fsl,imx6sx-sai";
> + reg = <0x3001 0x1>;
> + interrupts = ;
> + clocks = < IMX8MM_CLK_SAI1_IPG>,
> +  < IMX8MM_CLK_SAI1_ROOT>,
> +  < IMX8MM_CLK_DUMMY>, < 
> IMX8MM_CLK_DUMMY>;
> + clock-names = "bus", "mclk1", "mclk2", "mclk3";
> + dmas = < 0 2 0>, < 1 2 0>;
> + dma-names = "rx", "tx";
> + status = "disabled";
> + };
> +
> + sai2: sai@3002 {
> + compatible = "fsl,imx8mq-sai",
> +  "fsl,imx6sx-sai";
> + reg = <0x3002 0x1>;
> + interrupts = ;
> + clocks = < IMX8MM_CLK_SAI2_IPG>,
> + < IMX8MM_CLK_SAI2_ROOT>,
> + < IMX8MM_CLK_DUMMY>, < 
> IMX8MM_CLK_DUMMY>;
> + clock-names = "bus", "mclk1", "mclk2", "mclk3";
> + dmas = < 2 2 0>, < 3 2 0>;
> + dma-names = "rx", "tx";
> + status = "disabled";
> + };
> +
> + sai3: sai@3003 {
> + #sound-dai-cells = <0>;
> + compatible = "fsl,imx8mm-sai", 
> "fsl,imx8mq-sai", "fsl,imx6sx-sai";
> + reg = <0x3003 0x1>;
> + interrupts = ;
> + clocks = < IMX8MM_CLK_SAI3_IPG>,
> +  < IMX8MM_CLK_SAI3_ROOT>,
> +  < IMX8MM_CLK_DUMMY>, < 
> IMX8MM_CLK_DUMMY>;
> + clock-names = "bus", "mclk1", "mclk2", "mclk3";
> + dmas = < 4 2 0>, < 5 2 0>;
> + dma-names = "rx", "tx";
> + status = "disabled";
> + };
> +
> + sai5: sai@3005 {
> + compatible = "fsl,imx8mq-sai", "fsl,imx6sx-sai";
> + reg = <0x3005 0x1>;
> + interrupts = ;
> + clocks = < IMX8MM_CLK_SAI5_IPG>,
> +  < IMX8MM_CLK_SAI5_ROOT>,
> +  < IMX8MM_CLK_DUMMY>, < 
> IMX8MM_CLK_DUMMY>;
> + clock-names = "bus", "mclk1", "mclk2", "mclk3";
> + dmas = < 8 2 0>, < 9 2 0>;
> + dma-names = "rx", "tx";
> + status = "disabled";
> + };
> +
> + sai6: sai@3006 {
> + compatible = "fsl,imx8mq-sai", "fsl,imx6sx-sai";
> + reg = <0x3006 0x1>;
> + interrupts = ;
> + clocks = < IMX8MM_CLK_SAI6_IPG>,
> +  < IMX8MM_CLK_SAI6_ROOT>,
> +  < IMX8MM_CLK_DUMMY>, < 
> IMX8MM_CLK_DUMMY>;
> + clock-names = "bus", "mclk1", "mclk2", "mclk3";
> + dmas = < 10 2 0>, < 11 2 0>;
> + dma-names = "rx", "tx";
> + status = "disabled";
> + };
> +
>   gpio1: gpio@3020 {
>   compatible = "fsl,imx8mm-gpio", 
> "fsl,imx35-gpio";
>   reg = <0x3020 0x1>;
> -- 
> 2.17.1
>

[PATCH] asm/irq_vector.h: fix outdated comments

2019-04-21 Thread Jiang Biao

INVALIDATE_TLB_VECTOR_START has been removed by commit
52aec3308db8 ("x86/tlb: replace INVALIDATE_TLB_VECTOR by
CALL_FUNCTION_VECTOR"),
and VSYSCALL_EMU_VECTOR (204) has also been removed by commit
3ae36655b97a ("x86-64: Rework vsyscall emulation and add vsyscall=
parameter"),
but the comments here are outdated; update them.

Signed-off-by: Jiang Biao 
---
 arch/x86/include/asm/irq_vectors.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/arch/x86/include/asm/irq_vectors.h 
b/arch/x86/include/asm/irq_vectors.h
index 548d90bbf919..889f8b1b5b7f 100644
--- a/arch/x86/include/asm/irq_vectors.h
+++ b/arch/x86/include/asm/irq_vectors.h
@@ -18,8 +18,8 @@
  *  Vectors   0 ...  31 : system traps and exceptions - hardcoded events
  *  Vectors  32 ... 127 : device interrupts
  *  Vector  128 : legacy int80 syscall interface
- *  Vectors 129 ... INVALIDATE_TLB_VECTOR_START-1 except 204 : device 
interrupts
- *  Vectors INVALIDATE_TLB_VECTOR_START ... 255 : special interrupts
+ *  Vectors 129 ... LOCAL_TIMER_VECTOR-1
+ *  Vectors LOCAL_TIMER_VECTOR ... 255 : special interrupts
  *
  * 64-bit x86 has per CPU IDT tables, 32-bit has one shared IDT table.
  *
-- 
2.17.2 (Apple Git-113)

Re: [PATCH 2/2] X86/kdump: fall back to reserve high crashkernel memory

2019-04-21 Thread Dave Young

On 04/21/19 at 08:26pm, Ingo Molnar wrote:
> 
> * Dave Young  wrote:
> 
> > crashkernel=xM tries to reserve crashkernel memory under 4G, which
> > is enough for usual cases.  But this could fail sometimes, for example
> > one tries to reserve a big chunk like 2G, it is possible to fail.
> > 
> > So let the crashkernel=xM just fall back to use high memory in case it
> > fails to find a suitable low range.  Do not set the ,high as default
> > because it allocs extra low memory for DMA buffers and swiotlb, this is
> > not always necessary for all machines. Typically like crashkernel=128M
> > usually work with low reservation under 4G, so still keep <4G as default.
> > 
> > Signed-off-by: Dave Young 
> > ---
> >  Documentation/admin-guide/kernel-parameters.txt |7 +--
> >  arch/x86/kernel/setup.c |   22 
> > ++
> >  2 files changed, 19 insertions(+), 10 deletions(-)
> > 
> > --- linux-x86.orig/arch/x86/kernel/setup.c
> > +++ linux-x86/arch/x86/kernel/setup.c
> > @@ -541,21 +541,27 @@ static void __init reserve_crashkernel(v
> > }
> >  
> > /* 0 means: find the address automatically */
> > -   if (crash_base <= 0) {
> > +   if (!crash_base) {
> > /*
> >  * Set CRASH_ADDR_LOW_MAX upper bound for crash memory,
> > -* as old kexec-tools loads bzImage below that, unless
> > -* "crashkernel=size[KMG],high" is specified.
> > +* as crashkernel=x,high allocs memory over 4G, also allocs
> 
> s/allocs
>  /allocates
> 
> > +* 256M extra low memory for DMA buffers and swiotlb.
> > +* but the extra memory is not required for all machines.
> > +* So prefer low memory first, and fallback to high memory
> 
> s/fallback
>  /fall back
> 
> > +* unless "crashkernel=size[KMG],high" is specified.
> >  */
> > -   crash_base = memblock_find_in_range(CRASH_ALIGN,
> > -   high ? CRASH_ADDR_HIGH_MAX
> > -: CRASH_ADDR_LOW_MAX,
> > -   crash_size, CRASH_ALIGN);
> > +   if (!high)
> > +   crash_base = memblock_find_in_range(CRASH_ALIGN,
> > +   CRASH_ADDR_LOW_MAX,
> > +   crash_size, CRASH_ALIGN);
> > +   if (!crash_base)
> > +   crash_base = memblock_find_in_range(CRASH_ALIGN,
> > +   CRASH_ADDR_HIGH_MAX,
> > +   crash_size, CRASH_ALIGN);
> > if (!crash_base) {
> > pr_info("crashkernel reservation failed - No suitable 
> > area found.\n");
> > return;
> > }
> > -
> > } else {
> > unsigned long long start;
> >  
> > --- linux-x86.orig/Documentation/admin-guide/kernel-parameters.txt
> > +++ linux-x86/Documentation/admin-guide/kernel-parameters.txt
> > @@ -704,8 +704,11 @@
> > upon panic. This parameter reserves the physical
> > memory region [offset, offset + size] for that kernel
> > image. If '@offset' is omitted, then a suitable offset
> > -   is selected automatically. Check
> > -   Documentation/kdump/kdump.txt for further details.
> > +   is selected automatically.
> > +   [KNL, x86_64] select a region under 4G first, and
> > +   fallback to reserve region above 4G in case without
> 
> s/fallback
>  /fall back
> 
> > +   '@offset'.
> > +   See Documentation/kdump/kdump.txt for further details.
> >  
> > crashkernel=range1:size1[,range2:size2,...][@offset]
> > [KNL] Same as above, but depends on the memory
> 
> With the nits fixed:
> 
> Reviewed-by: Ingo Molnar 

Thanks for the review. I will reply to 2/2 with an update that fixes
those spelling issues.

Dave

[PATCH 0/3] Apple iBridge support

2019-04-21 Thread Ronald Tschalär

2016 and 2017 MacBook Pros have a T1 chip that drives the Touch Bar,
ambient light sensor, webcam, and fingerprint sensor; this shows up
as an iBridge USB device in the system. These patches provide initial
support for the Touch Bar and ALS - the webcam is already handled by
existing drivers, and no information is currently known on how to access
the fingerprint sensor (other than it's apparently via one of the extra
interfaces available in the OS X USB configuration).

One thing of note here is that both the ALS and (some of) the Touch Bar
functionality are exposed via the same USB interface (and hence same
hid_device), so both drivers need to share this device. This
necessitated creating a demux hid driver in the mfd driver to which
multiple hid devices can be attached, and implied not being able to make
use of the existing hid-sensor-als driver.

Ronald Tschalär (3):
  mfd: apple-ibridge: Add Apple iBridge MFD driver.
  HID: apple-ib-tb: Add driver for the Touch Bar on MacBook Pro's.
  iio: light: apple-ib-als: Add driver for ALS on iBridge chip.

 drivers/hid/Kconfig   |   10 +
 drivers/hid/Makefile  |1 +
 drivers/hid/apple-ib-tb.c | 1288 +
 drivers/iio/light/Kconfig |   12 +
 drivers/iio/light/Makefile|1 +
 drivers/iio/light/apple-ib-als.c  |  694 
 drivers/mfd/Kconfig   |   15 +
 drivers/mfd/Makefile  |1 +
 drivers/mfd/apple-ibridge.c   |  883 
 include/linux/mfd/apple-ibridge.h |   39 +
 10 files changed, 2944 insertions(+)
 create mode 100644 drivers/hid/apple-ib-tb.c
 create mode 100644 drivers/iio/light/apple-ib-als.c
 create mode 100644 drivers/mfd/apple-ibridge.c
 create mode 100644 include/linux/mfd/apple-ibridge.h

-- 
2.20.1

[PATCH 2/3] HID: apple-ib-tb: Add driver for the Touch Bar on MacBook Pro's.

2019-04-21 Thread Ronald Tschalär

This driver enables basic touch bar functionality: enabling it, switching
between modes on FN key press, and dimming and turning the display
off/on when idle/active.

Signed-off-by: Ronald Tschalär 
---
 drivers/hid/Kconfig   |   10 +
 drivers/hid/Makefile  |1 +
 drivers/hid/apple-ib-tb.c | 1288 +
 3 files changed, 1299 insertions(+)
 create mode 100644 drivers/hid/apple-ib-tb.c

diff --git a/drivers/hid/Kconfig b/drivers/hid/Kconfig
index 41e9935fc584..f0a65bb4be6e 100644
--- a/drivers/hid/Kconfig
+++ b/drivers/hid/Kconfig
@@ -135,6 +135,16 @@ config HID_APPLE
Say Y here if you want support for keyboards of Apple iBooks, 
PowerBooks,
MacBooks, MacBook Pros and Apple Aluminum.
 
+config HID_APPLE_IBRIDGE_TB
+   tristate "Apple iBridge Touch Bar"
+   depends on MFD_APPLE_IBRIDGE
+   ---help---
+   Say Y here if you want support for the Touch Bar on recent
+   MacBook Pros.
+
+   To compile this driver as a module, choose M here: the
+   module will be called apple-ib-tb.
+
 config HID_APPLEIR
tristate "Apple infrared receiver"
depends on (USB_HID)
diff --git a/drivers/hid/Makefile b/drivers/hid/Makefile
index 896a51ce7ce0..dedd8049d3fb 100644
--- a/drivers/hid/Makefile
+++ b/drivers/hid/Makefile
@@ -26,6 +26,7 @@ obj-$(CONFIG_HID_ACCUTOUCH)   += hid-accutouch.o
 obj-$(CONFIG_HID_ALPS) += hid-alps.o
 obj-$(CONFIG_HID_ACRUX)+= hid-axff.o
 obj-$(CONFIG_HID_APPLE)+= hid-apple.o
+obj-$(CONFIG_HID_APPLE_IBRIDGE_TB) += apple-ib-tb.o
 obj-$(CONFIG_HID_APPLEIR)  += hid-appleir.o
 obj-$(CONFIG_HID_ASUS) += hid-asus.o
 obj-$(CONFIG_HID_AUREAL)   += hid-aureal.o
diff --git a/drivers/hid/apple-ib-tb.c b/drivers/hid/apple-ib-tb.c
new file mode 100644
index ..6b72ff56b17f
--- /dev/null
+++ b/drivers/hid/apple-ib-tb.c
@@ -0,0 +1,1288 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Apple Touch Bar Driver
+ *
+ * Copyright (c) 2017-2018 Ronald Tschalär
+ */
+
+/*
+ * Recent MacBookPro models (13,[23] and 14,[23]) have a touch bar, which
+ * is exposed via several USB interfaces. MacOS supports a fancy mode
+ * where arbitrary buttons can be defined; this driver currently only
+ * supports the simple mode that consists of 3 predefined layouts
+ * (escape-only, esc + special keys, and esc + function keys).
+ *
+ * The first USB HID interface supports two reports, an input report that
+ * is used to report the key presses, and an output report which can be
+ * used to set the touch bar "mode": touch bar off (in which case no touches
+ * are reported at all), escape key only, escape + 12 function keys, and
+ * escape + several special keys (including brightness, audio volume,
+ * etc). The second interface supports several, complex reports, most of
+ * which are unknown at this time, but one of which has been determined to
+ * allow for controlling of the touch bar's brightness: off (though touches
+ * are still reported), dimmed, and full brightness. This driver makes
+ * use of these two reports.
+ */
+
+#define dev_fmt(fmt) "tb: " fmt
+
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+
+#define HID_UP_APPLE   0xff12
+#define HID_USAGE_MODE (HID_UP_CUSTOM | 0x0004)
+#define HID_USAGE_APPLE_APP(HID_UP_APPLE  | 0x0001)
+#define HID_USAGE_DISP (HID_UP_APPLE  | 0x0021)
+
+#define APPLETB_MAX_TB_KEYS13  /* ESC, F1-F12 */
+
+#define APPLETB_CMD_MODE_ESC   0
+#define APPLETB_CMD_MODE_FN1
+#define APPLETB_CMD_MODE_SPCL  2
+#define APPLETB_CMD_MODE_OFF   3
+#define APPLETB_CMD_MODE_NONE  255
+
+#define APPLETB_CMD_DISP_ON1
+#define APPLETB_CMD_DISP_DIM   2
+#define APPLETB_CMD_DISP_OFF   4
+#define APPLETB_CMD_DISP_NONE  255
+
+#define APPLETB_FN_MODE_FKEYS  0
+#define APPLETB_FN_MODE_NORM   1
+#define APPLETB_FN_MODE_INV2
+#define APPLETB_FN_MODE_SPCL   3
+#define APPLETB_FN_MODE_MAXAPPLETB_FN_MODE_SPCL
+
+#define APPLETB_DEVID_KEYBOARD 1
+#define APPLETB_DEVID_TOUCHPAD 2
+
+#define APPLETB_MAX_DIM_TIME   30
+
+static int appletb_tb_def_idle_timeout = 5 * 60;
+module_param_named(idle_timeout, appletb_tb_def_idle_timeout, int, 0444);
+MODULE_PARM_DESC(idle_timeout, "Default touch bar idle timeout (in seconds); 0 
disables touch bar, -1 disables timeout");
+
+static int appletb_tb_def_dim_timeout = -2;
+module_param_named(dim_timeout, appletb_tb_def_dim_timeout, int, 0444);
+MODULE_PARM_DESC(dim_timeout, "Default touch bar dim timeout (in seconds); 0 
means always dimmmed, -1 disables dimming, [-2] calculates timeout based on 
idle-timeout");
+
+static int appletb_tb_def_fn_mode = APPLETB_FN_MODE_NORM;
+module_param_named(fnmode, appletb_tb_def_fn_mode, int, 0444);
+MODULE_PARM_DESC(fnmode, "Default Fn key mode: 0 = f-keys only, [1] = fn key 
switches from special to f-keys, 2 = inverse of 1, 3 =

[PATCH 1/3] mfd: apple-ibridge: Add Apple iBridge MFD driver.

2019-04-21 Thread Ronald Tschalär

The iBridge device provides access to several devices, including:
- the Touch Bar
- the iSight webcam
- the light sensor
- the fingerprint sensor

This driver provides the core support for managing the iBridge device
and the access to the underlying devices. In particular, since the
functionality for the touch bar and light sensor is exposed via USB HID
interfaces, and the same HID device is used for multiple functions, this
driver provides a multiplexing layer that allows multiple HID drivers to
be registered for a given HID device. This allows the touch bar and ALS
driver to be separated out into their own modules.

Signed-off-by: Ronald Tschalär 
---
 drivers/mfd/Kconfig   |  15 +
 drivers/mfd/Makefile  |   1 +
 drivers/mfd/apple-ibridge.c   | 883 ++
 include/linux/mfd/apple-ibridge.h |  39 ++
 4 files changed, 938 insertions(+)
 create mode 100644 drivers/mfd/apple-ibridge.c
 create mode 100644 include/linux/mfd/apple-ibridge.h

diff --git a/drivers/mfd/Kconfig b/drivers/mfd/Kconfig
index 76f9909cf396..d55fa77faacf 100644
--- a/drivers/mfd/Kconfig
+++ b/drivers/mfd/Kconfig
@@ -1916,5 +1916,20 @@ config RAVE_SP_CORE
  Select this to get support for the Supervisory Processor
  device found on several devices in RAVE line of hardware.
 
+config MFD_APPLE_IBRIDGE
+   tristate "Apple iBridge chip"
+   depends on ACPI
+   depends on USB_HID
+   depends on X86 || COMPILE_TEST
+   select MFD_CORE
+   help
+ This MFD provides the core support for the Apple iBridge chip
+ found on recent MacBookPro's. The drivers for the Touch Bar
+ (apple-ib-tb) and light sensor (apple-ib-als) need to be
+ enabled separately.
+
+ To compile this driver as a module, choose M here: the
+ module will be called apple-ibridge.
+
 endmenu
 endif
diff --git a/drivers/mfd/Makefile b/drivers/mfd/Makefile
index 12980a4ad460..c364e0e9d313 100644
--- a/drivers/mfd/Makefile
+++ b/drivers/mfd/Makefile
@@ -241,4 +241,5 @@ obj-$(CONFIG_MFD_MXS_LRADC) += mxs-lradc.o
 obj-$(CONFIG_MFD_SC27XX_PMIC)  += sprd-sc27xx-spi.o
 obj-$(CONFIG_RAVE_SP_CORE) += rave-sp.o
 obj-$(CONFIG_MFD_ROHM_BD718XX) += rohm-bd718x7.o
+obj-$(CONFIG_MFD_APPLE_IBRIDGE)+= apple-ibridge.o
 
diff --git a/drivers/mfd/apple-ibridge.c b/drivers/mfd/apple-ibridge.c
new file mode 100644
index ..56d325396961
--- /dev/null
+++ b/drivers/mfd/apple-ibridge.c
@@ -0,0 +1,883 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Apple iBridge Driver
+ *
+ * Copyright (c) 2018 Ronald Tschalär
+ */
+
+/**
+ * MacBookPro models with a Touch Bar (13,[23] and 14,[23]) have an Apple
+ * iBridge chip (also known as T1 chip) which exposes the touch bar,
+ * built-in webcam (iSight), ambient light sensor, and Secure Enclave
+ * Processor (SEP) for TouchID. It shows up in the system as a USB device
+ * with 3 configurations: 'Default iBridge Interfaces', 'Default iBridge
+ * Interfaces(OS X)', and 'Default iBridge Interfaces(Recovery)'. While
+ * the second one is used by MacOS to provide the fancy touch bar
+ * functionality with custom buttons etc, this driver just uses the first.
+ *
+ * In the first (default after boot) configuration, 4 usb interfaces are
+ * exposed: 2 related to the webcam, and 2 USB HID interfaces representing
+ * the touch bar and the ambient light sensor (and possibly the SEP,
+ * though at this point in time nothing is known about that). The webcam
+ * interfaces are already handled by the uvcvideo driver; furthermore, the
+ * handling of the input reports when "keys" on the touch bar are pressed
+ * is already handled properly by the generic USB HID core. This leaves
+ * the management of the touch bar modes (e.g. switching between function
+ * and special keys when the FN key is pressed), the touch bar display
+ * (dimming and turning off), the key-remapping when the FN key is
+ * pressed, and handling of the light sensor.
+ *
+ * This driver is implemented as an MFD driver, with the touch bar and ALS
+ * functions implemented by appropriate subdrivers (mfd cells). Because
+ * both those are basically hid drivers, but the current kernel driver
+ * structure does not allow more than one driver per device, this driver
+ * implements a demuxer for hid drivers: it registers itself as a hid
+ * driver with the core, and in turn it lets the subdrivers register
+ * themselves as hid drivers with this driver; the callbacks from the core
+ * are then forwarded to the subdrivers.
+ *
+ * Lastly, this driver also takes care of the power-management for the
+ * iBridge when suspending and resuming.
+ */
+
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+
+#include "../hid/usbhid/usbhid.h"
+
+#define USB_ID_VENDOR_APPLE0x05ac
+#define USB_ID_PRODUCT_IBRIDGE 0x8600
+
+#define APPLETB_BASIC_CONFIG   1
+
+#defineLOG_DEV(ib_dev)

[PATCH 3/3] iio: light: apple-ib-als: Add driver for ALS on iBridge chip.

2019-04-21 Thread Ronald Tschalär

On 2016/2017 MacBook Pros with a Touch Bar, the ALS is attached to,
and exposed via, the iBridge device. This provides the driver for that
sensor.

Signed-off-by: Ronald Tschalär 
---
 drivers/iio/light/Kconfig|  12 +
 drivers/iio/light/Makefile   |   1 +
 drivers/iio/light/apple-ib-als.c | 694 +++
 3 files changed, 707 insertions(+)
 create mode 100644 drivers/iio/light/apple-ib-als.c

diff --git a/drivers/iio/light/Kconfig b/drivers/iio/light/Kconfig
index 36f458433480..49159fab1c0e 100644
--- a/drivers/iio/light/Kconfig
+++ b/drivers/iio/light/Kconfig
@@ -64,6 +64,18 @@ config APDS9960
  To compile this driver as a module, choose M here: the
  module will be called apds9960
 
+config APPLE_IBRIDGE_ALS
+   tristate "Apple iBridge ambient light sensor"
+   select IIO_BUFFER
+   select IIO_TRIGGERED_BUFFER
+   depends on MFD_APPLE_IBRIDGE
+   help
+ Say Y here to build the driver for the Apple iBridge ALS
+ sensor.
+
+ To compile this driver as a module, choose M here: the
+ module will be called apple-ib-als.
+
 config BH1750
tristate "ROHM BH1750 ambient light sensor"
depends on I2C
diff --git a/drivers/iio/light/Makefile b/drivers/iio/light/Makefile
index 286bf3975372..144d918917f7 100644
--- a/drivers/iio/light/Makefile
+++ b/drivers/iio/light/Makefile
@@ -9,6 +9,7 @@ obj-$(CONFIG_ADJD_S311) += adjd_s311.o
 obj-$(CONFIG_AL3320A)  += al3320a.o
 obj-$(CONFIG_APDS9300) += apds9300.o
 obj-$(CONFIG_APDS9960) += apds9960.o
+obj-$(CONFIG_APPLE_IBRIDGE_ALS)+= apple-ib-als.o
 obj-$(CONFIG_BH1750)   += bh1750.o
 obj-$(CONFIG_BH1780)   += bh1780.o
 obj-$(CONFIG_CM32181)  += cm32181.o
diff --git a/drivers/iio/light/apple-ib-als.c b/drivers/iio/light/apple-ib-als.c
new file mode 100644
index ..1718fcbe304f
--- /dev/null
+++ b/drivers/iio/light/apple-ib-als.c
@@ -0,0 +1,694 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Apple Ambient Light Sensor Driver
+ *
+ * Copyright (c) 2017-2018 Ronald Tschalär
+ */
+
+/*
+ * MacBookPro models with an iBridge chip (13,[23] and 14,[23]) have an
+ * ambient light sensor that is exposed via one of the USB interfaces on
+ * the iBridge as a standard HID light sensor. However, we cannot use the
+ * existing hid-sensor-als driver, for two reasons:
+ *
+ * 1. The hid-sensor-als driver is part of the hid-sensor-hub which in turn
+ *is a hid driver, but you can't have more than one hid driver per hid
+ *device, which is a problem because the touch bar also needs to
+ *register as a driver for this hid device.
+ *
+ * 2. While the hid-sensors-als driver stores sensor readings received via
+ *interrupt in an iio buffer, reads on the sysfs
+ *.../iio:deviceX/in_illuminance_YYY attribute result in a get of the
+ *feature report; however, in the case of this sensor here the
+ *illuminance field of that report is always 0. Instead, the input
+ *report needs to be requested.
+ */
+
+#define dev_fmt(fmt) "als: " fmt
+
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+
+#define APPLEALS_DYN_SENS  0   /* our dynamic sensitivity */
+#define APPLEALS_DEF_CHANGE_SENS   APPLEALS_DYN_SENS
+
+struct appleals_device {
+   struct appleib_device   *ib_dev;
+   struct device   *log_dev;
+   struct hid_device   *hid_dev;
+   struct hid_report   *cfg_report;
+   struct hid_field*illum_field;
+   struct iio_dev  *iio_dev;
+   struct iio_trigger  *iio_trig;
+   int cur_sensitivity;
+   int cur_hysteresis;
+   boolevents_enabled;
+};
+
+static struct hid_driver appleals_hid_driver;
+
+/*
+ * This is a primitive way to get a relative sensitivity, one where we get
+ * notified when the value changes by a certain percentage rather than some
+ * absolute value. MacOS somehow manages to configure the sensor to work this
+ * way (with a 15% relative sensitivity), but I haven't been able to figure
+ * out how so far. So until we do, this provides a less-than-perfect
+ * simulation.
+ *
+ * When the brightness value is within one of the ranges, the sensitivity is
+ * set to that range's sensitivity. But in order to reduce flapping when the
+ * brightness is right on the border between two ranges, the ranges overlap
+ * somewhat (by at least one sensitivity), and sensitivity is only changed if
+ * the value leaves the current sensitivity's range.
+ *
+ * The values chosen for the map are somewhat arbitrary: a compromise of not
+ * too many ranges (and hence changing the sensitivity) but not too small or
+ * large of a percentage of the min and max values in the range (currently
+ * from 7.5% to 30%, i.e. within a factor of 2 of 15%), as well as just plain

[PATCH 2/2 update] X86/kdump: fall back to reserve high crashkernel memory

2019-04-21 Thread Dave Young

crashkernel=xM tries to reserve crashkernel memory under 4G, which
is enough for usual cases.  But this can sometimes fail, for example
when one tries to reserve a big chunk like 2G.

So let crashkernel=xM fall back to high memory in case it fails to
find a suitable low range.  Do not make ,high the default, because
it allocates extra low memory for DMA buffers and swiotlb, which is
not necessary on all machines.  A typical setting like crashkernel=128M
usually works with a low reservation under 4G, so keep <4G as the default.

Signed-off-by: Dave Young 
Reviewed-by: Ingo Molnar 
---
 Documentation/admin-guide/kernel-parameters.txt |7 +--
 arch/x86/kernel/setup.c |   22 ++
 2 files changed, 19 insertions(+), 10 deletions(-)

--- linux-x86.orig/arch/x86/kernel/setup.c
+++ linux-x86/arch/x86/kernel/setup.c
@@ -541,21 +541,27 @@ static void __init reserve_crashkernel(v
}
 
/* 0 means: find the address automatically */
-   if (crash_base <= 0) {
+   if (!crash_base) {
/*
 * Set CRASH_ADDR_LOW_MAX upper bound for crash memory,
-* as old kexec-tools loads bzImage below that, unless
-* "crashkernel=size[KMG],high" is specified.
+* crashkernel=x,high reserves memory over 4G, also allocates
+* 256M extra low memory for DMA buffers and swiotlb.
+* but the extra memory is not required for all machines.
+* So prefer low memory first, and fall back to high memory
+* unless "crashkernel=size[KMG],high" is specified.
 */
-   crash_base = memblock_find_in_range(CRASH_ALIGN,
-   high ? CRASH_ADDR_HIGH_MAX
-: CRASH_ADDR_LOW_MAX,
-   crash_size, CRASH_ALIGN);
+   if (!high)
+   crash_base = memblock_find_in_range(CRASH_ALIGN,
+   CRASH_ADDR_LOW_MAX,
+   crash_size, CRASH_ALIGN);
+   if (!crash_base)
+   crash_base = memblock_find_in_range(CRASH_ALIGN,
+   CRASH_ADDR_HIGH_MAX,
+   crash_size, CRASH_ALIGN);
if (!crash_base) {
pr_info("crashkernel reservation failed - No suitable 
area found.\n");
return;
}
-
} else {
unsigned long long start;
 
--- linux-x86.orig/Documentation/admin-guide/kernel-parameters.txt
+++ linux-x86/Documentation/admin-guide/kernel-parameters.txt
@@ -704,8 +704,11 @@
upon panic. This parameter reserves the physical
memory region [offset, offset + size] for that kernel
image. If '@offset' is omitted, then a suitable offset
-   is selected automatically. Check
-   Documentation/kdump/kdump.txt for further details.
+   is selected automatically.
+   [KNL, x86_64] select a region under 4G first, and
+   fall back to reserve region above 4G in case without
+   '@offset'.
+   See Documentation/kdump/kdump.txt for further details.
 
crashkernel=range1:size1[,range2:size2,...][@offset]
[KNL] Same as above, but depends on the memory

Re: [PATCH 2/2 update] X86/kdump: fall back to reserve high crashkernel memory

2019-04-21 Thread Baoquan He

On 04/22/19 at 11:19am, Dave Young wrote:
> crashkernel=xM tries to reserve crashkernel memory under 4G, which
> is enough for usual cases.  But this could fail sometimes, for example
> one tries to reserve a big chunk like 2G, it is possible to fail.
> 
> So let the crashkernel=xM just fall back to use high memory in case it
> fails to find a suitable low range.  Do not set the ,high as default
> because it allocates extra low memory for DMA buffers and swiotlb, this is
> not always necessary for all machines. Typically like crashkernel=128M
> usually work with low reservation under 4G, so still keep <4G as default.
> 
> Signed-off-by: Dave Young 
> Reviewed-by: Ingo Molnar 
> ---

Ack the whole series, thanks for the effort.

Acked-by: Baoquan He 

>  Documentation/admin-guide/kernel-parameters.txt |7 +--
>  arch/x86/kernel/setup.c |   22 ++
>  2 files changed, 19 insertions(+), 10 deletions(-)
> 
> --- linux-x86.orig/arch/x86/kernel/setup.c
> +++ linux-x86/arch/x86/kernel/setup.c
> @@ -541,21 +541,27 @@ static void __init reserve_crashkernel(v
>   }
>  
>   /* 0 means: find the address automatically */
> - if (crash_base <= 0) {
> + if (!crash_base) {
>   /*
>* Set CRASH_ADDR_LOW_MAX upper bound for crash memory,
> -  * as old kexec-tools loads bzImage below that, unless
> -  * "crashkernel=size[KMG],high" is specified.
> +  * crashkernel=x,high reserves memory over 4G, also allocates
> +  * 256M extra low memory for DMA buffers and swiotlb.
> +  * but the extra memory is not required for all machines.
> +  * So prefer low memory first, and fall back to high memory
> +  * unless "crashkernel=size[KMG],high" is specified.
>*/
> - crash_base = memblock_find_in_range(CRASH_ALIGN,
> - high ? CRASH_ADDR_HIGH_MAX
> -  : CRASH_ADDR_LOW_MAX,
> - crash_size, CRASH_ALIGN);
> + if (!high)
> + crash_base = memblock_find_in_range(CRASH_ALIGN,
> + CRASH_ADDR_LOW_MAX,
> + crash_size, CRASH_ALIGN);
> + if (!crash_base)
> + crash_base = memblock_find_in_range(CRASH_ALIGN,
> + CRASH_ADDR_HIGH_MAX,
> + crash_size, CRASH_ALIGN);
>   if (!crash_base) {
>   pr_info("crashkernel reservation failed - No suitable 
> area found.\n");
>   return;
>   }
> -
>   } else {
>   unsigned long long start;
>  
> --- linux-x86.orig/Documentation/admin-guide/kernel-parameters.txt
> +++ linux-x86/Documentation/admin-guide/kernel-parameters.txt
> @@ -704,8 +704,11 @@
>   upon panic. This parameter reserves the physical
>   memory region [offset, offset + size] for that kernel
>   image. If '@offset' is omitted, then a suitable offset
> - is selected automatically. Check
> - Documentation/kdump/kdump.txt for further details.
> + is selected automatically.
> + [KNL, x86_64] select a region under 4G first, and
> + fall back to reserve region above 4G in case without
> + '@offset'.
> + See Documentation/kdump/kdump.txt for further details.
>  
>   crashkernel=range1:size1[,range2:size2,...][@offset]
>   [KNL] Same as above, but depends on the memory

Re: [PATCH] ARM: dts: imx: Add mclk0 clock for SAI

2019-04-21 Thread S.j. Wang

Hi

> 
> 
> On Sat, Apr 20, 2019 at 09:12:52AM +, Daniel Baluta wrote:
> > From: Shengjiu Wang 
> >
> > SAI has 4 clock sources, which can be selected using MSEL bit of SAI
> > TCR2 register.
> 
> I have a doubt at this statement. As far as I can understand, this MSEL is
> probably used by its internal clock MUX, so it's not really proving that SAI
> has 4 MCLK inputs. What I know is that SAI block itself only has 3 MCLK
> inputs as we defined in DT. It's just internally connects bus clock or MCLK1
> to input0 of clock MUX's and connects MCLK[1-3] to input[1-3]. So adding an
> MCLK0 here doesn't sound a right way to me. Unless someone can justify
> for it, I think we should just fix it from driver side.
> 
> Thanks
> Nicolin
>

The MSEL field is 2 bits wide, so there are 4 options. MCLK0 may be the
same input as MCLK1 or the bus clock, as you said, so we think it may be
better to show this relation in the DT, and the DT is capable of
describing it. The driver doesn't care which clock is connected to MCLK0;
it only needs to know from the DT that there are 4 MCLKs.

Best regards
Wang shengjiu

Re: [PATCH v4 05/12] soc: mediatek: cmdq: move the CMDQ_IRQ_MASK into cmdq driver data

2019-04-21 Thread CK Hu

Hi, Bibby:

On Mon, 2019-04-15 at 20:58 +0800, Bibby Hsieh wrote:
> The interrupt mask and thread number has positive correlation,
> so we move the CMDQ_IRQ_MASK into cmdq driver data and calculate
> it by thread number.

Reviewed-by: CK Hu 

> 
> Signed-off-by: Bibby Hsieh 
> ---
>  drivers/mailbox/mtk-cmdq-mailbox.c | 12 +++-
>  1 file changed, 7 insertions(+), 5 deletions(-)
> 
> diff --git a/drivers/mailbox/mtk-cmdq-mailbox.c 
> b/drivers/mailbox/mtk-cmdq-mailbox.c
> index 22811784dc7d..87617dc7504d 100644
> --- a/drivers/mailbox/mtk-cmdq-mailbox.c
> +++ b/drivers/mailbox/mtk-cmdq-mailbox.c
> @@ -17,7 +17,6 @@
>  #include 
>  
>  #define CMDQ_OP_CODE_MASK(0xff << CMDQ_OP_CODE_SHIFT)
> -#define CMDQ_IRQ_MASK0x
>  #define CMDQ_NUM_CMD(t)  (t->cmd_buf_size / 
> CMDQ_INST_SIZE)
>  
>  #define CMDQ_CURR_IRQ_STATUS 0x10
> @@ -71,6 +70,7 @@ struct cmdq {
>   void __iomem*base;
>   u32 irq;
>   u32 thread_nr;
> + u32 irq_mask;
>   struct cmdq_thread  *thread;
>   struct clk  *clock;
>   boolsuspended;
> @@ -284,11 +284,11 @@ static irqreturn_t cmdq_irq_handler(int irq, void *dev)
>   unsigned long irq_status, flags = 0L;
>   int bit;
>  
> - irq_status = readl(cmdq->base + CMDQ_CURR_IRQ_STATUS) & CMDQ_IRQ_MASK;
> - if (!(irq_status ^ CMDQ_IRQ_MASK))
> + irq_status = readl(cmdq->base + CMDQ_CURR_IRQ_STATUS) & cmdq->irq_mask;
> + if (!(irq_status ^ cmdq->irq_mask))
>   return IRQ_NONE;
>  
> - for_each_clear_bit(bit, _status, fls(CMDQ_IRQ_MASK)) {
> + for_each_clear_bit(bit, _status, cmdq->thread_nr) {
>   struct cmdq_thread *thread = >thread[bit];
>  
>   spin_lock_irqsave(>chan->lock, flags);
> @@ -472,6 +472,9 @@ static int cmdq_probe(struct platform_device *pdev)
>   dev_err(dev, "failed to get irq\n");
>   return -EINVAL;
>   }
> +
> + cmdq->thread_nr = (u32)(unsigned long)of_device_get_match_data(dev);
> + cmdq->irq_mask = GENMASK(cmdq->thread_nr - 1, 0);
>   err = devm_request_irq(dev, cmdq->irq, cmdq_irq_handler, IRQF_SHARED,
>  "mtk_cmdq", cmdq);
>   if (err < 0) {
> @@ -488,7 +491,6 @@ static int cmdq_probe(struct platform_device *pdev)
>   return PTR_ERR(cmdq->clock);
>   }
>  
> - cmdq->thread_nr = (u32)(unsigned long)of_device_get_match_data(dev);
>   cmdq->mbox.dev = dev;
>   cmdq->mbox.chans = devm_kcalloc(dev, cmdq->thread_nr,
>   sizeof(*cmdq->mbox.chans), GFP_KERNEL);

Re: [PATCH v4 06/12] soc: mediatek: cmdq: support mt8183 gce function

2019-04-21 Thread CK Hu

Hi, Bibby:

On Mon, 2019-04-15 at 20:58 +0800, Bibby Hsieh wrote:
> add mt8183 compatible name for supporting gce function

Reviewed-by: CK Hu 

> 
> Signed-off-by: Bibby Hsieh 
> ---
>  drivers/mailbox/mtk-cmdq-mailbox.c | 1 +
>  1 file changed, 1 insertion(+)
> 
> diff --git a/drivers/mailbox/mtk-cmdq-mailbox.c 
> b/drivers/mailbox/mtk-cmdq-mailbox.c
> index 87617dc7504d..6db1e2dd2dea 100644
> --- a/drivers/mailbox/mtk-cmdq-mailbox.c
> +++ b/drivers/mailbox/mtk-cmdq-mailbox.c
> @@ -538,6 +538,7 @@ static const struct dev_pm_ops cmdq_pm_ops = {
>  
>  static const struct of_device_id cmdq_of_ids[] = {
>   {.compatible = "mediatek,mt8173-gce", .data = (void *)16},
> + {.compatible = "mediatek,mt8183-gce", .data = (void *)24},
>   {}
>  };
>

Re: [PATCH v4 07/12] soc: mediatek: cmdq: clear the event in cmdq initial flow

2019-04-21 Thread CK Hu

Hi, Bibby:

On Mon, 2019-04-15 at 20:58 +0800, Bibby Hsieh wrote:
> GCE hardware stored event information in own internal sysram,
> if the initial value in those sysram is not zero value
> it will cause a situation that gce can wait the event immediately
> after client ask gce to wait event but not really trigger the
> corresponding hardware.
> 
> In order to make sure that the wait event function is
> exactly correct, we need to clear the sysram value in
> cmdq initial flow.
> 
> Fixes: 623a6143a845 ("mailbox: mediatek: Add Mediatek CMDQ driver")

Reviewed-by: CK Hu 

> 
> Signed-off-by: Bibby Hsieh 
> ---
>  drivers/mailbox/mtk-cmdq-mailbox.c   | 5 +
>  include/linux/mailbox/mtk-cmdq-mailbox.h | 2 ++
>  include/linux/soc/mediatek/mtk-cmdq.h| 3 ---
>  3 files changed, 7 insertions(+), 3 deletions(-)
> 
> diff --git a/drivers/mailbox/mtk-cmdq-mailbox.c 
> b/drivers/mailbox/mtk-cmdq-mailbox.c
> index 6db1e2dd2dea..4e744cf2c3fb 100644
> --- a/drivers/mailbox/mtk-cmdq-mailbox.c
> +++ b/drivers/mailbox/mtk-cmdq-mailbox.c
> @@ -33,6 +33,7 @@
>  #define CMDQ_THR_END_ADDR0x24
>  #define CMDQ_THR_WAIT_TOKEN  0x30
>  #define CMDQ_THR_PRIORITY0x40
> +#define CMDQ_SYNC_TOKEN_UPDATE   0x68
>  
>  #define CMDQ_THR_ACTIVE_SLOT_CYCLES  0x3200
>  #define CMDQ_THR_ENABLED 0x1
> @@ -103,8 +104,12 @@ static void cmdq_thread_resume(struct cmdq_thread 
> *thread)
>  
>  static void cmdq_init(struct cmdq *cmdq)
>  {
> + int i;
> +
>   WARN_ON(clk_enable(cmdq->clock) < 0);
>   writel(CMDQ_THR_ACTIVE_SLOT_CYCLES, cmdq->base + CMDQ_THR_SLOT_CYCLES);
> + for (i = 0; i <= CMDQ_MAX_EVENT; i++)
> + writel(i, cmdq->base + CMDQ_SYNC_TOKEN_UPDATE);
>   clk_disable(cmdq->clock);
>  }
>  
> diff --git a/include/linux/mailbox/mtk-cmdq-mailbox.h 
> b/include/linux/mailbox/mtk-cmdq-mailbox.h
> index ccb73422c2fa..911475da7a53 100644
> --- a/include/linux/mailbox/mtk-cmdq-mailbox.h
> +++ b/include/linux/mailbox/mtk-cmdq-mailbox.h
> @@ -19,6 +19,8 @@
>  #define CMDQ_WFE_UPDATE  BIT(31)
>  #define CMDQ_WFE_WAITBIT(15)
>  #define CMDQ_WFE_WAIT_VALUE  0x1
> +/** cmdq event maximum */
> +#define CMDQ_MAX_EVENT   0x3ff
>  
>  /*
>   * CMDQ_CODE_MASK:
> diff --git a/include/linux/soc/mediatek/mtk-cmdq.h 
> b/include/linux/soc/mediatek/mtk-cmdq.h
> index 54ade13a9b15..4e8899972db4 100644
> --- a/include/linux/soc/mediatek/mtk-cmdq.h
> +++ b/include/linux/soc/mediatek/mtk-cmdq.h
> @@ -13,9 +13,6 @@
>  
>  #define CMDQ_NO_TIMEOUT  0xu
>  
> -/** cmdq event maximum */
> -#define CMDQ_MAX_EVENT   0x3ff
> -
>  struct cmdq_pkt;
>  
>  struct cmdq_client {

Re: [PATCH] ARM: dts: imx: Add mclk0 clock for SAI

2019-04-21 Thread Nicolin Chen

On Mon, Apr 22, 2019 at 03:30:26AM +, S.j. Wang wrote:
> > > SAI has 4 clock sources, which can be selected using MSEL bit of SAI
> > > TCR2 register.
> > 
> > I have a doubt at this statement. As far as I can understand, this MSEL is
> > probably used by its internal clock MUX, so it's not really proving that SAI
> > has 4 MCLK inputs. What I know is that SAI block itself only has 3 MCLK
> > inputs as we defined in DT. It's just internally connects bus clock or MCLK1
> > to input0 of clock MUX's and connects MCLK[1-3] to input[1-3]. So adding an
> > MCLK0 here doesn't sound a right way to me. Unless someone can justify
> > for it, I think we should just fix it from driver side.
> > 
> > Thanks
> > Nicolin
> >
> 
> The MSEL bit width is 2 bit, so there is 4 options,  the MCLK0 maybe the same 
> input as
> MCLK1 or bus clock as you said, so we think may be better to show this 
> relation in DT, 
> And this is DT's capability.  Driver don't care about which clock connect to 
> MCLK0, 
> it only need to know there is 4 MCLK from DT. 

I know what it is. But it feels weird that we add an MCLK0 just
because of what a register field has, when there's no "MCLK0"
mentioned in the RM at all. My point is that if SAI doesn't have
a port named "MCLK0", I don't find it that convincing to have
it in the DT.

Usually in DT we define the clock sources of an entire IP block
in audio use cases, not those of an internal clock MUX. But taking a
step back, it might not be really wrong to do so, since the MUX
is a part of the hardware. If we redefine MCLK[0-3] as "four
clock sources of SAI's clock MUX, which selects a clock for providing
the bit clock and frame clock" in the binding doc, I feel it'd make
a lot of sense.

[PATCH] pinctrl: intel: Clear interrupt status in unmask callback

2019-04-21 Thread Kai-Heng Feng

Commit a939bb57cd47 ("pinctrl: intel: implement gpio_irq_enable") was
added because clearing the interrupt status bit is required to avoid
unexpected behavior.

It turns out the unmask callback also needs the fix, which solves weird
IRQ triggering issues on the ELAN1200 I2C touchpad.

Signed-off-by: Kai-Heng Feng 
---
 drivers/pinctrl/intel/pinctrl-intel.c | 35 ---
 1 file changed, 5 insertions(+), 30 deletions(-)

diff --git a/drivers/pinctrl/intel/pinctrl-intel.c 
b/drivers/pinctrl/intel/pinctrl-intel.c
index 3b1818184207..53878604537e 100644
--- a/drivers/pinctrl/intel/pinctrl-intel.c
+++ b/drivers/pinctrl/intel/pinctrl-intel.c
@@ -913,35 +913,6 @@ static void intel_gpio_irq_ack(struct irq_data *d)
}
 }
 
-static void intel_gpio_irq_enable(struct irq_data *d)
-{
-   struct gpio_chip *gc = irq_data_get_irq_chip_data(d);
-   struct intel_pinctrl *pctrl = gpiochip_get_data(gc);
-   const struct intel_community *community;
-   const struct intel_padgroup *padgrp;
-   int pin;
-
-   pin = intel_gpio_to_pin(pctrl, irqd_to_hwirq(d), &community, &padgrp);
-   if (pin >= 0) {
-   unsigned int gpp, gpp_offset, is_offset;
-   unsigned long flags;
-   u32 value;
-
-   gpp = padgrp->reg_num;
-   gpp_offset = padgroup_offset(padgrp, pin);
-   is_offset = community->is_offset + gpp * 4;
-
-   raw_spin_lock_irqsave(&pctrl->lock, flags);
-   /* Clear interrupt status first to avoid unexpected interrupt */
-   writel(BIT(gpp_offset), community->regs + is_offset);
-
-   value = readl(community->regs + community->ie_offset + gpp * 4);
-   value |= BIT(gpp_offset);
-   writel(value, community->regs + community->ie_offset + gpp * 4);
-   raw_spin_unlock_irqrestore(&pctrl->lock, flags);
-   }
-}
-
 static void intel_gpio_irq_mask_unmask(struct irq_data *d, bool mask)
 {
struct gpio_chip *gc = irq_data_get_irq_chip_data(d);
@@ -963,6 +934,11 @@ static void intel_gpio_irq_mask_unmask(struct irq_data *d, 
bool mask)
reg = community->regs + community->ie_offset + gpp * 4;
 
raw_spin_lock_irqsave(&pctrl->lock, flags);
+
+   /* Clear interrupt status first to avoid unexpected interrupt */
+   if (!mask)
+   writel(BIT(gpp_offset), community->regs + 
community->is_offset + gpp * 4);
+
value = readl(reg);
if (mask)
value &= ~BIT(gpp_offset);
@@ -1106,7 +1082,6 @@ static irqreturn_t intel_gpio_irq(int irq, void *data)
 
 static struct irq_chip intel_gpio_irqchip = {
.name = "intel-gpio",
-   .irq_enable = intel_gpio_irq_enable,
.irq_ack = intel_gpio_irq_ack,
.irq_mask = intel_gpio_irq_mask,
.irq_unmask = intel_gpio_irq_unmask,
-- 
2.17.1

[PATCH 2/2] arm64: defconfig: Add i.MX8 SoC driver

2019-04-21 Thread Anson Huang

This patch selects CONFIG_IMX8_SOC by default to support
i.MX8 SoC driver.

Signed-off-by: Anson Huang 
---
 arch/arm64/configs/defconfig | 1 +
 1 file changed, 1 insertion(+)

diff --git a/arch/arm64/configs/defconfig b/arch/arm64/configs/defconfig
index 2d9c390..c8ab62e 100644
--- a/arch/arm64/configs/defconfig
+++ b/arch/arm64/configs/defconfig
@@ -677,6 +677,7 @@ CONFIG_RPMSG_QCOM_GLINK_SMEM=m
 CONFIG_RPMSG_QCOM_SMD=y
 CONFIG_RASPBERRYPI_POWER=y
 CONFIG_IMX_GPCV2_PM_DOMAINS=y
+CONFIG_IMX8_SOC=y
 CONFIG_QCOM_COMMAND_DB=y
 CONFIG_QCOM_GENI_SE=y
 CONFIG_QCOM_GLINK_SSR=m
-- 
2.7.4

[PATCH 1/2] soc: imx: add i.MX8QXP SoC driver support

2019-04-21 Thread Anson Huang

Add i.MX8QXP SoC driver support, introduce driver dependency
into Kconfig as CONFIG_IMX_SCU must be selected to support
i.MX8QXP, also need to use platform driver model to make sure
IMX_SCU driver is probed before i.MX8 SoC driver.

With this patch, SoC info can be read from sysfs:

i.mx8qxp-mek# cat /sys/devices/soc0/family
Freescale i.MX

i.mx8qxp-mek# cat /sys/devices/soc0/soc_id
i.MX8QXP

i.mx8qxp-mek# cat /sys/devices/soc0/machine
Freescale i.MX8QXP MEK

i.mx8qxp-mek# cat /sys/devices/soc0/revision
1.1

Signed-off-by: Anson Huang 
---
 drivers/soc/imx/Kconfig|   4 ++
 drivers/soc/imx/Makefile   |   2 +-
 drivers/soc/imx/soc-imx8.c | 126 ++---
 3 files changed, 124 insertions(+), 8 deletions(-)

diff --git a/drivers/soc/imx/Kconfig b/drivers/soc/imx/Kconfig
index d80f899..a4353f9 100644
--- a/drivers/soc/imx/Kconfig
+++ b/drivers/soc/imx/Kconfig
@@ -7,4 +7,8 @@ config IMX_GPCV2_PM_DOMAINS
select PM_GENERIC_DOMAINS
default y if SOC_IMX7D
 
+config IMX8_SOC
+   bool "i.MX8 SoC driver"
+   depends on ARCH_MXC && IMX_SCU
+
 endmenu
diff --git a/drivers/soc/imx/Makefile b/drivers/soc/imx/Makefile
index d6b529e0..5bf23f8 100644
--- a/drivers/soc/imx/Makefile
+++ b/drivers/soc/imx/Makefile
@@ -1,3 +1,3 @@
 obj-$(CONFIG_HAVE_IMX_GPC) += gpc.o
 obj-$(CONFIG_IMX_GPCV2_PM_DOMAINS) += gpcv2.o
-obj-$(CONFIG_ARCH_MXC) += soc-imx8.o
+obj-$(CONFIG_IMX8_SOC) += soc-imx8.o
diff --git a/drivers/soc/imx/soc-imx8.c b/drivers/soc/imx/soc-imx8.c
index fc6429f..3ad0b1c 100644
--- a/drivers/soc/imx/soc-imx8.c
+++ b/drivers/soc/imx/soc-imx8.c
@@ -3,8 +3,11 @@
  * Copyright 2019 NXP.
  */
 
+#include 
+#include 
 #include 
 #include 
+#include 
 #include 
 #include 
 #include 
@@ -16,12 +19,61 @@
 #define IMX8MQ_SW_INFO_B1  0x40
 #define IMX8MQ_SW_MAGIC_B1 0xff0055aa
 
+#define IMX8_SOC_DRIVER_NAME   "imx8-soc"
+
+static struct imx_sc_ipc *soc_ipc_handle;
+static struct platform_device *imx8_soc_pdev;
+
+struct imx_sc_msg_misc_get_soc_id {
+   struct imx_sc_rpc_msg hdr;
+   union {
+   struct {
+   u32 control;
+   u16 resource;
+   } send;
+   struct {
+   u32 id;
+   u16 reserved;
+   } resp;
+   } data;
+};
+
 struct imx8_soc_data {
char *name;
u32 (*soc_revision)(void);
+   bool have_imx_scu;
 };
 
-static u32 __init imx8mq_soc_revision(void)
+static u32 imx8qxp_soc_revision(void)
+{
+   struct imx_sc_msg_misc_get_soc_id msg;
+   struct imx_sc_rpc_msg *hdr = &msg.hdr;
+   u32 rev = 0;
+   int ret;
+
+   hdr->ver = IMX_SC_RPC_VERSION;
+   hdr->svc = IMX_SC_RPC_SVC_MISC;
+   hdr->func = IMX_SC_MISC_FUNC_GET_CONTROL;
+   hdr->size = 3;
+
+   msg.data.send.control = IMX_SC_C_ID;
+   msg.data.send.resource = IMX_SC_R_SYSTEM;
+
+   ret = imx_scu_call_rpc(soc_ipc_handle, &msg, true);
+   if (ret) {
+   dev_err(&imx8_soc_pdev->dev,
+   "misc get control failed, ret %d\n", ret);
+   return rev;
+   }
+
+   /* format revision value passed from SCU firmware */
+   rev = (msg.data.resp.id >> 5) & 0xf;
+   rev = (((rev >> 2) + 1) << 4) | (rev & 0x3);
+
+   return rev;
+}
+
+static u32 imx8mq_soc_revision(void)
 {
struct device_node *np;
void __iomem *ocotp_base;
@@ -49,10 +101,18 @@ static u32 __init imx8mq_soc_revision(void)
 static const struct imx8_soc_data imx8mq_soc_data = {
.name = "i.MX8MQ",
.soc_revision = imx8mq_soc_revision,
+   .have_imx_scu = false,
+};
+
+static const struct imx8_soc_data imx8qxp_soc_data = {
+   .name = "i.MX8QXP",
+   .soc_revision = imx8qxp_soc_revision,
+   .have_imx_scu = true,
 };
 
 static const struct of_device_id imx8_soc_match[] = {
{ .compatible = "fsl,imx8mq", .data = &imx8mq_soc_data, },
+   { .compatible = "fsl,imx8qxp", .data = &imx8qxp_soc_data, },
{ }
 };
 
@@ -61,7 +121,7 @@ static const struct of_device_id imx8_soc_match[] = {
kasprintf(GFP_KERNEL, "%d.%d", (soc_rev >> 4) & 0xf,  soc_rev & 0xf) : \
"unknown"
 
-static int __init imx8_soc_init(void)
+static int imx8_soc_probe(struct platform_device *pdev)
 {
struct soc_device_attribute *soc_dev_attr;
struct soc_device *soc_dev;
@@ -83,25 +143,37 @@ static int __init imx8_soc_init(void)
goto free_soc;
 
id = of_match_node(imx8_soc_match, root);
-   if (!id)
+   if (!id) {
+   ret = -ENODEV;
goto free_soc;
+   }
 
of_node_put(root);
 
data = id->data;
if (data) {
+   if (data->have_imx_scu) {
+   ret = imx_scu_get_handle(&soc_ipc_handle);
+   if (ret)
+   goto free_soc;
+   }
+
soc_dev_attr->soc_id = data->name;

1 2 >

1 - 100 of 102 matches

Mail list logo