Re: linux-next: manual merge of the userns tree with the mips tree

2017-08-07 Thread Ralf Baechle
On Tue, Aug 08, 2017 at 03:10:04PM +1000, Stephen Rothwell wrote:

(Maciej added to cc.)

> Hi Eric,
> 
> Today's linux-next merge of the userns tree got a conflict in:
> 
>   arch/mips/kernel/traps.c
> 
> between commit:
> 
>   260a789828aa ("MIPS: signal: Remove unreachable code from 
> force_fcr31_sig().")
> 
> from the mips tree and commit:
> 
>   ea1b75cf9138 ("signal/mips: Document a conflict with SI_USER with SIGFPE")
> 
> from the userns tree.
> 
> I fixed it up (the former removed the code updated by the latter) and
> can carry the fix as necessary. This is now fixed as far as linux-next
> is concerned, but any non trivial conflicts should be mentioned to your
> upstream maintainer when your tree is submitted for merging.  You may
> also want to consider cooperating with the maintainer of the conflicting
> tree to minimise any particularly complex conflicts.

Eric,

after yesterday's emails on the topic I think commit ea1b75cf9138 ("signal/
mips: Document a conflict with SI_USER with SIGFPE") should be dropped.

  Ralf


Re: linux-next: manual merge of the userns tree with the mips tree

2017-08-07 Thread Ralf Baechle
On Tue, Aug 08, 2017 at 03:10:04PM +1000, Stephen Rothwell wrote:

(Maciej added to cc.)

> Hi Eric,
> 
> Today's linux-next merge of the userns tree got a conflict in:
> 
>   arch/mips/kernel/traps.c
> 
> between commit:
> 
>   260a789828aa ("MIPS: signal: Remove unreachable code from 
> force_fcr31_sig().")
> 
> from the mips tree and commit:
> 
>   ea1b75cf9138 ("signal/mips: Document a conflict with SI_USER with SIGFPE")
> 
> from the userns tree.
> 
> I fixed it up (the former removed the code updated by the latter) and
> can carry the fix as necessary. This is now fixed as far as linux-next
> is concerned, but any non trivial conflicts should be mentioned to your
> upstream maintainer when your tree is submitted for merging.  You may
> also want to consider cooperating with the maintainer of the conflicting
> tree to minimise any particularly complex conflicts.

Eric,

after yesterday's emails on the topic I think commit ea1b75cf9138 ("signal/
mips: Document a conflict with SI_USER with SIGFPE") should be dropped.

  Ralf


[PATCH] mmc: mxcmmc: Handle return value of clk_prepare_enable

2017-08-07 Thread Arvind Yadav
clk_prepare_enable() can fail here and we must check its return value.

Signed-off-by: Arvind Yadav 
---
 drivers/mmc/host/mxcmmc.c | 25 +++--
 1 file changed, 19 insertions(+), 6 deletions(-)

diff --git a/drivers/mmc/host/mxcmmc.c b/drivers/mmc/host/mxcmmc.c
index fb3ca82..c016820 100644
--- a/drivers/mmc/host/mxcmmc.c
+++ b/drivers/mmc/host/mxcmmc.c
@@ -1098,8 +1098,13 @@ static int mxcmci_probe(struct platform_device *pdev)
goto out_free;
}
 
-   clk_prepare_enable(host->clk_per);
-   clk_prepare_enable(host->clk_ipg);
+   ret = clk_prepare_enable(host->clk_per);
+   if (ret)
+   goto out_free;
+
+   ret = clk_prepare_enable(host->clk_ipg);
+   if (ret)
+   goto out_clk_per_put;
 
mxcmci_softreset(host);
 
@@ -1168,8 +1173,9 @@ static int mxcmci_probe(struct platform_device *pdev)
dma_release_channel(host->dma);
 
 out_clk_put:
-   clk_disable_unprepare(host->clk_per);
clk_disable_unprepare(host->clk_ipg);
+out_clk_per_put:
+   clk_disable_unprepare(host->clk_per);
 
 out_free:
mmc_free_host(mmc);
@@ -1212,10 +1218,17 @@ static int __maybe_unused mxcmci_resume(struct device 
*dev)
 {
struct mmc_host *mmc = dev_get_drvdata(dev);
struct mxcmci_host *host = mmc_priv(mmc);
+   int ret;
 
-   clk_prepare_enable(host->clk_per);
-   clk_prepare_enable(host->clk_ipg);
-   return 0;
+   ret = clk_prepare_enable(host->clk_per);
+   if (ret)
+   return ret;
+
+   ret = clk_prepare_enable(host->clk_ipg);
+   if (ret)
+   clk_disable_unprepare(host->clk_per);
+
+   return ret;
 }
 
 static SIMPLE_DEV_PM_OPS(mxcmci_pm_ops, mxcmci_suspend, mxcmci_resume);
-- 
1.9.1



[PATCH] mmc: mxcmmc: Handle return value of clk_prepare_enable

2017-08-07 Thread Arvind Yadav
clk_prepare_enable() can fail here and we must check its return value.

Signed-off-by: Arvind Yadav 
---
 drivers/mmc/host/mxcmmc.c | 25 +++--
 1 file changed, 19 insertions(+), 6 deletions(-)

diff --git a/drivers/mmc/host/mxcmmc.c b/drivers/mmc/host/mxcmmc.c
index fb3ca82..c016820 100644
--- a/drivers/mmc/host/mxcmmc.c
+++ b/drivers/mmc/host/mxcmmc.c
@@ -1098,8 +1098,13 @@ static int mxcmci_probe(struct platform_device *pdev)
goto out_free;
}
 
-   clk_prepare_enable(host->clk_per);
-   clk_prepare_enable(host->clk_ipg);
+   ret = clk_prepare_enable(host->clk_per);
+   if (ret)
+   goto out_free;
+
+   ret = clk_prepare_enable(host->clk_ipg);
+   if (ret)
+   goto out_clk_per_put;
 
mxcmci_softreset(host);
 
@@ -1168,8 +1173,9 @@ static int mxcmci_probe(struct platform_device *pdev)
dma_release_channel(host->dma);
 
 out_clk_put:
-   clk_disable_unprepare(host->clk_per);
clk_disable_unprepare(host->clk_ipg);
+out_clk_per_put:
+   clk_disable_unprepare(host->clk_per);
 
 out_free:
mmc_free_host(mmc);
@@ -1212,10 +1218,17 @@ static int __maybe_unused mxcmci_resume(struct device 
*dev)
 {
struct mmc_host *mmc = dev_get_drvdata(dev);
struct mxcmci_host *host = mmc_priv(mmc);
+   int ret;
 
-   clk_prepare_enable(host->clk_per);
-   clk_prepare_enable(host->clk_ipg);
-   return 0;
+   ret = clk_prepare_enable(host->clk_per);
+   if (ret)
+   return ret;
+
+   ret = clk_prepare_enable(host->clk_ipg);
+   if (ret)
+   clk_disable_unprepare(host->clk_per);
+
+   return ret;
 }
 
 static SIMPLE_DEV_PM_OPS(mxcmci_pm_ops, mxcmci_suspend, mxcmci_resume);
-- 
1.9.1



Re: [lkp-robot] [mm] 7674270022: will-it-scale.per_process_ops -19.3% regression

2017-08-07 Thread Nadav Amit
Nadav Amit  wrote:

> Minchan Kim  wrote:
> 
>> Hi,
>> 
>> On Tue, Aug 08, 2017 at 09:19:23AM +0800, kernel test robot wrote:
>>> Greeting,
>>> 
>>> FYI, we noticed a -19.3% regression of will-it-scale.per_process_ops due to 
>>> commit:
>>> 
>>> 
>>> commit: 76742700225cad9df49f05399381ac3f1ec3dc60 ("mm: fix 
>>> MADV_[FREE|DONTNEED] TLB flush miss problem")
>>> url: 
>>> https://github.com/0day-ci/linux/commits/Nadav-Amit/mm-migrate-prevent-racy-access-to-tlb_flush_pending/20170802-205715
>>> 
>>> 
>>> in testcase: will-it-scale
>>> on test machine: 88 threads Intel(R) Xeon(R) CPU E5-2699 v4 @ 2.20GHz with 
>>> 64G memory
>>> with following parameters:
>>> 
>>> nr_task: 16
>>> mode: process
>>> test: brk1
>>> cpufreq_governor: performance
>>> 
>>> test-description: Will It Scale takes a testcase and runs it from 1 through 
>>> to n parallel copies to see if the testcase will scale. It builds both a 
>>> process and threads based test in order to see any differences between the 
>>> two.
>>> test-url: https://github.com/antonblanchard/will-it-scale
>> 
>> Thanks for the report.
>> Could you explain what kinds of workload you are testing?
>> 
>> Does it calls frequently madvise(MADV_DONTNEED) in parallel on multiple
>> threads?
> 
> According to the description it is "testcase:brk increase/decrease of one
> page”. According to the mode it spawns multiple processes, not threads.
> 
> Since a single page is unmapped each time, and the iTLB-loads increase
> dramatically, I would suspect that for some reason a full TLB flush is
> caused during do_munmap().
> 
> If I find some free time, I’ll try to profile the workload - but feel free
> to beat me to it.

The root-cause appears to be that tlb_finish_mmu() does not call
dec_tlb_flush_pending() - as it should. Any chance you can take care of it?

Having said that it appears that cpumask_any_but() is really inefficient
since it does not have an optimization for the case in which
small_const_nbits(nbits)==true. When I find some free time, I’ll try to deal
with it.

Thanks,
Nadav

Re: [lkp-robot] [mm] 7674270022: will-it-scale.per_process_ops -19.3% regression

2017-08-07 Thread Nadav Amit
Nadav Amit  wrote:

> Minchan Kim  wrote:
> 
>> Hi,
>> 
>> On Tue, Aug 08, 2017 at 09:19:23AM +0800, kernel test robot wrote:
>>> Greeting,
>>> 
>>> FYI, we noticed a -19.3% regression of will-it-scale.per_process_ops due to 
>>> commit:
>>> 
>>> 
>>> commit: 76742700225cad9df49f05399381ac3f1ec3dc60 ("mm: fix 
>>> MADV_[FREE|DONTNEED] TLB flush miss problem")
>>> url: 
>>> https://github.com/0day-ci/linux/commits/Nadav-Amit/mm-migrate-prevent-racy-access-to-tlb_flush_pending/20170802-205715
>>> 
>>> 
>>> in testcase: will-it-scale
>>> on test machine: 88 threads Intel(R) Xeon(R) CPU E5-2699 v4 @ 2.20GHz with 
>>> 64G memory
>>> with following parameters:
>>> 
>>> nr_task: 16
>>> mode: process
>>> test: brk1
>>> cpufreq_governor: performance
>>> 
>>> test-description: Will It Scale takes a testcase and runs it from 1 through 
>>> to n parallel copies to see if the testcase will scale. It builds both a 
>>> process and threads based test in order to see any differences between the 
>>> two.
>>> test-url: https://github.com/antonblanchard/will-it-scale
>> 
>> Thanks for the report.
>> Could you explain what kinds of workload you are testing?
>> 
>> Does it calls frequently madvise(MADV_DONTNEED) in parallel on multiple
>> threads?
> 
> According to the description it is "testcase:brk increase/decrease of one
> page”. According to the mode it spawns multiple processes, not threads.
> 
> Since a single page is unmapped each time, and the iTLB-loads increase
> dramatically, I would suspect that for some reason a full TLB flush is
> caused during do_munmap().
> 
> If I find some free time, I’ll try to profile the workload - but feel free
> to beat me to it.

The root-cause appears to be that tlb_finish_mmu() does not call
dec_tlb_flush_pending() - as it should. Any chance you can take care of it?

Having said that it appears that cpumask_any_but() is really inefficient
since it does not have an optimization for the case in which
small_const_nbits(nbits)==true. When I find some free time, I’ll try to deal
with it.

Thanks,
Nadav

[PATCH] mmc: wmt-sdmmc: Handle return value of clk_prepare_enable

2017-08-07 Thread Arvind Yadav
clk_prepare_enable() can fail here and we must check its return value.

Signed-off-by: Arvind Yadav 
---
 drivers/mmc/host/wmt-sdmmc.c | 6 +-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/drivers/mmc/host/wmt-sdmmc.c b/drivers/mmc/host/wmt-sdmmc.c
index 21ebba8..e64f930 100644
--- a/drivers/mmc/host/wmt-sdmmc.c
+++ b/drivers/mmc/host/wmt-sdmmc.c
@@ -856,7 +856,9 @@ static int wmt_mci_probe(struct platform_device *pdev)
goto fail5;
}
 
-   clk_prepare_enable(priv->clk_sdmmc);
+   ret = clk_prepare_enable(priv->clk_sdmmc);
+   if (ret)
+   goto fail6;
 
/* configure the controller to a known 'ready' state */
wmt_reset_hardware(mmc);
@@ -866,6 +868,8 @@ static int wmt_mci_probe(struct platform_device *pdev)
dev_info(>dev, "WMT SDHC Controller initialized\n");
 
return 0;
+fail6:
+   clk_put(priv->clk_sdmmc);
 fail5:
free_irq(dma_irq, priv);
 fail4:
-- 
1.9.1



[PATCH] mmc: wmt-sdmmc: Handle return value of clk_prepare_enable

2017-08-07 Thread Arvind Yadav
clk_prepare_enable() can fail here and we must check its return value.

Signed-off-by: Arvind Yadav 
---
 drivers/mmc/host/wmt-sdmmc.c | 6 +-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/drivers/mmc/host/wmt-sdmmc.c b/drivers/mmc/host/wmt-sdmmc.c
index 21ebba8..e64f930 100644
--- a/drivers/mmc/host/wmt-sdmmc.c
+++ b/drivers/mmc/host/wmt-sdmmc.c
@@ -856,7 +856,9 @@ static int wmt_mci_probe(struct platform_device *pdev)
goto fail5;
}
 
-   clk_prepare_enable(priv->clk_sdmmc);
+   ret = clk_prepare_enable(priv->clk_sdmmc);
+   if (ret)
+   goto fail6;
 
/* configure the controller to a known 'ready' state */
wmt_reset_hardware(mmc);
@@ -866,6 +868,8 @@ static int wmt_mci_probe(struct platform_device *pdev)
dev_info(>dev, "WMT SDHC Controller initialized\n");
 
return 0;
+fail6:
+   clk_put(priv->clk_sdmmc);
 fail5:
free_irq(dma_irq, priv);
 fail4:
-- 
1.9.1



[PATCH 1/4] usb: mtu3: add generic compatible string

2017-08-07 Thread Chunfeng Yun
The mtu3 driver is a generic driver for MediaTek usb3 DRD IP, add
a generic compatible to avoid confusion when support new SoCs but
use a compatible with specific SoC's name "mt8173".

Signed-off-by: Chunfeng Yun 
---
 drivers/usb/mtu3/mtu3_plat.c |1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/usb/mtu3/mtu3_plat.c b/drivers/usb/mtu3/mtu3_plat.c
index 0d3ebb3..088e3e6 100644
--- a/drivers/usb/mtu3/mtu3_plat.c
+++ b/drivers/usb/mtu3/mtu3_plat.c
@@ -500,6 +500,7 @@ static int __maybe_unused mtu3_resume(struct device *dev)
 
 static const struct of_device_id mtu3_of_match[] = {
{.compatible = "mediatek,mt8173-mtu3",},
+   {.compatible = "mediatek,mtu3",},
{},
 };
 
-- 
1.7.9.5



[PATCH 1/4] usb: mtu3: add generic compatible string

2017-08-07 Thread Chunfeng Yun
The mtu3 driver is a generic driver for MediaTek usb3 DRD IP, add
a generic compatible to avoid confusion when support new SoCs but
use a compatible with specific SoC's name "mt8173".

Signed-off-by: Chunfeng Yun 
---
 drivers/usb/mtu3/mtu3_plat.c |1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/usb/mtu3/mtu3_plat.c b/drivers/usb/mtu3/mtu3_plat.c
index 0d3ebb3..088e3e6 100644
--- a/drivers/usb/mtu3/mtu3_plat.c
+++ b/drivers/usb/mtu3/mtu3_plat.c
@@ -500,6 +500,7 @@ static int __maybe_unused mtu3_resume(struct device *dev)
 
 static const struct of_device_id mtu3_of_match[] = {
{.compatible = "mediatek,mt8173-mtu3",},
+   {.compatible = "mediatek,mtu3",},
{},
 };
 
-- 
1.7.9.5



[PATCH 2/4] usb: xhci-mtk: add generic compatible string

2017-08-07 Thread Chunfeng Yun
The xhci-mtk driver is a generic driver for MediaTek xHCI IP, add
a generic compatible to avoid confusion when support new SoCs but
use a compatible with specific SoC's name "mt8173".

Signed-off-by: Chunfeng Yun 
---
 drivers/usb/host/xhci-mtk.c |1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/usb/host/xhci-mtk.c b/drivers/usb/host/xhci-mtk.c
index 67d5dc7..d2934b9 100644
--- a/drivers/usb/host/xhci-mtk.c
+++ b/drivers/usb/host/xhci-mtk.c
@@ -795,6 +795,7 @@ static int __maybe_unused xhci_mtk_resume(struct device 
*dev)
 #ifdef CONFIG_OF
 static const struct of_device_id mtk_xhci_of_match[] = {
{ .compatible = "mediatek,mt8173-xhci"},
+   { .compatible = "mediatek,xhci-mtk"},
{ },
 };
 MODULE_DEVICE_TABLE(of, mtk_xhci_of_match);
-- 
1.7.9.5



[PATCH 2/4] usb: xhci-mtk: add generic compatible string

2017-08-07 Thread Chunfeng Yun
The xhci-mtk driver is a generic driver for MediaTek xHCI IP, add
a generic compatible to avoid confusion when support new SoCs but
use a compatible with specific SoC's name "mt8173".

Signed-off-by: Chunfeng Yun 
---
 drivers/usb/host/xhci-mtk.c |1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/usb/host/xhci-mtk.c b/drivers/usb/host/xhci-mtk.c
index 67d5dc7..d2934b9 100644
--- a/drivers/usb/host/xhci-mtk.c
+++ b/drivers/usb/host/xhci-mtk.c
@@ -795,6 +795,7 @@ static int __maybe_unused xhci_mtk_resume(struct device 
*dev)
 #ifdef CONFIG_OF
 static const struct of_device_id mtk_xhci_of_match[] = {
{ .compatible = "mediatek,mt8173-xhci"},
+   { .compatible = "mediatek,xhci-mtk"},
{ },
 };
 MODULE_DEVICE_TABLE(of, mtk_xhci_of_match);
-- 
1.7.9.5



[PATCH 3/4] dt-bindings: mt8173-mtu3: add generic compatible and rename file

2017-08-07 Thread Chunfeng Yun
The mt8173-mtu3.txt actually holds the bindings for all mediatek
SoCs with usb3 DRD IP, so add a generic compatible and change the
name to mtu3.txt.

Signed-off-by: Chunfeng Yun 
---
 .../bindings/usb/{mt8173-mtu3.txt => mtu3.txt} |6 --
 1 file changed, 4 insertions(+), 2 deletions(-)
 rename Documentation/devicetree/bindings/usb/{mt8173-mtu3.txt => mtu3.txt} 
(95%)

diff --git a/Documentation/devicetree/bindings/usb/mt8173-mtu3.txt 
b/Documentation/devicetree/bindings/usb/mtu3.txt
similarity index 95%
rename from Documentation/devicetree/bindings/usb/mt8173-mtu3.txt
rename to Documentation/devicetree/bindings/usb/mtu3.txt
index 1d7c3bc..832741d 100644
--- a/Documentation/devicetree/bindings/usb/mt8173-mtu3.txt
+++ b/Documentation/devicetree/bindings/usb/mtu3.txt
@@ -1,7 +1,9 @@
 The device node for Mediatek USB3.0 DRD controller
 
 Required properties:
- - compatible : should be "mediatek,mt8173-mtu3"
+ - compatible : should be one of
+   "mediatek,mt8173-mtu3" (deprecated, use "mediatek,mtu3" instead),
+   "mediatek,mtu3"
  - reg : specifies physical base address and size of the registers
  - reg-names: should be "mac" for device IP and "ippc" for IP port control
  - interrupts : interrupt used by the device IP
@@ -44,7 +46,7 @@ Optional properties:
 Sub-nodes:
 The xhci should be added as subnode to mtu3 as shown in the following example
 if host mode is enabled. The DT binding details of xhci can be found in:
-Documentation/devicetree/bindings/usb/mt8173-xhci.txt
+Documentation/devicetree/bindings/usb/xhci-mtk.txt
 
 Example:
 ssusb: usb@11271000 {
-- 
1.7.9.5



[PATCH 3/4] dt-bindings: mt8173-mtu3: add generic compatible and rename file

2017-08-07 Thread Chunfeng Yun
The mt8173-mtu3.txt actually holds the bindings for all mediatek
SoCs with usb3 DRD IP, so add a generic compatible and change the
name to mtu3.txt.

Signed-off-by: Chunfeng Yun 
---
 .../bindings/usb/{mt8173-mtu3.txt => mtu3.txt} |6 --
 1 file changed, 4 insertions(+), 2 deletions(-)
 rename Documentation/devicetree/bindings/usb/{mt8173-mtu3.txt => mtu3.txt} 
(95%)

diff --git a/Documentation/devicetree/bindings/usb/mt8173-mtu3.txt 
b/Documentation/devicetree/bindings/usb/mtu3.txt
similarity index 95%
rename from Documentation/devicetree/bindings/usb/mt8173-mtu3.txt
rename to Documentation/devicetree/bindings/usb/mtu3.txt
index 1d7c3bc..832741d 100644
--- a/Documentation/devicetree/bindings/usb/mt8173-mtu3.txt
+++ b/Documentation/devicetree/bindings/usb/mtu3.txt
@@ -1,7 +1,9 @@
 The device node for Mediatek USB3.0 DRD controller
 
 Required properties:
- - compatible : should be "mediatek,mt8173-mtu3"
+ - compatible : should be one of
+   "mediatek,mt8173-mtu3" (deprecated, use "mediatek,mtu3" instead),
+   "mediatek,mtu3"
  - reg : specifies physical base address and size of the registers
  - reg-names: should be "mac" for device IP and "ippc" for IP port control
  - interrupts : interrupt used by the device IP
@@ -44,7 +46,7 @@ Optional properties:
 Sub-nodes:
 The xhci should be added as subnode to mtu3 as shown in the following example
 if host mode is enabled. The DT binding details of xhci can be found in:
-Documentation/devicetree/bindings/usb/mt8173-xhci.txt
+Documentation/devicetree/bindings/usb/xhci-mtk.txt
 
 Example:
 ssusb: usb@11271000 {
-- 
1.7.9.5



[PATCH 4/4] dt-bindings: mt8173-xhci: add generic compatible and rename file

2017-08-07 Thread Chunfeng Yun
The mt8173-xhci.txt actually holds the bindings for all mediatek
SoCs with xHCI controller, so add a generic compatible and change
the name to xhci-mtk.txt to reflect that.

Signed-off-by: Chunfeng Yun 
---
 .../bindings/usb/{mt8173-xhci.txt => xhci-mtk.txt} |   10 +++---
 1 file changed, 7 insertions(+), 3 deletions(-)
 rename Documentation/devicetree/bindings/usb/{mt8173-xhci.txt => xhci-mtk.txt} 
(92%)

diff --git a/Documentation/devicetree/bindings/usb/mt8173-xhci.txt 
b/Documentation/devicetree/bindings/usb/xhci-mtk.txt
similarity index 92%
rename from Documentation/devicetree/bindings/usb/mt8173-xhci.txt
rename to Documentation/devicetree/bindings/usb/xhci-mtk.txt
index 0acfc8a..1ce77c7 100644
--- a/Documentation/devicetree/bindings/usb/mt8173-xhci.txt
+++ b/Documentation/devicetree/bindings/usb/xhci-mtk.txt
@@ -11,7 +11,9 @@ into two parts.
 
 
 Required properties:
- - compatible : should contain "mediatek,mt8173-xhci"
+ - compatible : should be one of
+   "mediatek,mt8173-xhci" (deprecated, use "mediatek,xhci-mtk" instead),
+   "mediatek,xhci-mtk"
  - reg : specifies physical base address and size of the registers
  - reg-names: should be "mac" for xHCI MAC and "ippc" for IP port control
  - interrupts : interrupt used by the controller
@@ -68,10 +70,12 @@ usb30: usb@1127 {
 
 In the case, xhci is added as subnode to mtu3. An example and the DT binding
 details of mtu3 can be found in:
-Documentation/devicetree/bindings/usb/mt8173-mtu3.txt
+Documentation/devicetree/bindings/usb/mtu3.txt
 
 Required properties:
- - compatible : should contain "mediatek,mt8173-xhci"
+ - compatible : should be one of
+   "mediatek,mt8173-xhci" (deprecated, use "mediatek,xhci-mtk" instead),
+   "mediatek,xhci-mtk"
  - reg : specifies physical base address and size of the registers
  - reg-names: should be "mac" for xHCI MAC
  - interrupts : interrupt used by the host controller
-- 
1.7.9.5



[PATCH 4/4] dt-bindings: mt8173-xhci: add generic compatible and rename file

2017-08-07 Thread Chunfeng Yun
The mt8173-xhci.txt actually holds the bindings for all mediatek
SoCs with xHCI controller, so add a generic compatible and change
the name to xhci-mtk.txt to reflect that.

Signed-off-by: Chunfeng Yun 
---
 .../bindings/usb/{mt8173-xhci.txt => xhci-mtk.txt} |   10 +++---
 1 file changed, 7 insertions(+), 3 deletions(-)
 rename Documentation/devicetree/bindings/usb/{mt8173-xhci.txt => xhci-mtk.txt} 
(92%)

diff --git a/Documentation/devicetree/bindings/usb/mt8173-xhci.txt 
b/Documentation/devicetree/bindings/usb/xhci-mtk.txt
similarity index 92%
rename from Documentation/devicetree/bindings/usb/mt8173-xhci.txt
rename to Documentation/devicetree/bindings/usb/xhci-mtk.txt
index 0acfc8a..1ce77c7 100644
--- a/Documentation/devicetree/bindings/usb/mt8173-xhci.txt
+++ b/Documentation/devicetree/bindings/usb/xhci-mtk.txt
@@ -11,7 +11,9 @@ into two parts.
 
 
 Required properties:
- - compatible : should contain "mediatek,mt8173-xhci"
+ - compatible : should be one of
+   "mediatek,mt8173-xhci" (deprecated, use "mediatek,xhci-mtk" instead),
+   "mediatek,xhci-mtk"
  - reg : specifies physical base address and size of the registers
  - reg-names: should be "mac" for xHCI MAC and "ippc" for IP port control
  - interrupts : interrupt used by the controller
@@ -68,10 +70,12 @@ usb30: usb@1127 {
 
 In the case, xhci is added as subnode to mtu3. An example and the DT binding
 details of mtu3 can be found in:
-Documentation/devicetree/bindings/usb/mt8173-mtu3.txt
+Documentation/devicetree/bindings/usb/mtu3.txt
 
 Required properties:
- - compatible : should contain "mediatek,mt8173-xhci"
+ - compatible : should be one of
+   "mediatek,mt8173-xhci" (deprecated, use "mediatek,xhci-mtk" instead),
+   "mediatek,xhci-mtk"
  - reg : specifies physical base address and size of the registers
  - reg-names: should be "mac" for xHCI MAC
  - interrupts : interrupt used by the host controller
-- 
1.7.9.5



[PATCH] ARM: dts: DRA7: Add pcie1 dt node for EP mode

2017-08-07 Thread Kishon Vijay Abraham I
Add pcie1 dt node in order for the controller to operate in
endpoint mode. However since none of the dra7 based boards have
slots configured to operate in endpoint mode, keep EP mode
disabled.

Signed-off-by: Kishon Vijay Abraham I 
---
 arch/arm/boot/dts/am571x-idk.dts|  9 +
 arch/arm/boot/dts/am572x-idk.dts|  7 ++-
 arch/arm/boot/dts/am57xx-beagle-x15-common.dtsi |  7 ++-
 arch/arm/boot/dts/dra7-evm.dts  |  4 
 arch/arm/boot/dts/dra7.dtsi | 23 ++-
 arch/arm/boot/dts/dra72-evm-common.dtsi |  4 
 6 files changed, 51 insertions(+), 3 deletions(-)

diff --git a/arch/arm/boot/dts/am571x-idk.dts b/arch/arm/boot/dts/am571x-idk.dts
index adc70fb091a2..0c0bb4e93f25 100644
--- a/arch/arm/boot/dts/am571x-idk.dts
+++ b/arch/arm/boot/dts/am571x-idk.dts
@@ -96,3 +96,12 @@
status = "okay";
};
 };
+
+_rc {
+   status = "okay";
+   gpios = < 23 GPIO_ACTIVE_HIGH>;
+};
+
+_ep {
+   gpios = < 23 GPIO_ACTIVE_HIGH>;
+};
diff --git a/arch/arm/boot/dts/am572x-idk.dts b/arch/arm/boot/dts/am572x-idk.dts
index 940fcbe5380b..5ff75004afcf 100644
--- a/arch/arm/boot/dts/am572x-idk.dts
+++ b/arch/arm/boot/dts/am572x-idk.dts
@@ -88,7 +88,12 @@
load-gpios = < 19 GPIO_ACTIVE_LOW>;
 };
 
- {
+_rc {
+   status = "okay";
+   gpios = < 23 GPIO_ACTIVE_HIGH>;
+};
+
+_ep {
gpios = < 23 GPIO_ACTIVE_HIGH>;
 };
 
diff --git a/arch/arm/boot/dts/am57xx-beagle-x15-common.dtsi 
b/arch/arm/boot/dts/am57xx-beagle-x15-common.dtsi
index fdfe5b16b806..d433a50cd18a 100644
--- a/arch/arm/boot/dts/am57xx-beagle-x15-common.dtsi
+++ b/arch/arm/boot/dts/am57xx-beagle-x15-common.dtsi
@@ -570,7 +570,12 @@
};
 };
 
- {
+_rc {
+   status = "ok";
+   gpios = < 8 GPIO_ACTIVE_LOW>;
+};
+
+_ep {
gpios = < 8 GPIO_ACTIVE_LOW>;
 };
 
diff --git a/arch/arm/boot/dts/dra7-evm.dts b/arch/arm/boot/dts/dra7-evm.dts
index f47fc4daf062..57bd75909d96 100644
--- a/arch/arm/boot/dts/dra7-evm.dts
+++ b/arch/arm/boot/dts/dra7-evm.dts
@@ -720,3 +720,7 @@
status = "okay";
};
 };
+
+_rc {
+   status = "okay";
+};
diff --git a/arch/arm/boot/dts/dra7.dtsi b/arch/arm/boot/dts/dra7.dtsi
index 0f0f6f58bd18..e6f2c6a15dc1 100644
--- a/arch/arm/boot/dts/dra7.dtsi
+++ b/arch/arm/boot/dts/dra7.dtsi
@@ -196,6 +196,7 @@
scm_conf1: scm_conf@1c04 {
compatible = "syscon";
reg = <0x1c04 0x0020>;
+   #syscon-cells = <2>;
};
 
scm_conf_pcie: scm_conf@1c24 {
@@ -287,7 +288,11 @@
#address-cells = <1>;
ranges = <0x5100 0x5100 0x3000
  0x00x2000 0x1000>;
-   pcie1: pcie@5100 {
+   /**
+* To enable PCI endpoint mode, disable the pcie1_rc
+* node and enable pcie1_ep mode.
+*/
+   pcie1_rc: pcie@5100 {
compatible = "ti,dra7-pcie";
reg = <0x5100 0x2000>, <0x51002000 0x14c>, 
<0x1000 0x2000>;
reg-names = "rc_dbics", "ti_conf", "config";
@@ -309,12 +314,28 @@
<0 0 0 2 _intc 2>,
<0 0 0 3 _intc 3>,
<0 0 0 4 _intc 4>;
+   status = "disabled";
pcie1_intc: interrupt-controller {
interrupt-controller;
#address-cells = <0>;
#interrupt-cells = <1>;
};
};
+
+   pcie1_ep: pcie_ep@5100 {
+   compatible = "ti,dra7-pcie-ep";
+   reg = <0x5100 0x28>, <0x51002000 0x14c>, 
<0x51001000 0x28>, <0x1000 0x1000>;
+   reg-names = "ep_dbics", "ti_conf", "ep_dbics2", 
"addr_space";
+   interrupts = <0 232 0x4>;
+   num-lanes = <1>;
+   num-ib-windows = <4>;
+   num-ob-windows = <16>;
+   ti,hwmods = "pcie1";
+   phys = <_phy>;
+   phy-names = "pcie-phy0";
+   ti,syscon-unaligned-access = <_conf1 0x14 
2>;
+   status = "disabled";
+   };
};
 
axi@1 {
diff --git 

[PATCH] ARM: dts: DRA7: Add pcie1 dt node for EP mode

2017-08-07 Thread Kishon Vijay Abraham I
Add pcie1 dt node in order for the controller to operate in
endpoint mode. However since none of the dra7 based boards have
slots configured to operate in endpoint mode, keep EP mode
disabled.

Signed-off-by: Kishon Vijay Abraham I 
---
 arch/arm/boot/dts/am571x-idk.dts|  9 +
 arch/arm/boot/dts/am572x-idk.dts|  7 ++-
 arch/arm/boot/dts/am57xx-beagle-x15-common.dtsi |  7 ++-
 arch/arm/boot/dts/dra7-evm.dts  |  4 
 arch/arm/boot/dts/dra7.dtsi | 23 ++-
 arch/arm/boot/dts/dra72-evm-common.dtsi |  4 
 6 files changed, 51 insertions(+), 3 deletions(-)

diff --git a/arch/arm/boot/dts/am571x-idk.dts b/arch/arm/boot/dts/am571x-idk.dts
index adc70fb091a2..0c0bb4e93f25 100644
--- a/arch/arm/boot/dts/am571x-idk.dts
+++ b/arch/arm/boot/dts/am571x-idk.dts
@@ -96,3 +96,12 @@
status = "okay";
};
 };
+
+_rc {
+   status = "okay";
+   gpios = < 23 GPIO_ACTIVE_HIGH>;
+};
+
+_ep {
+   gpios = < 23 GPIO_ACTIVE_HIGH>;
+};
diff --git a/arch/arm/boot/dts/am572x-idk.dts b/arch/arm/boot/dts/am572x-idk.dts
index 940fcbe5380b..5ff75004afcf 100644
--- a/arch/arm/boot/dts/am572x-idk.dts
+++ b/arch/arm/boot/dts/am572x-idk.dts
@@ -88,7 +88,12 @@
load-gpios = < 19 GPIO_ACTIVE_LOW>;
 };
 
- {
+_rc {
+   status = "okay";
+   gpios = < 23 GPIO_ACTIVE_HIGH>;
+};
+
+_ep {
gpios = < 23 GPIO_ACTIVE_HIGH>;
 };
 
diff --git a/arch/arm/boot/dts/am57xx-beagle-x15-common.dtsi 
b/arch/arm/boot/dts/am57xx-beagle-x15-common.dtsi
index fdfe5b16b806..d433a50cd18a 100644
--- a/arch/arm/boot/dts/am57xx-beagle-x15-common.dtsi
+++ b/arch/arm/boot/dts/am57xx-beagle-x15-common.dtsi
@@ -570,7 +570,12 @@
};
 };
 
- {
+_rc {
+   status = "ok";
+   gpios = < 8 GPIO_ACTIVE_LOW>;
+};
+
+_ep {
gpios = < 8 GPIO_ACTIVE_LOW>;
 };
 
diff --git a/arch/arm/boot/dts/dra7-evm.dts b/arch/arm/boot/dts/dra7-evm.dts
index f47fc4daf062..57bd75909d96 100644
--- a/arch/arm/boot/dts/dra7-evm.dts
+++ b/arch/arm/boot/dts/dra7-evm.dts
@@ -720,3 +720,7 @@
status = "okay";
};
 };
+
+_rc {
+   status = "okay";
+};
diff --git a/arch/arm/boot/dts/dra7.dtsi b/arch/arm/boot/dts/dra7.dtsi
index 0f0f6f58bd18..e6f2c6a15dc1 100644
--- a/arch/arm/boot/dts/dra7.dtsi
+++ b/arch/arm/boot/dts/dra7.dtsi
@@ -196,6 +196,7 @@
scm_conf1: scm_conf@1c04 {
compatible = "syscon";
reg = <0x1c04 0x0020>;
+   #syscon-cells = <2>;
};
 
scm_conf_pcie: scm_conf@1c24 {
@@ -287,7 +288,11 @@
#address-cells = <1>;
ranges = <0x5100 0x5100 0x3000
  0x00x2000 0x1000>;
-   pcie1: pcie@5100 {
+   /**
+* To enable PCI endpoint mode, disable the pcie1_rc
+* node and enable pcie1_ep mode.
+*/
+   pcie1_rc: pcie@5100 {
compatible = "ti,dra7-pcie";
reg = <0x5100 0x2000>, <0x51002000 0x14c>, 
<0x1000 0x2000>;
reg-names = "rc_dbics", "ti_conf", "config";
@@ -309,12 +314,28 @@
<0 0 0 2 _intc 2>,
<0 0 0 3 _intc 3>,
<0 0 0 4 _intc 4>;
+   status = "disabled";
pcie1_intc: interrupt-controller {
interrupt-controller;
#address-cells = <0>;
#interrupt-cells = <1>;
};
};
+
+   pcie1_ep: pcie_ep@5100 {
+   compatible = "ti,dra7-pcie-ep";
+   reg = <0x5100 0x28>, <0x51002000 0x14c>, 
<0x51001000 0x28>, <0x1000 0x1000>;
+   reg-names = "ep_dbics", "ti_conf", "ep_dbics2", 
"addr_space";
+   interrupts = <0 232 0x4>;
+   num-lanes = <1>;
+   num-ib-windows = <4>;
+   num-ob-windows = <16>;
+   ti,hwmods = "pcie1";
+   phys = <_phy>;
+   phy-names = "pcie-phy0";
+   ti,syscon-unaligned-access = <_conf1 0x14 
2>;
+   status = "disabled";
+   };
};
 
axi@1 {
diff --git 

[PATCH] memory: mtk-smi: Handle return value of clk_prepare_enable

2017-08-07 Thread Arvind Yadav
clk_prepare_enable() can fail here and we must check its return value.

Signed-off-by: Arvind Yadav 
---
 drivers/memory/mtk-smi.c | 5 -
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/drivers/memory/mtk-smi.c b/drivers/memory/mtk-smi.c
index 4afbc41..edf36f0 100644
--- a/drivers/memory/mtk-smi.c
+++ b/drivers/memory/mtk-smi.c
@@ -321,6 +321,7 @@ static int mtk_smi_common_probe(struct platform_device 
*pdev)
struct resource *res;
const struct of_device_id *of_id;
enum mtk_smi_gen smi_gen;
+   int ret;
 
if (!dev->pm_domain)
return -EPROBE_DEFER;
@@ -359,7 +360,9 @@ static int mtk_smi_common_probe(struct platform_device 
*pdev)
if (IS_ERR(common->clk_async))
return PTR_ERR(common->clk_async);
 
-   clk_prepare_enable(common->clk_async);
+   ret = clk_prepare_enable(common->clk_async);
+   if (ret)
+   return ret;
}
pm_runtime_enable(dev);
platform_set_drvdata(pdev, common);
-- 
1.9.1



[PATCH] memory: mtk-smi: Handle return value of clk_prepare_enable

2017-08-07 Thread Arvind Yadav
clk_prepare_enable() can fail here and we must check its return value.

Signed-off-by: Arvind Yadav 
---
 drivers/memory/mtk-smi.c | 5 -
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/drivers/memory/mtk-smi.c b/drivers/memory/mtk-smi.c
index 4afbc41..edf36f0 100644
--- a/drivers/memory/mtk-smi.c
+++ b/drivers/memory/mtk-smi.c
@@ -321,6 +321,7 @@ static int mtk_smi_common_probe(struct platform_device 
*pdev)
struct resource *res;
const struct of_device_id *of_id;
enum mtk_smi_gen smi_gen;
+   int ret;
 
if (!dev->pm_domain)
return -EPROBE_DEFER;
@@ -359,7 +360,9 @@ static int mtk_smi_common_probe(struct platform_device 
*pdev)
if (IS_ERR(common->clk_async))
return PTR_ERR(common->clk_async);
 
-   clk_prepare_enable(common->clk_async);
+   ret = clk_prepare_enable(common->clk_async);
+   if (ret)
+   return ret;
}
pm_runtime_enable(dev);
platform_set_drvdata(pdev, common);
-- 
1.9.1



Re: [RFC PATCH 0/1] Add hugetlbfs support to memfd_create()

2017-08-07 Thread Michal Hocko
Hi,
I am one foot out of office and will be offline for two days so I
didn't get to review the patch yet but this information is an useful
information about the usecase that should be in the patch directly for
future reference.

On Mon 07-08-17 16:47:51, Mike Kravetz wrote:
> This patch came out of discussions in this e-mail thread [1].
> 
> The Oracle JVM team is developing a new garbage collection model.  This
> new model requires multiple mappings of the same anonymous memory.  One
> straight forward way to accomplish this is with memfd_create.  They can
> use the returned fd to create multiple mappings of the same memory.
> 
> The JVM today has an option to use (static hugetlb) huge pages.  If this
> option is specified, they would like to use the same garbage collection
> model requiring multiple mappings to the same memory.  Using hugetlbfs,
> it is possible to explicitly mount a filesystem and specify file paths
> in order to get an fd that can be used for multiple mappings.  However,
> this introduces additional system admin work and coordination.
> 
> Ideally they would like to get a hugetlbfs fd without requiring explicit
> mounting of a filesystem.   Today, mmap and shmget can make use of
> hugetlbfs without explicitly mounting a filesystem.  The patch adds this
> functionality to hugetlbfs.
> 
> A new flag MFD_HUGETLB is introduced to request a hugetlbfs file.  Like
> other system calls where hugetlb can be requested, the huge page size
> can be encoded in the flags argument is the non-default huge page size
> is desired.  hugetlbfs does not support sealing operations, therefore
> specifying MFD_ALLOW_SEALING with MFD_HUGETLB will result in EINVAL.
> 
> Of course, the memfd_man page would need updating if this type of
> functionality moves forward.
> 
> [1] https://lkml.org/lkml/2017/7/6/564
> 
> Mike Kravetz (1):
>   mm/shmem: add hugetlbfs support to memfd_create()
> 
>  include/uapi/linux/memfd.h | 24 
>  mm/shmem.c | 37 +++--
>  2 files changed, 55 insertions(+), 6 deletions(-)
> 
> -- 
> 2.7.5

-- 
Michal Hocko
SUSE Labs


Re: [RFC PATCH 0/1] Add hugetlbfs support to memfd_create()

2017-08-07 Thread Michal Hocko
Hi,
I am one foot out of office and will be offline for two days so I
didn't get to review the patch yet but this information is an useful
information about the usecase that should be in the patch directly for
future reference.

On Mon 07-08-17 16:47:51, Mike Kravetz wrote:
> This patch came out of discussions in this e-mail thread [1].
> 
> The Oracle JVM team is developing a new garbage collection model.  This
> new model requires multiple mappings of the same anonymous memory.  One
> straight forward way to accomplish this is with memfd_create.  They can
> use the returned fd to create multiple mappings of the same memory.
> 
> The JVM today has an option to use (static hugetlb) huge pages.  If this
> option is specified, they would like to use the same garbage collection
> model requiring multiple mappings to the same memory.  Using hugetlbfs,
> it is possible to explicitly mount a filesystem and specify file paths
> in order to get an fd that can be used for multiple mappings.  However,
> this introduces additional system admin work and coordination.
> 
> Ideally they would like to get a hugetlbfs fd without requiring explicit
> mounting of a filesystem.   Today, mmap and shmget can make use of
> hugetlbfs without explicitly mounting a filesystem.  The patch adds this
> functionality to hugetlbfs.
> 
> A new flag MFD_HUGETLB is introduced to request a hugetlbfs file.  Like
> other system calls where hugetlb can be requested, the huge page size
> can be encoded in the flags argument is the non-default huge page size
> is desired.  hugetlbfs does not support sealing operations, therefore
> specifying MFD_ALLOW_SEALING with MFD_HUGETLB will result in EINVAL.
> 
> Of course, the memfd_man page would need updating if this type of
> functionality moves forward.
> 
> [1] https://lkml.org/lkml/2017/7/6/564
> 
> Mike Kravetz (1):
>   mm/shmem: add hugetlbfs support to memfd_create()
> 
>  include/uapi/linux/memfd.h | 24 
>  mm/shmem.c | 37 +++--
>  2 files changed, 55 insertions(+), 6 deletions(-)
> 
> -- 
> 2.7.5

-- 
Michal Hocko
SUSE Labs


Re: [PATCH] mm: ratelimit PFNs busy info message

2017-08-07 Thread Michael Ellerman
Andrew Morton  writes:

> On Wed,  2 Aug 2017 13:44:57 -0400 Jonathan Toppins  
> wrote:
>
>> The RDMA subsystem can generate several thousand of these messages per
>> second eventually leading to a kernel crash. Ratelimit these messages
>> to prevent this crash.
>
> Well...  why are all these EBUSY's occurring?  It sounds inefficient (at
> least) but if it is expected, normal and unavoidable then perhaps we
> should just remove that message altogether?

We see them on powerpc sometimes when CMA is unable to make large
allocations for the hash table of a KVM guest.

At least in that context they're not useful, CMA will try the
allocation again, and if it really can't allocate then CMA will print
more useful information itself.

So I'd vote for dropping the message and letting the callers decide what
to do.

cheers


Re: [PATCH] mm: ratelimit PFNs busy info message

2017-08-07 Thread Michael Ellerman
Andrew Morton  writes:

> On Wed,  2 Aug 2017 13:44:57 -0400 Jonathan Toppins  
> wrote:
>
>> The RDMA subsystem can generate several thousand of these messages per
>> second eventually leading to a kernel crash. Ratelimit these messages
>> to prevent this crash.
>
> Well...  why are all these EBUSY's occurring?  It sounds inefficient (at
> least) but if it is expected, normal and unavoidable then perhaps we
> should just remove that message altogether?

We see them on powerpc sometimes when CMA is unable to make large
allocations for the hash table of a KVM guest.

At least in that context they're not useful, CMA will try the
allocation again, and if it really can't allocate then CMA will print
more useful information itself.

So I'd vote for dropping the message and letting the callers decide what
to do.

cheers


Re: [RESEND PATCH] bcache: Don't reinvent the wheel but use existing llist API

2017-08-07 Thread Coly Li
On 2017/8/8 下午12:12, Byungchul Park wrote:
> On Mon, Aug 07, 2017 at 06:18:35PM +0800, Coly Li wrote:
>> On 2017/8/7 下午4:38, Byungchul Park wrote:
>>> Although llist provides proper APIs, they are not used. Make them used.
>>>
>>> Signed-off-by: Byungchul Park > Only have a question about why not using llist_for_each_entry(), it's
> 
> Hello,
> 
> The reason is to keep the original logic unchanged. The logic already
> does as if it's the safe version against removal.
> 
>> still OK with llist_for_each_entry_safe(). The rested part is good to me.
>>
>> Acked-by: Coly Li 
>>
>>> ---
>>>  drivers/md/bcache/closure.c | 17 +++--
>>>  1 file changed, 3 insertions(+), 14 deletions(-)
>>>
>>> diff --git a/drivers/md/bcache/closure.c b/drivers/md/bcache/closure.c
>>> index 864e673..1841d03 100644
>>> --- a/drivers/md/bcache/closure.c
>>> +++ b/drivers/md/bcache/closure.c
>>> @@ -64,27 +64,16 @@ void closure_put(struct closure *cl)
>>>  void __closure_wake_up(struct closure_waitlist *wait_list)
>>>  {
>>> struct llist_node *list;
>>> -   struct closure *cl;
>>> +   struct closure *cl, *t;
>>> struct llist_node *reverse = NULL;
>>>  
>>> list = llist_del_all(_list->list);
>>>  
>>> /* We first reverse the list to preserve FIFO ordering and fairness */
>>> -
>>> -   while (list) {
>>> -   struct llist_node *t = list;
>>> -   list = llist_next(list);
>>> -
>>> -   t->next = reverse;
>>> -   reverse = t;
>>> -   }
>>> +   reverse = llist_reverse_order(list);
>>>  
>>> /* Then do the wakeups */
>>> -
>>> -   while (reverse) {
>>> -   cl = container_of(reverse, struct closure, list);
>>> -   reverse = llist_next(reverse);
>>> -
>>> +   llist_for_each_entry_safe(cl, t, reverse, list) {
>>
>> Just wondering why not using llist_for_each_entry(), or you use the
>> _safe version on purpose ?
> 
> If I use llist_for_each_entry(), then it would change the original
> behavior. Is it ok?
> 

I feel llist_for_each_entry() keeps the original behavior, and variable
't' can be removed. Anyway, either llist_for_each_entry() or
llist_for_each_entry_safe() works correctly and well here. Any one you
use is OK to me, thanks for your informative reply :-)



-- 
Coly Li


Re: [RESEND PATCH] bcache: Don't reinvent the wheel but use existing llist API

2017-08-07 Thread Coly Li
On 2017/8/8 下午12:12, Byungchul Park wrote:
> On Mon, Aug 07, 2017 at 06:18:35PM +0800, Coly Li wrote:
>> On 2017/8/7 下午4:38, Byungchul Park wrote:
>>> Although llist provides proper APIs, they are not used. Make them used.
>>>
>>> Signed-off-by: Byungchul Park > Only have a question about why not using llist_for_each_entry(), it's
> 
> Hello,
> 
> The reason is to keep the original logic unchanged. The logic already
> does as if it's the safe version against removal.
> 
>> still OK with llist_for_each_entry_safe(). The rested part is good to me.
>>
>> Acked-by: Coly Li 
>>
>>> ---
>>>  drivers/md/bcache/closure.c | 17 +++--
>>>  1 file changed, 3 insertions(+), 14 deletions(-)
>>>
>>> diff --git a/drivers/md/bcache/closure.c b/drivers/md/bcache/closure.c
>>> index 864e673..1841d03 100644
>>> --- a/drivers/md/bcache/closure.c
>>> +++ b/drivers/md/bcache/closure.c
>>> @@ -64,27 +64,16 @@ void closure_put(struct closure *cl)
>>>  void __closure_wake_up(struct closure_waitlist *wait_list)
>>>  {
>>> struct llist_node *list;
>>> -   struct closure *cl;
>>> +   struct closure *cl, *t;
>>> struct llist_node *reverse = NULL;
>>>  
>>> list = llist_del_all(_list->list);
>>>  
>>> /* We first reverse the list to preserve FIFO ordering and fairness */
>>> -
>>> -   while (list) {
>>> -   struct llist_node *t = list;
>>> -   list = llist_next(list);
>>> -
>>> -   t->next = reverse;
>>> -   reverse = t;
>>> -   }
>>> +   reverse = llist_reverse_order(list);
>>>  
>>> /* Then do the wakeups */
>>> -
>>> -   while (reverse) {
>>> -   cl = container_of(reverse, struct closure, list);
>>> -   reverse = llist_next(reverse);
>>> -
>>> +   llist_for_each_entry_safe(cl, t, reverse, list) {
>>
>> Just wondering why not using llist_for_each_entry(), or you use the
>> _safe version on purpose ?
> 
> If I use llist_for_each_entry(), then it would change the original
> behavior. Is it ok?
> 

I feel llist_for_each_entry() keeps the original behavior, and variable
't' can be removed. Anyway, either llist_for_each_entry() or
llist_for_each_entry_safe() works correctly and well here. Any one you
use is OK to me, thanks for your informative reply :-)



-- 
Coly Li


Re: [PATCH] arm64: correct modules range of kernel virtual memory layout

2017-08-07 Thread Miles Chen
On Tue, 2017-08-08 at 12:44 +0800, Miles Chen wrote:
> On Mon, 2017-08-07 at 15:01 +0100, Will Deacon wrote:
> > On Mon, Aug 07, 2017 at 02:18:00PM +0100, Ard Biesheuvel wrote:
> > > On 7 August 2017 at 14:16, Will Deacon  wrote:
> > > > On Mon, Aug 07, 2017 at 07:04:46PM +0800, Miles Chen wrote:
> > > >> The commit f80fb3a3d508 ("arm64: add support for kernel ASLR")
> > > >> moved module virtual address to
> > > >> [module_alloc_base, module_alloc_base + MODULES_VSIZE).
> > > >>
> > > >> Display module information of the virtual kernel
> > > >> memory layout by using module_alloc_base.
> > > >>
> > > >> testing output:
> > > >> 1) Current implementation:
> > > >> Virtual kernel memory layout:
> > > >>   modules : 0xff80 - 0xff800800   (   128 MB)
> > > >> 2) this patch + KASLR:
> > > >> Virtual kernel memory layout:
> > > >>   modules : 0xff800056 - 0xff800856   (   128 MB)
> > > >> 3) this patch + KASLR and a dummy seed:
> > > >> Virtual kernel memory layout:
> > > >>   modules : 0xffa7df637000 - 0xffa7e7637000   (   128 MB)
> > > >>
> > > >> Signed-off-by: Miles Chen 
> > > >> ---
> > > >>  arch/arm64/mm/init.c | 5 +++--
> > > >>  1 file changed, 3 insertions(+), 2 deletions(-)
> > > >
> > > > Does this mean the modules code in our pt dumper is busted
> > > > (arch/arm64/mm/dump.c)? Also, what about KASAN, which uses these 
> > > > addresses
> > > > too (in kasan_init)? Should we just remove MODULES_VADDR and MODULES_END
> > > > altogether?
> > > >
> > > 
> > > I don't think we need this patch. The 'module' line simply prints the
> > > VA region that is reserved for modules. The fact that we end up
> > > putting them elsewhere when running randomized does not necessarily
> > > mean this line should reflect that.
> > 
> > I was more concerned by other users of MODULES_VADDR tbh, although I see
> > now that we don't randomize the module region if kasan is enabled. Still,
> > the kcore code adds the modules region as a separate area (distinct from
> > vmalloc) if MODULES_VADDR is defined, the page table dumping code uses
> > MODULES_VADDR to identify the module region and I think we'll get false
> > positives from is_vmalloc_or_module_addr, which again uses the static
> > region.
> > 
> > So, given that MODULES_VADDR never points at the module area, can't we get
> > rid of it?
> 
> Agreed.MODULES_VADDR should be phased out. Considering the kernel
> modules live somewhere between [VMALLOC_START, VMALLOC_END) now:
> (arch/arm64/kernel/module.c:module_alloc). I suggest the following
> changes:
> 
> 1. is_vmalloc_or_module_addr() should return is_vmalloc_addr() directly
> 2. arch/arm64/mm/dump.c does not need MODULES_VADDR and MODULES_END.
> 3. kasan uses [module_alloc_base, module_alloc_base + MODULES_VSIZE) to
> get the shadow memory? (the kernel modules still live in this range when
> kasan is enabled)
> 4. remove modules line in kernel memory layout
> (optional, thanks for Ard's feedback)
> 5. remove MODULE_VADDR, MODULES_END definition

I was wrong about this. is_vmalloc_or_module_addr() is defined
in mm/vmalloc and it uses MODULES_VADDR and MODULES_END.
May it is better to give MODULES_VADDR and MODULES_END
proper values, not remove them.

> Miles
> > 
> > Will
> 
> 




Re: [PATCH] arm64: correct modules range of kernel virtual memory layout

2017-08-07 Thread Miles Chen
On Tue, 2017-08-08 at 12:44 +0800, Miles Chen wrote:
> On Mon, 2017-08-07 at 15:01 +0100, Will Deacon wrote:
> > On Mon, Aug 07, 2017 at 02:18:00PM +0100, Ard Biesheuvel wrote:
> > > On 7 August 2017 at 14:16, Will Deacon  wrote:
> > > > On Mon, Aug 07, 2017 at 07:04:46PM +0800, Miles Chen wrote:
> > > >> The commit f80fb3a3d508 ("arm64: add support for kernel ASLR")
> > > >> moved module virtual address to
> > > >> [module_alloc_base, module_alloc_base + MODULES_VSIZE).
> > > >>
> > > >> Display module information of the virtual kernel
> > > >> memory layout by using module_alloc_base.
> > > >>
> > > >> testing output:
> > > >> 1) Current implementation:
> > > >> Virtual kernel memory layout:
> > > >>   modules : 0xff80 - 0xff800800   (   128 MB)
> > > >> 2) this patch + KASLR:
> > > >> Virtual kernel memory layout:
> > > >>   modules : 0xff800056 - 0xff800856   (   128 MB)
> > > >> 3) this patch + KASLR and a dummy seed:
> > > >> Virtual kernel memory layout:
> > > >>   modules : 0xffa7df637000 - 0xffa7e7637000   (   128 MB)
> > > >>
> > > >> Signed-off-by: Miles Chen 
> > > >> ---
> > > >>  arch/arm64/mm/init.c | 5 +++--
> > > >>  1 file changed, 3 insertions(+), 2 deletions(-)
> > > >
> > > > Does this mean the modules code in our pt dumper is busted
> > > > (arch/arm64/mm/dump.c)? Also, what about KASAN, which uses these 
> > > > addresses
> > > > too (in kasan_init)? Should we just remove MODULES_VADDR and MODULES_END
> > > > altogether?
> > > >
> > > 
> > > I don't think we need this patch. The 'module' line simply prints the
> > > VA region that is reserved for modules. The fact that we end up
> > > putting them elsewhere when running randomized does not necessarily
> > > mean this line should reflect that.
> > 
> > I was more concerned by other users of MODULES_VADDR tbh, although I see
> > now that we don't randomize the module region if kasan is enabled. Still,
> > the kcore code adds the modules region as a separate area (distinct from
> > vmalloc) if MODULES_VADDR is defined, the page table dumping code uses
> > MODULES_VADDR to identify the module region and I think we'll get false
> > positives from is_vmalloc_or_module_addr, which again uses the static
> > region.
> > 
> > So, given that MODULES_VADDR never points at the module area, can't we get
> > rid of it?
> 
> Agreed.MODULES_VADDR should be phased out. Considering the kernel
> modules live somewhere between [VMALLOC_START, VMALLOC_END) now:
> (arch/arm64/kernel/module.c:module_alloc). I suggest the following
> changes:
> 
> 1. is_vmalloc_or_module_addr() should return is_vmalloc_addr() directly
> 2. arch/arm64/mm/dump.c does not need MODULES_VADDR and MODULES_END.
> 3. kasan uses [module_alloc_base, module_alloc_base + MODULES_VSIZE) to
> get the shadow memory? (the kernel modules still live in this range when
> kasan is enabled)
> 4. remove modules line in kernel memory layout
> (optional, thanks for Ard's feedback)
> 5. remove MODULE_VADDR, MODULES_END definition

I was wrong about this. is_vmalloc_or_module_addr() is defined
in mm/vmalloc and it uses MODULES_VADDR and MODULES_END.
May it is better to give MODULES_VADDR and MODULES_END
proper values, not remove them.

> Miles
> > 
> > Will
> 
> 




[PATCH 2/2] ARM: dts: am572x-idk: Fix GPIO polarity for MMC1 card detect

2017-08-07 Thread Kishon Vijay Abraham I
The GPIO polarity for MMC1 card detect is set to '0' which means
active-high. However the polarity should be active-low. Fix it
here.

Signed-off-by: Kishon Vijay Abraham I 
---
 arch/arm/boot/dts/am572x-idk.dts | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/arm/boot/dts/am572x-idk.dts b/arch/arm/boot/dts/am572x-idk.dts
index 9da6d83ca185..940fcbe5380b 100644
--- a/arch/arm/boot/dts/am572x-idk.dts
+++ b/arch/arm/boot/dts/am572x-idk.dts
@@ -81,7 +81,7 @@
vmmc-supply = <_3d>;
vmmc_aux-supply = <_reg>;
bus-width = <4>;
-   cd-gpios = < 27 0>; /* gpio 219 */
+   cd-gpios = < 27 GPIO_ACTIVE_LOW>; /* gpio 219 */
 };
 
  {
-- 
2.11.0



[PATCH 1/2] ARM: dts: am571x-idk: Fix GPIO polarity for MMC1 card detect

2017-08-07 Thread Kishon Vijay Abraham I
The GPIO polarity for MMC1 card detect is set to '0' which means
active-high. However the polarity should be active-low. Fix it
here.

Signed-off-by: Kishon Vijay Abraham I 
---
 arch/arm/boot/dts/am571x-idk.dts | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/arm/boot/dts/am571x-idk.dts b/arch/arm/boot/dts/am571x-idk.dts
index 7b207835b2d1..adc70fb091a2 100644
--- a/arch/arm/boot/dts/am571x-idk.dts
+++ b/arch/arm/boot/dts/am571x-idk.dts
@@ -68,7 +68,7 @@
status = "okay";
vmmc-supply = <_reg>;
bus-width = <4>;
-   cd-gpios = < 27 0>; /* gpio 219 */
+   cd-gpios = < 27 GPIO_ACTIVE_LOW>; /* gpio 219 */
 };
 
 _dwc3_2 {
-- 
2.11.0



[PATCH 2/2] ARM: dts: am572x-idk: Fix GPIO polarity for MMC1 card detect

2017-08-07 Thread Kishon Vijay Abraham I
The GPIO polarity for MMC1 card detect is set to '0' which means
active-high. However the polarity should be active-low. Fix it
here.

Signed-off-by: Kishon Vijay Abraham I 
---
 arch/arm/boot/dts/am572x-idk.dts | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/arm/boot/dts/am572x-idk.dts b/arch/arm/boot/dts/am572x-idk.dts
index 9da6d83ca185..940fcbe5380b 100644
--- a/arch/arm/boot/dts/am572x-idk.dts
+++ b/arch/arm/boot/dts/am572x-idk.dts
@@ -81,7 +81,7 @@
vmmc-supply = <_3d>;
vmmc_aux-supply = <_reg>;
bus-width = <4>;
-   cd-gpios = < 27 0>; /* gpio 219 */
+   cd-gpios = < 27 GPIO_ACTIVE_LOW>; /* gpio 219 */
 };
 
  {
-- 
2.11.0



[PATCH 1/2] ARM: dts: am571x-idk: Fix GPIO polarity for MMC1 card detect

2017-08-07 Thread Kishon Vijay Abraham I
The GPIO polarity for MMC1 card detect is set to '0' which means
active-high. However the polarity should be active-low. Fix it
here.

Signed-off-by: Kishon Vijay Abraham I 
---
 arch/arm/boot/dts/am571x-idk.dts | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/arm/boot/dts/am571x-idk.dts b/arch/arm/boot/dts/am571x-idk.dts
index 7b207835b2d1..adc70fb091a2 100644
--- a/arch/arm/boot/dts/am571x-idk.dts
+++ b/arch/arm/boot/dts/am571x-idk.dts
@@ -68,7 +68,7 @@
status = "okay";
vmmc-supply = <_reg>;
bus-width = <4>;
-   cd-gpios = < 27 0>; /* gpio 219 */
+   cd-gpios = < 27 GPIO_ACTIVE_LOW>; /* gpio 219 */
 };
 
 _dwc3_2 {
-- 
2.11.0



[PATCH] mmc: host: omap_hsmmc: Add CMD23 capability to omap_hsmmc driver

2017-08-07 Thread Kishon Vijay Abraham I
omap_hsmmc driver always relied on CMD12 to stop transmission.
However if CMD12 is not issued at the correct timing, the card will
indicate a out of range error. With certain cards in some of the
DRA7 based boards, -EIO error is observed. By Adding CMD23 capability,
the MMC core will send MMC_SET_BLOCK_COUNT command before
MMC_READ_MULTIPLE_BLOCK/MMC_WRITE_MULTIPLE_BLOCK commands.

commit a04e6bae9e6f12 ("mmc: core: check also R1 response for
stop commands") exposed this bug in omap_hsmmc driver.

Signed-off-by: Kishon Vijay Abraham I 
---
 drivers/mmc/host/omap_hsmmc.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/mmc/host/omap_hsmmc.c b/drivers/mmc/host/omap_hsmmc.c
index 04ff3c97a535..2ab4788d021f 100644
--- a/drivers/mmc/host/omap_hsmmc.c
+++ b/drivers/mmc/host/omap_hsmmc.c
@@ -2086,7 +2086,7 @@ static int omap_hsmmc_probe(struct platform_device *pdev)
mmc->max_seg_size = mmc->max_req_size;
 
mmc->caps |= MMC_CAP_MMC_HIGHSPEED | MMC_CAP_SD_HIGHSPEED |
-MMC_CAP_WAIT_WHILE_BUSY | MMC_CAP_ERASE;
+MMC_CAP_WAIT_WHILE_BUSY | MMC_CAP_ERASE | MMC_CAP_CMD23;
 
mmc->caps |= mmc_pdata(host)->caps;
if (mmc->caps & MMC_CAP_8_BIT_DATA)
-- 
2.11.0



[PATCH] mmc: host: omap_hsmmc: Add CMD23 capability to omap_hsmmc driver

2017-08-07 Thread Kishon Vijay Abraham I
omap_hsmmc driver always relied on CMD12 to stop transmission.
However if CMD12 is not issued at the correct timing, the card will
indicate a out of range error. With certain cards in some of the
DRA7 based boards, -EIO error is observed. By Adding CMD23 capability,
the MMC core will send MMC_SET_BLOCK_COUNT command before
MMC_READ_MULTIPLE_BLOCK/MMC_WRITE_MULTIPLE_BLOCK commands.

commit a04e6bae9e6f12 ("mmc: core: check also R1 response for
stop commands") exposed this bug in omap_hsmmc driver.

Signed-off-by: Kishon Vijay Abraham I 
---
 drivers/mmc/host/omap_hsmmc.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/mmc/host/omap_hsmmc.c b/drivers/mmc/host/omap_hsmmc.c
index 04ff3c97a535..2ab4788d021f 100644
--- a/drivers/mmc/host/omap_hsmmc.c
+++ b/drivers/mmc/host/omap_hsmmc.c
@@ -2086,7 +2086,7 @@ static int omap_hsmmc_probe(struct platform_device *pdev)
mmc->max_seg_size = mmc->max_req_size;
 
mmc->caps |= MMC_CAP_MMC_HIGHSPEED | MMC_CAP_SD_HIGHSPEED |
-MMC_CAP_WAIT_WHILE_BUSY | MMC_CAP_ERASE;
+MMC_CAP_WAIT_WHILE_BUSY | MMC_CAP_ERASE | MMC_CAP_CMD23;
 
mmc->caps |= mmc_pdata(host)->caps;
if (mmc->caps & MMC_CAP_8_BIT_DATA)
-- 
2.11.0



linux-next: manual merge of the userns tree with the mips tree

2017-08-07 Thread Stephen Rothwell
Hi Eric,

Today's linux-next merge of the userns tree got a conflict in:

  arch/mips/kernel/traps.c

between commit:

  260a789828aa ("MIPS: signal: Remove unreachable code from force_fcr31_sig().")

from the mips tree and commit:

  ea1b75cf9138 ("signal/mips: Document a conflict with SI_USER with SIGFPE")

from the userns tree.

I fixed it up (the former removed the code updated by the latter) and
can carry the fix as necessary. This is now fixed as far as linux-next
is concerned, but any non trivial conflicts should be mentioned to your
upstream maintainer when your tree is submitted for merging.  You may
also want to consider cooperating with the maintainer of the conflicting
tree to minimise any particularly complex conflicts.

-- 
Cheers,
Stephen Rothwell


linux-next: manual merge of the userns tree with the mips tree

2017-08-07 Thread Stephen Rothwell
Hi Eric,

Today's linux-next merge of the userns tree got a conflict in:

  arch/mips/kernel/traps.c

between commit:

  260a789828aa ("MIPS: signal: Remove unreachable code from force_fcr31_sig().")

from the mips tree and commit:

  ea1b75cf9138 ("signal/mips: Document a conflict with SI_USER with SIGFPE")

from the userns tree.

I fixed it up (the former removed the code updated by the latter) and
can carry the fix as necessary. This is now fixed as far as linux-next
is concerned, but any non trivial conflicts should be mentioned to your
upstream maintainer when your tree is submitted for merging.  You may
also want to consider cooperating with the maintainer of the conflicting
tree to minimise any particularly complex conflicts.

-- 
Cheers,
Stephen Rothwell


[PATCH v2] scheduler: enhancement to show_state_filter

2017-08-07 Thread Yafang Shao
Sometimes we want to get tasks in TASK_RUNNING sepcifically,
instead of dump all tasks.
For example, when the loadavg are high, we want to dump
tasks in TASK_RUNNING and TASK_UNINTERRUPTIBLE, which contribute
to system load. But mostly there're lots of tasks in Sleep state,
which occupies almost all of the kernel log buffer, even overflows
it, that causes the useful messages get lost. Although we can
enlarge the kernel log buffer, but that's not a good idea.

So I made this change to make the show_state_filter more flexible,
and then we can dump the tasks in TASK_RUNNING specifically.

Signed-off-by: Yafang Shao 
---
 drivers/tty/sysrq.c | 2 +-
 include/linux/sched.h   | 1 +
 include/linux/sched/debug.h | 6 --
 kernel/sched/core.c | 7 ---
 4 files changed, 10 insertions(+), 6 deletions(-)

diff --git a/drivers/tty/sysrq.c b/drivers/tty/sysrq.c
index 3ffc1ce..86db51b 100644
--- a/drivers/tty/sysrq.c
+++ b/drivers/tty/sysrq.c
@@ -291,7 +291,7 @@ static void sysrq_handle_showstate(int key)
 
 static void sysrq_handle_showstate_blocked(int key)
 {
-   show_state_filter(TASK_UNINTERRUPTIBLE);
+   show_state_filter(TASK_UNINTERRUPTIBLE << 1);
 }
 static struct sysrq_key_op sysrq_showstate_blocked_op = {
.handler= sysrq_handle_showstate_blocked,
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 8337e2d..318f149 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -82,6 +82,7 @@
 #define TASK_NOLOAD1024
 #define TASK_NEW   2048
 #define TASK_STATE_MAX 4096
+#define TASK_ALL_BITS  ((TASK_STATE_MAX << 1) - 1)
 
 #define TASK_STATE_TO_CHAR_STR "RSDTtXZxKWPNn"
 
diff --git a/include/linux/sched/debug.h b/include/linux/sched/debug.h
index e0eaee5..c844689 100644
--- a/include/linux/sched/debug.h
+++ b/include/linux/sched/debug.h
@@ -1,6 +1,8 @@
 #ifndef _LINUX_SCHED_DEBUG_H
 #define _LINUX_SCHED_DEBUG_H
 
+#include 
+
 /*
  * Various scheduler/task debugging interfaces:
  */
@@ -10,13 +12,13 @@
 extern void dump_cpu_task(int cpu);
 
 /*
- * Only dump TASK_* tasks. (0 for all tasks)
+ * Only dump TASK_* tasks. (TASK_ALL_BITS for all tasks)
  */
 extern void show_state_filter(unsigned long state_filter);
 
 static inline void show_state(void)
 {
-   show_state_filter(0);
+   show_state_filter(TASK_ALL_BITS);
 }
 
 struct pt_regs;
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 0869b20..f9b9529 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -5161,19 +5161,20 @@ void show_state_filter(unsigned long state_filter)
 */
touch_nmi_watchdog();
touch_all_softlockup_watchdogs();
-   if (!state_filter || (p->state & state_filter))
+   /* in case we want to set TASK_RUNNING specifically */
+   if ((p->state != TASK_RUNNING ? p->state << 1 : 1) & 
state_filter)
sched_show_task(p);
}
 
 #ifdef CONFIG_SCHED_DEBUG
-   if (!state_filter)
+   if (state_filter == TASK_ALL_BITS)
sysrq_sched_debug_show();
 #endif
rcu_read_unlock();
/*
 * Only show locks if all tasks are dumped:
 */
-   if (!state_filter)
+   if (state_filter == TASK_ALL_BITS)
debug_show_all_locks();
 }
 
-- 
1.8.3.1



[PATCH v2] scheduler: enhancement to show_state_filter

2017-08-07 Thread Yafang Shao
Sometimes we want to get tasks in TASK_RUNNING sepcifically,
instead of dump all tasks.
For example, when the loadavg are high, we want to dump
tasks in TASK_RUNNING and TASK_UNINTERRUPTIBLE, which contribute
to system load. But mostly there're lots of tasks in Sleep state,
which occupies almost all of the kernel log buffer, even overflows
it, that causes the useful messages get lost. Although we can
enlarge the kernel log buffer, but that's not a good idea.

So I made this change to make the show_state_filter more flexible,
and then we can dump the tasks in TASK_RUNNING specifically.

Signed-off-by: Yafang Shao 
---
 drivers/tty/sysrq.c | 2 +-
 include/linux/sched.h   | 1 +
 include/linux/sched/debug.h | 6 --
 kernel/sched/core.c | 7 ---
 4 files changed, 10 insertions(+), 6 deletions(-)

diff --git a/drivers/tty/sysrq.c b/drivers/tty/sysrq.c
index 3ffc1ce..86db51b 100644
--- a/drivers/tty/sysrq.c
+++ b/drivers/tty/sysrq.c
@@ -291,7 +291,7 @@ static void sysrq_handle_showstate(int key)
 
 static void sysrq_handle_showstate_blocked(int key)
 {
-   show_state_filter(TASK_UNINTERRUPTIBLE);
+   show_state_filter(TASK_UNINTERRUPTIBLE << 1);
 }
 static struct sysrq_key_op sysrq_showstate_blocked_op = {
.handler= sysrq_handle_showstate_blocked,
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 8337e2d..318f149 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -82,6 +82,7 @@
 #define TASK_NOLOAD1024
 #define TASK_NEW   2048
 #define TASK_STATE_MAX 4096
+#define TASK_ALL_BITS  ((TASK_STATE_MAX << 1) - 1)
 
 #define TASK_STATE_TO_CHAR_STR "RSDTtXZxKWPNn"
 
diff --git a/include/linux/sched/debug.h b/include/linux/sched/debug.h
index e0eaee5..c844689 100644
--- a/include/linux/sched/debug.h
+++ b/include/linux/sched/debug.h
@@ -1,6 +1,8 @@
 #ifndef _LINUX_SCHED_DEBUG_H
 #define _LINUX_SCHED_DEBUG_H
 
+#include 
+
 /*
  * Various scheduler/task debugging interfaces:
  */
@@ -10,13 +12,13 @@
 extern void dump_cpu_task(int cpu);
 
 /*
- * Only dump TASK_* tasks. (0 for all tasks)
+ * Only dump TASK_* tasks. (TASK_ALL_BITS for all tasks)
  */
 extern void show_state_filter(unsigned long state_filter);
 
 static inline void show_state(void)
 {
-   show_state_filter(0);
+   show_state_filter(TASK_ALL_BITS);
 }
 
 struct pt_regs;
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 0869b20..f9b9529 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -5161,19 +5161,20 @@ void show_state_filter(unsigned long state_filter)
 */
touch_nmi_watchdog();
touch_all_softlockup_watchdogs();
-   if (!state_filter || (p->state & state_filter))
+   /* in case we want to set TASK_RUNNING specifically */
+   if ((p->state != TASK_RUNNING ? p->state << 1 : 1) & 
state_filter)
sched_show_task(p);
}
 
 #ifdef CONFIG_SCHED_DEBUG
-   if (!state_filter)
+   if (state_filter == TASK_ALL_BITS)
sysrq_sched_debug_show();
 #endif
rcu_read_unlock();
/*
 * Only show locks if all tasks are dumped:
 */
-   if (!state_filter)
+   if (state_filter == TASK_ALL_BITS)
debug_show_all_locks();
 }
 
-- 
1.8.3.1



[PATCH] spi/bcm63xx-hspi: fix error return code in bcm63xx_hsspi_probe()

2017-08-07 Thread Gustavo A. R. Silva
platform_get_irq() returns an error code, but the spi-bcm63xx-hsspi
driver ignores it and always returns -ENXIO. This is not correct and,
prevents -EPROBE_DEFER from being propagated properly.

Notice that platform_get_irq() no longer returns 0 on error:
https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=e330b9a6bb35dc7097a4f02cb1ae7b6f96df92af

Print and propagate the return value of platform_get_irq on failure.

This issue was detected with the help of Coccinelle.

Signed-off-by: Gustavo A. R. Silva 
---
 drivers/spi/spi-bcm63xx-hsspi.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/spi/spi-bcm63xx-hsspi.c b/drivers/spi/spi-bcm63xx-hsspi.c
index 475a790..cbcba61 100644
--- a/drivers/spi/spi-bcm63xx-hsspi.c
+++ b/drivers/spi/spi-bcm63xx-hsspi.c
@@ -338,8 +338,8 @@ static int bcm63xx_hsspi_probe(struct platform_device *pdev)
 
irq = platform_get_irq(pdev, 0);
if (irq < 0) {
-   dev_err(dev, "no irq\n");
-   return -ENXIO;
+   dev_err(dev, "no irq: %d\n", irq);
+   return irq;
}
 
res_mem = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-- 
2.5.0



[PATCH] spi/bcm63xx-hspi: fix error return code in bcm63xx_hsspi_probe()

2017-08-07 Thread Gustavo A. R. Silva
platform_get_irq() returns an error code, but the spi-bcm63xx-hsspi
driver ignores it and always returns -ENXIO. This is not correct and,
prevents -EPROBE_DEFER from being propagated properly.

Notice that platform_get_irq() no longer returns 0 on error:
https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=e330b9a6bb35dc7097a4f02cb1ae7b6f96df92af

Print and propagate the return value of platform_get_irq on failure.

This issue was detected with the help of Coccinelle.

Signed-off-by: Gustavo A. R. Silva 
---
 drivers/spi/spi-bcm63xx-hsspi.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/spi/spi-bcm63xx-hsspi.c b/drivers/spi/spi-bcm63xx-hsspi.c
index 475a790..cbcba61 100644
--- a/drivers/spi/spi-bcm63xx-hsspi.c
+++ b/drivers/spi/spi-bcm63xx-hsspi.c
@@ -338,8 +338,8 @@ static int bcm63xx_hsspi_probe(struct platform_device *pdev)
 
irq = platform_get_irq(pdev, 0);
if (irq < 0) {
-   dev_err(dev, "no irq\n");
-   return -ENXIO;
+   dev_err(dev, "no irq: %d\n", irq);
+   return irq;
}
 
res_mem = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-- 
2.5.0



[PATCH] spi/bcm63xx: fix error return code in bcm63xx_spi_probe()

2017-08-07 Thread Gustavo A. R. Silva
platform_get_irq() returns an error code, but the spi-bcm63xx driver
ignores it and always returns -ENXIO. This is not correct and,
prevents -EPROBE_DEFER from being propagated properly.

Notice that platform_get_irq() no longer returns 0 on error:
https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=e330b9a6bb35dc7097a4f02cb1ae7b6f96df92af

Print and propagate the return value of platform_get_irq on failure.

This issue was detected with the help of Coccinelle.

Signed-off-by: Gustavo A. R. Silva 
---
 drivers/spi/spi-bcm63xx.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/spi/spi-bcm63xx.c b/drivers/spi/spi-bcm63xx.c
index 84c7356..bfe5754 100644
--- a/drivers/spi/spi-bcm63xx.c
+++ b/drivers/spi/spi-bcm63xx.c
@@ -530,8 +530,8 @@ static int bcm63xx_spi_probe(struct platform_device *pdev)
 
irq = platform_get_irq(pdev, 0);
if (irq < 0) {
-   dev_err(dev, "no irq\n");
-   return -ENXIO;
+   dev_err(dev, "no irq: %d\n", irq);
+   return irq;
}
 
clk = devm_clk_get(dev, "spi");
-- 
2.5.0



[PATCH] spi/bcm63xx: fix error return code in bcm63xx_spi_probe()

2017-08-07 Thread Gustavo A. R. Silva
platform_get_irq() returns an error code, but the spi-bcm63xx driver
ignores it and always returns -ENXIO. This is not correct and,
prevents -EPROBE_DEFER from being propagated properly.

Notice that platform_get_irq() no longer returns 0 on error:
https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=e330b9a6bb35dc7097a4f02cb1ae7b6f96df92af

Print and propagate the return value of platform_get_irq on failure.

This issue was detected with the help of Coccinelle.

Signed-off-by: Gustavo A. R. Silva 
---
 drivers/spi/spi-bcm63xx.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/spi/spi-bcm63xx.c b/drivers/spi/spi-bcm63xx.c
index 84c7356..bfe5754 100644
--- a/drivers/spi/spi-bcm63xx.c
+++ b/drivers/spi/spi-bcm63xx.c
@@ -530,8 +530,8 @@ static int bcm63xx_spi_probe(struct platform_device *pdev)
 
irq = platform_get_irq(pdev, 0);
if (irq < 0) {
-   dev_err(dev, "no irq\n");
-   return -ENXIO;
+   dev_err(dev, "no irq: %d\n", irq);
+   return irq;
}
 
clk = devm_clk_get(dev, "spi");
-- 
2.5.0



Re: [MD] Crash with 4.12+ kernel and high disk load -- bisected to 4ad23a976413: MD: use per-cpu counter for writes_pending

2017-08-07 Thread Shaohua Li
On Mon, Aug 07, 2017 at 01:20:25PM +0200, Dominik Brodowski wrote:
> Neil, Shaohua,
> 
> following up on David R's bug message: I have observed something similar
> on v4.12.[345] and v4.13-rc4, but not on v4.11. This is a RAID1 (on bare
> metal partitions, /dev/sdaX and /dev/sdbY linked together). In case it
> matters: Further upwards are cryptsetup, a DM volume group, then logical
> volumes, and then filesystems (ext4, but also happened with xfs).
> 
> In a tedious bisect (the bug wasn't as quickly reproducible as I would like,
> but happened when I repeatedly created large lvs and filled them with some
> content, while compiling kernels in parallel), I was able to track this
> down to:
> 
> 
> commit 4ad23a976413aa57fe5ba7a25953dc35ccca5b71
> Author: NeilBrown 
> Date:   Wed Mar 15 14:05:14 2017 +1100
> 
> MD: use per-cpu counter for writes_pending
> 
> The 'writes_pending' counter is used to determine when the
> array is stable so that it can be marked in the superblock
> as "Clean".  Consequently it needs to be updated frequently
> but only checked for zero occasionally.  Recent changes to
> raid5 cause the count to be updated even more often - once
> per 4K rather than once per bio.  This provided
> justification for making the updates more efficient.
> 
> ...
> 
> 
> CC'ing t...@kernel.org, as 4ad23a976413 is the first (and only?) user
> of percpu_ref_switch_to_atomic_sync() introduced in 210f7cdcf088.
> 
> Applying a415c0f10627 on top of 4ad23a976413 does *not* fix the issue, but
> reverting all of a2bfc6753065, a415c0f10627 and 4ad23a976413 seems to fix
> the issue for v4.12.5.

Spent some time to check this one, unfortunately I can't find how that patch
makes rcu stall. the percpu part looks good to me too. Can you double check if
reverting 4ad23a976413aa57 makes the issue go away? When the rcu stall happens,
what the /sys/block/md/md0/array_state? please also attach /proc/mdstat. When
you say the mdx_raid1 threads are in 'R' state, can you double check if the
/proc/pid/stack always 0xff?

Thanks,
Shaohua
> In addition, I can provide the following stack traces, which appear in dmesg
> around the time the system becomes more or less unusuable, with one or more
> of the md[0123]_raid1 threads in the "R" state.
> 
> ...  ...
> [  142.275244] INFO: rcu_sched self-detected stall on CPU
> [  142.275386]  4-...: (5999 ticks this GP) idle=d8a/141/0 
> softirq=2404/2404 fqs=2954
> [  142.275441]   (t=6000 jiffies g=645 c=644 q=199031)
> [  142.275490] NMI backtrace for cpu 4
> [  142.275537] CPU: 4 PID: 1164 Comm: md2_raid1 Not tainted 4.12.4 #2
> [  142.275586] Hardware name: MSI MS-7522/MSI X58 Pro (MS-7522)  , BIOS 
> V8.14B8 11/09/2012
> [  142.275640] Call Trace:
> [  142.275683]  
> [  142.275728]  dump_stack+0x4d/0x6a
> [  142.275775]  nmi_cpu_backtrace+0x9b/0xa0
> [  142.275822]  ? irq_force_complete_move+0xf0/0xf0
> [  142.275869]  nmi_trigger_cpumask_backtrace+0x8f/0xc0
> [  142.275918]  arch_trigger_cpumask_backtrace+0x14/0x20
> [  142.275967]  rcu_dump_cpu_stacks+0x8f/0xd9
> [  142.276016]  rcu_check_callbacks+0x62e/0x780
> [  142.276064]  ? acct_account_cputime+0x17/0x20
> [  142.276111]  update_process_times+0x2a/0x50
> [  142.276159]  tick_sched_handle.isra.18+0x2d/0x30
> [  142.276222]  tick_sched_timer+0x38/0x70
> [  142.276283]  __hrtimer_run_queues+0xbe/0x120
> [  142.276345]  hrtimer_interrupt+0xa3/0x190
> [  142.276408]  local_apic_timer_interrupt+0x33/0x60
> [  142.276471]  smp_apic_timer_interrupt+0x33/0x50
> [  142.276534]  apic_timer_interrupt+0x86/0x90
> [  142.276598] RIP: 0010:__wake_up+0x44/0x50
> [  142.276658] RSP: 0018:c9f8fd88 EFLAGS: 0246 ORIG_RAX: 
> ff10
> [  142.276742] RAX: 81a84bc0 RBX: 880235cf8800 RCX: 
> 
> [  142.276809] RDX: 81a84bd8 RSI: 0246 RDI: 
> 81a84bd0
> [  142.276876] RBP: c9f8fd98 R08:  R09: 
> 0001
> [  142.276943] R10:  R11:  R12: 
> 880235cf8800
> [  142.277009] R13: 880235eb2c28 R14: 0001 R15: 
> 
> [  142.277076]  
> [  142.277136]  md_check_recovery+0x30b/0x4a0
> [  142.277199]  raid1d+0x4c/0x810
> [  142.277258]  md_thread+0x11a/0x150
> [  142.277319]  ? md_thread+0x11a/0x150
> [  142.277379]  ? __wake_up_common+0x80/0x80
> [  142.277442]  kthread+0x11a/0x150
> [  142.277502]  ? find_pers+0x70/0x70
> [  142.277562]  ? __kthread_create_on_node+0x140/0x140
> [  142.277625]  ret_from_fork+0x22/0x30
> 
> ... or this one (on v4.12.5):
> [ 1294.560172] INFO: rcu_sched self-detected stall on CPU  
> [ 1294.560285]  2-...: (6000 ticks this GP) idle=f06/141/0 
> softirq=140681/140681 fqs=2988
> [ 1294.560365]   (t=6001 jiffies g=28666 c=28665 q=129416)
> [ 1294.560426] NMI backtrace for cpu 2
> [ 1294.560483] CPU: 2 PID: 1173 Comm: md3_raid1 Not tainted 4.12.5 #1
> [ 1294.560543] 

Re: [MD] Crash with 4.12+ kernel and high disk load -- bisected to 4ad23a976413: MD: use per-cpu counter for writes_pending

2017-08-07 Thread Shaohua Li
On Mon, Aug 07, 2017 at 01:20:25PM +0200, Dominik Brodowski wrote:
> Neil, Shaohua,
> 
> following up on David R's bug message: I have observed something similar
> on v4.12.[345] and v4.13-rc4, but not on v4.11. This is a RAID1 (on bare
> metal partitions, /dev/sdaX and /dev/sdbY linked together). In case it
> matters: Further upwards are cryptsetup, a DM volume group, then logical
> volumes, and then filesystems (ext4, but also happened with xfs).
> 
> In a tedious bisect (the bug wasn't as quickly reproducible as I would like,
> but happened when I repeatedly created large lvs and filled them with some
> content, while compiling kernels in parallel), I was able to track this
> down to:
> 
> 
> commit 4ad23a976413aa57fe5ba7a25953dc35ccca5b71
> Author: NeilBrown 
> Date:   Wed Mar 15 14:05:14 2017 +1100
> 
> MD: use per-cpu counter for writes_pending
> 
> The 'writes_pending' counter is used to determine when the
> array is stable so that it can be marked in the superblock
> as "Clean".  Consequently it needs to be updated frequently
> but only checked for zero occasionally.  Recent changes to
> raid5 cause the count to be updated even more often - once
> per 4K rather than once per bio.  This provided
> justification for making the updates more efficient.
> 
> ...
> 
> 
> CC'ing t...@kernel.org, as 4ad23a976413 is the first (and only?) user
> of percpu_ref_switch_to_atomic_sync() introduced in 210f7cdcf088.
> 
> Applying a415c0f10627 on top of 4ad23a976413 does *not* fix the issue, but
> reverting all of a2bfc6753065, a415c0f10627 and 4ad23a976413 seems to fix
> the issue for v4.12.5.

Spent some time to check this one, unfortunately I can't find how that patch
makes rcu stall. the percpu part looks good to me too. Can you double check if
reverting 4ad23a976413aa57 makes the issue go away? When the rcu stall happens,
what the /sys/block/md/md0/array_state? please also attach /proc/mdstat. When
you say the mdx_raid1 threads are in 'R' state, can you double check if the
/proc/pid/stack always 0xff?

Thanks,
Shaohua
> In addition, I can provide the following stack traces, which appear in dmesg
> around the time the system becomes more or less unusuable, with one or more
> of the md[0123]_raid1 threads in the "R" state.
> 
> ...  ...
> [  142.275244] INFO: rcu_sched self-detected stall on CPU
> [  142.275386]  4-...: (5999 ticks this GP) idle=d8a/141/0 
> softirq=2404/2404 fqs=2954
> [  142.275441]   (t=6000 jiffies g=645 c=644 q=199031)
> [  142.275490] NMI backtrace for cpu 4
> [  142.275537] CPU: 4 PID: 1164 Comm: md2_raid1 Not tainted 4.12.4 #2
> [  142.275586] Hardware name: MSI MS-7522/MSI X58 Pro (MS-7522)  , BIOS 
> V8.14B8 11/09/2012
> [  142.275640] Call Trace:
> [  142.275683]  
> [  142.275728]  dump_stack+0x4d/0x6a
> [  142.275775]  nmi_cpu_backtrace+0x9b/0xa0
> [  142.275822]  ? irq_force_complete_move+0xf0/0xf0
> [  142.275869]  nmi_trigger_cpumask_backtrace+0x8f/0xc0
> [  142.275918]  arch_trigger_cpumask_backtrace+0x14/0x20
> [  142.275967]  rcu_dump_cpu_stacks+0x8f/0xd9
> [  142.276016]  rcu_check_callbacks+0x62e/0x780
> [  142.276064]  ? acct_account_cputime+0x17/0x20
> [  142.276111]  update_process_times+0x2a/0x50
> [  142.276159]  tick_sched_handle.isra.18+0x2d/0x30
> [  142.276222]  tick_sched_timer+0x38/0x70
> [  142.276283]  __hrtimer_run_queues+0xbe/0x120
> [  142.276345]  hrtimer_interrupt+0xa3/0x190
> [  142.276408]  local_apic_timer_interrupt+0x33/0x60
> [  142.276471]  smp_apic_timer_interrupt+0x33/0x50
> [  142.276534]  apic_timer_interrupt+0x86/0x90
> [  142.276598] RIP: 0010:__wake_up+0x44/0x50
> [  142.276658] RSP: 0018:c9f8fd88 EFLAGS: 0246 ORIG_RAX: 
> ff10
> [  142.276742] RAX: 81a84bc0 RBX: 880235cf8800 RCX: 
> 
> [  142.276809] RDX: 81a84bd8 RSI: 0246 RDI: 
> 81a84bd0
> [  142.276876] RBP: c9f8fd98 R08:  R09: 
> 0001
> [  142.276943] R10:  R11:  R12: 
> 880235cf8800
> [  142.277009] R13: 880235eb2c28 R14: 0001 R15: 
> 
> [  142.277076]  
> [  142.277136]  md_check_recovery+0x30b/0x4a0
> [  142.277199]  raid1d+0x4c/0x810
> [  142.277258]  md_thread+0x11a/0x150
> [  142.277319]  ? md_thread+0x11a/0x150
> [  142.277379]  ? __wake_up_common+0x80/0x80
> [  142.277442]  kthread+0x11a/0x150
> [  142.277502]  ? find_pers+0x70/0x70
> [  142.277562]  ? __kthread_create_on_node+0x140/0x140
> [  142.277625]  ret_from_fork+0x22/0x30
> 
> ... or this one (on v4.12.5):
> [ 1294.560172] INFO: rcu_sched self-detected stall on CPU  
> [ 1294.560285]  2-...: (6000 ticks this GP) idle=f06/141/0 
> softirq=140681/140681 fqs=2988
> [ 1294.560365]   (t=6001 jiffies g=28666 c=28665 q=129416)
> [ 1294.560426] NMI backtrace for cpu 2
> [ 1294.560483] CPU: 2 PID: 1173 Comm: md3_raid1 Not tainted 4.12.5 #1
> [ 1294.560543] Hardware name: MSI 

Re: linux-next: Signed-off-by missing for commit in the scsi-mkp tree

2017-08-07 Thread Finn Thain
On Tue, 8 Aug 2017, Stephen Rothwell wrote:

> Hi Martin,
> 
> Commit
> 
>   facfc963ae92 ("scsi: g_NCR5380: Two DTC436 PDMA workarounds")
> 
> is missing a Signed-off-by from its author.
> 

Sorry about that. The patch was a joint effort.

Ondrej, would you please send your "Signed-off-by" tag so that Martin can 
amend this commit (if need be).

-- 


Re: linux-next: Signed-off-by missing for commit in the scsi-mkp tree

2017-08-07 Thread Finn Thain
On Tue, 8 Aug 2017, Stephen Rothwell wrote:

> Hi Martin,
> 
> Commit
> 
>   facfc963ae92 ("scsi: g_NCR5380: Two DTC436 PDMA workarounds")
> 
> is missing a Signed-off-by from its author.
> 

Sorry about that. The patch was a joint effort.

Ondrej, would you please send your "Signed-off-by" tag so that Martin can 
amend this commit (if need be).

-- 


[PATCH] usb: dwc3: omap: fix error return code in dwc3_omap_probe()

2017-08-07 Thread Gustavo A. R. Silva
platform_get_irq() returns an error code, but the dwc3-omap driver
ignores it and always returns -EINVAL. This is not correct and,
prevents -EPROBE_DEFER from being propagated properly.

Notice that platform_get_irq() no longer returns 0 on error:
https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=e330b9a6bb35dc7097a4f02cb1ae7b6f96df92af

Print and propagate the return value of platform_get_irq on failure.

This issue was detected with the help of Coccinelle.

Signed-off-by: Gustavo A. R. Silva 
---
 drivers/usb/dwc3/dwc3-omap.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/usb/dwc3/dwc3-omap.c b/drivers/usb/dwc3/dwc3-omap.c
index f5aaa0c..3530795 100644
--- a/drivers/usb/dwc3/dwc3-omap.c
+++ b/drivers/usb/dwc3/dwc3-omap.c
@@ -478,8 +478,8 @@ static int dwc3_omap_probe(struct platform_device *pdev)
 
irq = platform_get_irq(pdev, 0);
if (irq < 0) {
-   dev_err(dev, "missing IRQ resource\n");
-   return -EINVAL;
+   dev_err(dev, "missing IRQ resource: %d\n", irq);
+   return irq;
}
 
res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-- 
2.5.0



[PATCH] usb: dwc3: omap: fix error return code in dwc3_omap_probe()

2017-08-07 Thread Gustavo A. R. Silva
platform_get_irq() returns an error code, but the dwc3-omap driver
ignores it and always returns -EINVAL. This is not correct and,
prevents -EPROBE_DEFER from being propagated properly.

Notice that platform_get_irq() no longer returns 0 on error:
https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=e330b9a6bb35dc7097a4f02cb1ae7b6f96df92af

Print and propagate the return value of platform_get_irq on failure.

This issue was detected with the help of Coccinelle.

Signed-off-by: Gustavo A. R. Silva 
---
 drivers/usb/dwc3/dwc3-omap.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/usb/dwc3/dwc3-omap.c b/drivers/usb/dwc3/dwc3-omap.c
index f5aaa0c..3530795 100644
--- a/drivers/usb/dwc3/dwc3-omap.c
+++ b/drivers/usb/dwc3/dwc3-omap.c
@@ -478,8 +478,8 @@ static int dwc3_omap_probe(struct platform_device *pdev)
 
irq = platform_get_irq(pdev, 0);
if (irq < 0) {
-   dev_err(dev, "missing IRQ resource\n");
-   return -EINVAL;
+   dev_err(dev, "missing IRQ resource: %d\n", irq);
+   return irq;
}
 
res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-- 
2.5.0



Re: [PATCH 0/6] In-kernel QMI handling

2017-08-07 Thread Bjorn Andersson
On Mon 07 Aug 12:19 PDT 2017, Marcel Holtmann wrote:

> Hi Bjorn,
> 
> >>> This series starts by moving the common definitions of the QMUX
> >>> protocol to the
> >>> uapi header, as they are shared with clients - both in kernel and
> >>> userspace.
> >>> 
> >>> This series then introduces in-kernel helper functions for aiding the
> >>> handling
> >>> of QMI encoded messages in the kernel. QMI encoding is a wire-format
> >>> used in
> >>> exchanging messages between the majority of QRTR clients and
> >>> services.
> >> 
> >> This raises a few red-flags for me.
> > 
> > I'm glad it does. In discussions with the responsible team within
> > Qualcomm I've highlighted a number of concerns about enabling this
> > support in the kernel. Together we're continuously looking into what
> > should be pushed out to user space, and trying to not introduce
> > unnecessary new users.
> > 
> >> So far, we've kept almost everything QMI related in userspace and
> >> handled all QMI control-channel messages from libraries like libqmi or
> >> uqmi via the cdc-wdm driver and the "rmnet" interface via the qmi_wwan
> >> driver.  The kernel drivers just serve as the transport.
> >> 
> > 
> > The path that was taken to support the MSM-style devices was to
> > implement net/qrtr, which exposes a socket interface to abstract the
> > physical transports (QMUX or IPCROUTER in Qualcomm terminology).
> > 
> > As I share you view on letting the kernel handle the transportation only
> > the task of keeping track of registered services (service id -> node and
> > port mapping) was done in a user space process and so far we've only
> > ever have to deal with QMI encoded messages in various user space tools.
> 
> I think that the transport and multiplexing can be in the kernel as
> long as it is done as proper subsystem. Similar to Phonet or CAIF.
> Meaning it should have a well defined socket interface that can be
> easily used from userspace, but also a clean in-kernel interface
> handling.
> 

In a mobile Qualcomm device there's a few different components involved
here: message routing, QMUX protocol and QMI-encoding.

The downstream Qualcomm kernel implements the two first in the
IPCROUTER, upstream this is split between the kernel net/qrtr and a user
space service-register implementing the QMUX protocol for knowing where
services are located.

The common encoding of messages passed between endpoints of the message
routing is QMI, which is made an affair totally that of each client.

> If Qualcomm is supportive of this effort and is willing to actually
> assist and/or open some of the specs or interface descriptions, then
> this is a good thing. Service registration and cleanup is really done
> best in the kernel. Same applies to multiplexing. Trying to do
> multiplexing in userspace is always cumbersome and leads to overhead
> that is of no gain. For example within oFono, we had to force
> everything to go via oFono since it was the only sane way of handling
> it. Other approaches were error prone and full of race conditions. You
> need a central entity that can clean up.
> 

The current upstream solution depends on a collaboration between
net/qrtr and the user space service register for figuring out whom to
send messages to. After that muxing et al is handled by the socket
interface and service registry does not need to be involved.

Qualcomm is very supporting of this solution and we're collaborating on
transitioning "downstream" to use this implementation.

> For the definition of an UAPI to share some code, I am actually not
> sure that is such a good idea. For example the QMI code in oFono
> follows a way simpler approach. And I am not convinced that all the
> macros are actually beneficial. For example, the whole netlink macros
> are pretty cumbersome. Adding some Documentation/qmi.txt on how the
> wire format looks like and what is expected seems to be a way better
> approach.
> 

The socket interface provided by the kernel expects some knowledge of
the QMUX protocol, for service management. The majority of this
knowledge is already public, but I agree that it would be good to gather
this in a document. The common data structure for the control message is
what I've put in the uapi, as this is used by anyone dealing with
control messages.

When it comes to the QMI-encoded messages these are application
specific, just like e.g. protobuf definitions are application specific.

As the core infrastructure is becoming available upstream and boards
like the DB410c and DB820c aim to be supported by open solutions we will
have a natural place to discuss publication of at least some of the
application level protocols.

Regards,
Bjorn


[PATCH] spi: xlp: fix error return code in xlp_spi_probe()

2017-08-07 Thread Gustavo A. R. Silva
platform_get_irq() returns an error code, but the spi-xlp driver ignores
it and always returns -EINVAL. This is not correct and, prevents
-EPROBE_DEFER from being propagated properly.

Notice that platform_get_irq() no longer returns 0 on error:
https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=e330b9a6bb35dc7097a4f02cb1ae7b6f96df92af

Print and propagate the return value of platform_get_irq on failure.

This issue was detected with the help of Coccinelle.

Signed-off-by: Gustavo A. R. Silva 
---
 drivers/spi/spi-xlp.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/spi/spi-xlp.c b/drivers/spi/spi-xlp.c
index 80cb4d6..74a01b0 100644
--- a/drivers/spi/spi-xlp.c
+++ b/drivers/spi/spi-xlp.c
@@ -393,8 +393,8 @@ static int xlp_spi_probe(struct platform_device *pdev)
 
irq = platform_get_irq(pdev, 0);
if (irq < 0) {
-   dev_err(>dev, "no IRQ resource found\n");
-   return -EINVAL;
+   dev_err(>dev, "no IRQ resource found: %d\n", irq);
+   return irq;
}
err = devm_request_irq(>dev, irq, xlp_spi_interrupt, 0,
pdev->name, xspi);
-- 
2.5.0



Re: [PATCH 0/6] In-kernel QMI handling

2017-08-07 Thread Bjorn Andersson
On Mon 07 Aug 12:19 PDT 2017, Marcel Holtmann wrote:

> Hi Bjorn,
> 
> >>> This series starts by moving the common definitions of the QMUX
> >>> protocol to the
> >>> uapi header, as they are shared with clients - both in kernel and
> >>> userspace.
> >>> 
> >>> This series then introduces in-kernel helper functions for aiding the
> >>> handling
> >>> of QMI encoded messages in the kernel. QMI encoding is a wire-format
> >>> used in
> >>> exchanging messages between the majority of QRTR clients and
> >>> services.
> >> 
> >> This raises a few red-flags for me.
> > 
> > I'm glad it does. In discussions with the responsible team within
> > Qualcomm I've highlighted a number of concerns about enabling this
> > support in the kernel. Together we're continuously looking into what
> > should be pushed out to user space, and trying to not introduce
> > unnecessary new users.
> > 
> >> So far, we've kept almost everything QMI related in userspace and
> >> handled all QMI control-channel messages from libraries like libqmi or
> >> uqmi via the cdc-wdm driver and the "rmnet" interface via the qmi_wwan
> >> driver.  The kernel drivers just serve as the transport.
> >> 
> > 
> > The path that was taken to support the MSM-style devices was to
> > implement net/qrtr, which exposes a socket interface to abstract the
> > physical transports (QMUX or IPCROUTER in Qualcomm terminology).
> > 
> > As I share you view on letting the kernel handle the transportation only
> > the task of keeping track of registered services (service id -> node and
> > port mapping) was done in a user space process and so far we've only
> > ever have to deal with QMI encoded messages in various user space tools.
> 
> I think that the transport and multiplexing can be in the kernel as
> long as it is done as proper subsystem. Similar to Phonet or CAIF.
> Meaning it should have a well defined socket interface that can be
> easily used from userspace, but also a clean in-kernel interface
> handling.
> 

In a mobile Qualcomm device there's a few different components involved
here: message routing, QMUX protocol and QMI-encoding.

The downstream Qualcomm kernel implements the two first in the
IPCROUTER, upstream this is split between the kernel net/qrtr and a user
space service-register implementing the QMUX protocol for knowing where
services are located.

The common encoding of messages passed between endpoints of the message
routing is QMI, which is made an affair totally that of each client.

> If Qualcomm is supportive of this effort and is willing to actually
> assist and/or open some of the specs or interface descriptions, then
> this is a good thing. Service registration and cleanup is really done
> best in the kernel. Same applies to multiplexing. Trying to do
> multiplexing in userspace is always cumbersome and leads to overhead
> that is of no gain. For example within oFono, we had to force
> everything to go via oFono since it was the only sane way of handling
> it. Other approaches were error prone and full of race conditions. You
> need a central entity that can clean up.
> 

The current upstream solution depends on a collaboration between
net/qrtr and the user space service register for figuring out whom to
send messages to. After that muxing et al is handled by the socket
interface and service registry does not need to be involved.

Qualcomm is very supporting of this solution and we're collaborating on
transitioning "downstream" to use this implementation.

> For the definition of an UAPI to share some code, I am actually not
> sure that is such a good idea. For example the QMI code in oFono
> follows a way simpler approach. And I am not convinced that all the
> macros are actually beneficial. For example, the whole netlink macros
> are pretty cumbersome. Adding some Documentation/qmi.txt on how the
> wire format looks like and what is expected seems to be a way better
> approach.
> 

The socket interface provided by the kernel expects some knowledge of
the QMUX protocol, for service management. The majority of this
knowledge is already public, but I agree that it would be good to gather
this in a document. The common data structure for the control message is
what I've put in the uapi, as this is used by anyone dealing with
control messages.

When it comes to the QMI-encoded messages these are application
specific, just like e.g. protobuf definitions are application specific.

As the core infrastructure is becoming available upstream and boards
like the DB410c and DB820c aim to be supported by open solutions we will
have a natural place to discuss publication of at least some of the
application level protocols.

Regards,
Bjorn


[PATCH] spi: xlp: fix error return code in xlp_spi_probe()

2017-08-07 Thread Gustavo A. R. Silva
platform_get_irq() returns an error code, but the spi-xlp driver ignores
it and always returns -EINVAL. This is not correct and, prevents
-EPROBE_DEFER from being propagated properly.

Notice that platform_get_irq() no longer returns 0 on error:
https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=e330b9a6bb35dc7097a4f02cb1ae7b6f96df92af

Print and propagate the return value of platform_get_irq on failure.

This issue was detected with the help of Coccinelle.

Signed-off-by: Gustavo A. R. Silva 
---
 drivers/spi/spi-xlp.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/spi/spi-xlp.c b/drivers/spi/spi-xlp.c
index 80cb4d6..74a01b0 100644
--- a/drivers/spi/spi-xlp.c
+++ b/drivers/spi/spi-xlp.c
@@ -393,8 +393,8 @@ static int xlp_spi_probe(struct platform_device *pdev)
 
irq = platform_get_irq(pdev, 0);
if (irq < 0) {
-   dev_err(>dev, "no IRQ resource found\n");
-   return -EINVAL;
+   dev_err(>dev, "no IRQ resource found: %d\n", irq);
+   return irq;
}
err = devm_request_irq(>dev, irq, xlp_spi_interrupt, 0,
pdev->name, xspi);
-- 
2.5.0



[PATCH 1/2] PCI: iproc: Implement PCI hotplug support

2017-08-07 Thread Oza Pawandeep
This patch implements PCI hotplug support for iproc family chipsets.

Iproc based SOC (e.g. Stingray) does not have hotplug controller
integrated.
Hence, standard PCI hotplug framework hooks can-not be used.
e.g. controlled power up/down of slot.

The mechanism, for e.g. Stingray has adopted for PCI hotplug is as follows:
PCI present lines are input to GPIOs depending on the type of
connector (x2, x4, x8).

GPIO array needs to be present if hotplug is supported.
HW implementation is SOC/Board specific, and also it depends on how
add-in card is designed
(e.g. how many present pins are implemented).

If x8 card is connected, then it might be possible that all the
3 present pins could go low, or at least one pin goes low.
If x4 card is connected, then it might be possible that 2 present
pins go low, or at least one pin goes low.

The implementation essentially takes care of following:
> Initializing hotplug irq thread.
> Detecting the endpoint device based on link state.
> Handling PERST and detecting the plugged devices.
> Ordered hot plug-out, where User is expected
  to write 1 to /sys/bus/pci/devices//remove
> Handling spurious interrupt
> Handling multiple interrupts and makes sure that card is
  enumerated only once.

Signed-off-by: Oza Pawandeep 
Reviewed-by: Ray Jui 

diff --git a/drivers/pci/host/pcie-iproc-platform.c 
b/drivers/pci/host/pcie-iproc-platform.c
index 9512960..e51bbd2 100644
--- a/drivers/pci/host/pcie-iproc-platform.c
+++ b/drivers/pci/host/pcie-iproc-platform.c
@@ -89,6 +89,9 @@ static int iproc_pcie_pltfm_probe(struct platform_device 
*pdev)
pcie->need_ob_cfg = true;
}
 
+   if (of_property_read_bool(np, "brcm,pci-hotplug"))
+   pcie->enable_hotplug = true;
+
/* PHY use is optional */
pcie->phy = devm_phy_get(dev, "pcie-phy");
if (IS_ERR(pcie->phy)) {
diff --git a/drivers/pci/host/pcie-iproc.c b/drivers/pci/host/pcie-iproc.c
index ee40651..c6d1add 100644
--- a/drivers/pci/host/pcie-iproc.c
+++ b/drivers/pci/host/pcie-iproc.c
@@ -28,6 +28,7 @@
 #include 
 #include 
 #include 
+#include 
 
 #include "pcie-iproc.h"
 
@@ -65,6 +66,17 @@
 #define PCIE_DL_ACTIVE_SHIFT 2
 #define PCIE_DL_ACTIVE   BIT(PCIE_DL_ACTIVE_SHIFT)
 
+#define CFG_RC_LTSSM 0x1cf8
+#define CFG_RC_PHY_CTL   0x1804
+#define CFG_RC_LTSSM_TIMEOUT 1000
+#define CFG_RC_LTSSM_STATE_MASK  0xff
+#define CFG_RC_LTSSM_STATE_L10x1
+
+#define CFG_RC_CLR_LTSSM_HIST_SHIFT  29
+#define CFG_RC_CLR_LTSSM_HIST_MASK   BIT(CFG_RC_CLR_LTSSM_HIST_SHIFT)
+#define CFG_RC_CLR_RECOV_HIST_SHIFT  31
+#define CFG_RC_CLR_RECOV_HIST_MASK   BIT(CFG_RC_CLR_RECOV_HIST_SHIFT)
+
 #define APB_ERR_EN_SHIFT 0
 #define APB_ERR_EN   BIT(APB_ERR_EN_SHIFT)
 
@@ -1306,12 +1318,106 @@ static int iproc_pcie_rev_init(struct iproc_pcie *pcie)
return 0;
 }
 
+static bool iproc_pci_hp_check_ltssm(struct iproc_pcie *pcie)
+{
+   struct pci_bus *bus = pcie->root_bus;
+   u32 val, timeout = CFG_RC_LTSSM_TIMEOUT;
+
+   /* Clear LTSSM history. */
+   pci_bus_read_config_dword(pcie->root_bus, 0,
+ CFG_RC_PHY_CTL, );
+   pci_bus_write_config_dword(bus, 0, CFG_RC_PHY_CTL,
+  val | CFG_RC_CLR_RECOV_HIST_MASK |
+  CFG_RC_CLR_LTSSM_HIST_MASK);
+   /* write back the origional value. */
+   pci_bus_write_config_dword(bus, 0, CFG_RC_PHY_CTL, val);
+
+   do {
+   pci_bus_read_config_dword(pcie->root_bus, 0,
+ CFG_RC_LTSSM, );
+   /* check link state to see if link moved to L1 state. */
+   if ((val & CFG_RC_LTSSM_STATE_MASK) ==
+CFG_RC_LTSSM_STATE_L1)
+   return true;
+   timeout--;
+   usleep_range(500, 1000);
+   } while (timeout);
+
+   return false;
+}
+
+static irqreturn_t iproc_pci_hotplug_thread(int irq, void *data)
+{
+   struct iproc_pcie *pcie = data;
+   struct pci_bus *bus = pcie->root_bus, *child;
+   bool link_status;
+
+   iproc_pcie_perst_ctrl(pcie, true);
+   iproc_pcie_perst_ctrl(pcie, false);
+
+   link_status = iproc_pci_hp_check_ltssm(pcie);
+
+   if (link_status &&
+   !iproc_pcie_check_link(pcie, bus) &&
+   !pcie->ep_is_present) {
+   pci_rescan_bus(bus);
+   list_for_each_entry(child, >children, node)
+   pcie_bus_configure_settings(child);
+   pcie->ep_is_present = true;
+   dev_info(pcie->dev,
+"PCI Hotplug: \n");
+   } else if (link_status && pcie->ep_is_present)
+   /*
+* ep_is_present makes sure, enumuration done only once.
+* So it can handle spurious intrrupts, and also if we
+

[PATCH 0/2] PCI hotplug feature

2017-08-07 Thread Oza Pawandeep
These patches bring in PCI hotplug support for iproc family chipsets.

It includes DT binding documentation update and, implementation in
iproc pcie RC driver.

These patch set is made on top of following patches.
[PATCH v6 2/2] PCI: iproc: add device shutdown for PCI RC
[PATCH v6 1/2] PCI: iproc: Retry request when CRS returned from EP

Oza Pawandeep (2):
  PCI: iproc: Implement PCI hotplug support
  PCI: iproc: Add optional brcm,pci-hotplug

 .../devicetree/bindings/pci/brcm,iproc-pcie.txt|  23 
 drivers/pci/host/pcie-iproc-platform.c |   3 +
 drivers/pci/host/pcie-iproc.c  | 130 -
 drivers/pci/host/pcie-iproc.h  |   7 ++
 4 files changed, 157 insertions(+), 6 deletions(-)

-- 
1.9.1



[PATCH 1/2] PCI: iproc: Implement PCI hotplug support

2017-08-07 Thread Oza Pawandeep
This patch implements PCI hotplug support for iproc family chipsets.

Iproc based SOC (e.g. Stingray) does not have hotplug controller
integrated.
Hence, standard PCI hotplug framework hooks can-not be used.
e.g. controlled power up/down of slot.

The mechanism, for e.g. Stingray has adopted for PCI hotplug is as follows:
PCI present lines are input to GPIOs depending on the type of
connector (x2, x4, x8).

GPIO array needs to be present if hotplug is supported.
HW implementation is SOC/Board specific, and also it depends on how
add-in card is designed
(e.g. how many present pins are implemented).

If x8 card is connected, then it might be possible that all the
3 present pins could go low, or at least one pin goes low.
If x4 card is connected, then it might be possible that 2 present
pins go low, or at least one pin goes low.

The implementation essentially takes care of following:
> Initializing hotplug irq thread.
> Detecting the endpoint device based on link state.
> Handling PERST and detecting the plugged devices.
> Ordered hot plug-out, where User is expected
  to write 1 to /sys/bus/pci/devices//remove
> Handling spurious interrupt
> Handling multiple interrupts and makes sure that card is
  enumerated only once.

Signed-off-by: Oza Pawandeep 
Reviewed-by: Ray Jui 

diff --git a/drivers/pci/host/pcie-iproc-platform.c 
b/drivers/pci/host/pcie-iproc-platform.c
index 9512960..e51bbd2 100644
--- a/drivers/pci/host/pcie-iproc-platform.c
+++ b/drivers/pci/host/pcie-iproc-platform.c
@@ -89,6 +89,9 @@ static int iproc_pcie_pltfm_probe(struct platform_device 
*pdev)
pcie->need_ob_cfg = true;
}
 
+   if (of_property_read_bool(np, "brcm,pci-hotplug"))
+   pcie->enable_hotplug = true;
+
/* PHY use is optional */
pcie->phy = devm_phy_get(dev, "pcie-phy");
if (IS_ERR(pcie->phy)) {
diff --git a/drivers/pci/host/pcie-iproc.c b/drivers/pci/host/pcie-iproc.c
index ee40651..c6d1add 100644
--- a/drivers/pci/host/pcie-iproc.c
+++ b/drivers/pci/host/pcie-iproc.c
@@ -28,6 +28,7 @@
 #include 
 #include 
 #include 
+#include 
 
 #include "pcie-iproc.h"
 
@@ -65,6 +66,17 @@
 #define PCIE_DL_ACTIVE_SHIFT 2
 #define PCIE_DL_ACTIVE   BIT(PCIE_DL_ACTIVE_SHIFT)
 
+#define CFG_RC_LTSSM 0x1cf8
+#define CFG_RC_PHY_CTL   0x1804
+#define CFG_RC_LTSSM_TIMEOUT 1000
+#define CFG_RC_LTSSM_STATE_MASK  0xff
+#define CFG_RC_LTSSM_STATE_L10x1
+
+#define CFG_RC_CLR_LTSSM_HIST_SHIFT  29
+#define CFG_RC_CLR_LTSSM_HIST_MASK   BIT(CFG_RC_CLR_LTSSM_HIST_SHIFT)
+#define CFG_RC_CLR_RECOV_HIST_SHIFT  31
+#define CFG_RC_CLR_RECOV_HIST_MASK   BIT(CFG_RC_CLR_RECOV_HIST_SHIFT)
+
 #define APB_ERR_EN_SHIFT 0
 #define APB_ERR_EN   BIT(APB_ERR_EN_SHIFT)
 
@@ -1306,12 +1318,106 @@ static int iproc_pcie_rev_init(struct iproc_pcie *pcie)
return 0;
 }
 
+static bool iproc_pci_hp_check_ltssm(struct iproc_pcie *pcie)
+{
+   struct pci_bus *bus = pcie->root_bus;
+   u32 val, timeout = CFG_RC_LTSSM_TIMEOUT;
+
+   /* Clear LTSSM history. */
+   pci_bus_read_config_dword(pcie->root_bus, 0,
+ CFG_RC_PHY_CTL, );
+   pci_bus_write_config_dword(bus, 0, CFG_RC_PHY_CTL,
+  val | CFG_RC_CLR_RECOV_HIST_MASK |
+  CFG_RC_CLR_LTSSM_HIST_MASK);
+   /* write back the origional value. */
+   pci_bus_write_config_dword(bus, 0, CFG_RC_PHY_CTL, val);
+
+   do {
+   pci_bus_read_config_dword(pcie->root_bus, 0,
+ CFG_RC_LTSSM, );
+   /* check link state to see if link moved to L1 state. */
+   if ((val & CFG_RC_LTSSM_STATE_MASK) ==
+CFG_RC_LTSSM_STATE_L1)
+   return true;
+   timeout--;
+   usleep_range(500, 1000);
+   } while (timeout);
+
+   return false;
+}
+
+static irqreturn_t iproc_pci_hotplug_thread(int irq, void *data)
+{
+   struct iproc_pcie *pcie = data;
+   struct pci_bus *bus = pcie->root_bus, *child;
+   bool link_status;
+
+   iproc_pcie_perst_ctrl(pcie, true);
+   iproc_pcie_perst_ctrl(pcie, false);
+
+   link_status = iproc_pci_hp_check_ltssm(pcie);
+
+   if (link_status &&
+   !iproc_pcie_check_link(pcie, bus) &&
+   !pcie->ep_is_present) {
+   pci_rescan_bus(bus);
+   list_for_each_entry(child, >children, node)
+   pcie_bus_configure_settings(child);
+   pcie->ep_is_present = true;
+   dev_info(pcie->dev,
+"PCI Hotplug: \n");
+   } else if (link_status && pcie->ep_is_present)
+   /*
+* ep_is_present makes sure, enumuration done only once.
+* So it can handle spurious intrrupts, and also if we
+* get multiple interrupts for all the 

[PATCH 0/2] PCI hotplug feature

2017-08-07 Thread Oza Pawandeep
These patches bring in PCI hotplug support for iproc family chipsets.

It includes DT binding documentation update and, implementation in
iproc pcie RC driver.

These patch set is made on top of following patches.
[PATCH v6 2/2] PCI: iproc: add device shutdown for PCI RC
[PATCH v6 1/2] PCI: iproc: Retry request when CRS returned from EP

Oza Pawandeep (2):
  PCI: iproc: Implement PCI hotplug support
  PCI: iproc: Add optional brcm,pci-hotplug

 .../devicetree/bindings/pci/brcm,iproc-pcie.txt|  23 
 drivers/pci/host/pcie-iproc-platform.c |   3 +
 drivers/pci/host/pcie-iproc.c  | 130 -
 drivers/pci/host/pcie-iproc.h  |   7 ++
 4 files changed, 157 insertions(+), 6 deletions(-)

-- 
1.9.1



[PATCH 2/2] PCI: iproc: Add optional brcm,pci-hotplug

2017-08-07 Thread Oza Pawandeep
Add description for optional device tree property
'brcm,pci-hotplug' for PCI hotplug feature.

Signed-off-by: Oza Pawandeep 
Reviewed-by: Ray Jui 

diff --git a/Documentation/devicetree/bindings/pci/brcm,iproc-pcie.txt 
b/Documentation/devicetree/bindings/pci/brcm,iproc-pcie.txt
index b8e48b4..a3bad24 100644
--- a/Documentation/devicetree/bindings/pci/brcm,iproc-pcie.txt
+++ b/Documentation/devicetree/bindings/pci/brcm,iproc-pcie.txt
@@ -72,6 +72,29 @@ Optional properties:
 - brcm,pcie-msi-inten: Needs to be present for some older iProc platforms that
 require the interrupt enable registers to be set explicitly to enable MSI
 
+Optional properties:
+- brcm,pci-hotplug: PCI hotplug feature is supported.
+
+If the brcm,pcie-hotplug property is present, the following properties become
+effective:
+
+- brcm,prsnt-gpio: Array of gpios, needs to be present if Hotplug is supported.
+
+PCI hotplug implementation is SOC/Board specific, and also it depends on
+how add-in card is designed (e.g. how many present pins are implemented).
+
+If x8 card is connected, then it might be possible that all the
+3 present pins could go low, or at least one pin goes low.
+
+If x4 card is connected, then it might be possible that 2 present
+pins go low, or at least one pin goes low.
+
+Example:
+brcm,prsnt-gpio: < 32 1>, < 33 1>;
+This is x4 connector: monitoring max 2 present lines.
+brcm,prsnt-gpio: < 32 1>, < 33 1>, < 34 1>;
+This is x8 connector: monitoring max 3 present lines.
+
 Example:
pcie0: pcie@18012000 {
compatible = "brcm,iproc-pcie";
-- 
1.9.1



[PATCH 2/2] PCI: iproc: Add optional brcm,pci-hotplug

2017-08-07 Thread Oza Pawandeep
Add description for optional device tree property
'brcm,pci-hotplug' for PCI hotplug feature.

Signed-off-by: Oza Pawandeep 
Reviewed-by: Ray Jui 

diff --git a/Documentation/devicetree/bindings/pci/brcm,iproc-pcie.txt 
b/Documentation/devicetree/bindings/pci/brcm,iproc-pcie.txt
index b8e48b4..a3bad24 100644
--- a/Documentation/devicetree/bindings/pci/brcm,iproc-pcie.txt
+++ b/Documentation/devicetree/bindings/pci/brcm,iproc-pcie.txt
@@ -72,6 +72,29 @@ Optional properties:
 - brcm,pcie-msi-inten: Needs to be present for some older iProc platforms that
 require the interrupt enable registers to be set explicitly to enable MSI
 
+Optional properties:
+- brcm,pci-hotplug: PCI hotplug feature is supported.
+
+If the brcm,pcie-hotplug property is present, the following properties become
+effective:
+
+- brcm,prsnt-gpio: Array of gpios, needs to be present if Hotplug is supported.
+
+PCI hotplug implementation is SOC/Board specific, and also it depends on
+how add-in card is designed (e.g. how many present pins are implemented).
+
+If x8 card is connected, then it might be possible that all the
+3 present pins could go low, or at least one pin goes low.
+
+If x4 card is connected, then it might be possible that 2 present
+pins go low, or at least one pin goes low.
+
+Example:
+brcm,prsnt-gpio: < 32 1>, < 33 1>;
+This is x4 connector: monitoring max 2 present lines.
+brcm,prsnt-gpio: < 32 1>, < 33 1>, < 34 1>;
+This is x8 connector: monitoring max 3 present lines.
+
 Example:
pcie0: pcie@18012000 {
compatible = "brcm,iproc-pcie";
-- 
1.9.1



Re: [PATCH] arm64: correct modules range of kernel virtual memory layout

2017-08-07 Thread Miles Chen
On Mon, 2017-08-07 at 15:01 +0100, Will Deacon wrote:
> On Mon, Aug 07, 2017 at 02:18:00PM +0100, Ard Biesheuvel wrote:
> > On 7 August 2017 at 14:16, Will Deacon  wrote:
> > > On Mon, Aug 07, 2017 at 07:04:46PM +0800, Miles Chen wrote:
> > >> The commit f80fb3a3d508 ("arm64: add support for kernel ASLR")
> > >> moved module virtual address to
> > >> [module_alloc_base, module_alloc_base + MODULES_VSIZE).
> > >>
> > >> Display module information of the virtual kernel
> > >> memory layout by using module_alloc_base.
> > >>
> > >> testing output:
> > >> 1) Current implementation:
> > >> Virtual kernel memory layout:
> > >>   modules : 0xff80 - 0xff800800   (   128 MB)
> > >> 2) this patch + KASLR:
> > >> Virtual kernel memory layout:
> > >>   modules : 0xff800056 - 0xff800856   (   128 MB)
> > >> 3) this patch + KASLR and a dummy seed:
> > >> Virtual kernel memory layout:
> > >>   modules : 0xffa7df637000 - 0xffa7e7637000   (   128 MB)
> > >>
> > >> Signed-off-by: Miles Chen 
> > >> ---
> > >>  arch/arm64/mm/init.c | 5 +++--
> > >>  1 file changed, 3 insertions(+), 2 deletions(-)
> > >
> > > Does this mean the modules code in our pt dumper is busted
> > > (arch/arm64/mm/dump.c)? Also, what about KASAN, which uses these addresses
> > > too (in kasan_init)? Should we just remove MODULES_VADDR and MODULES_END
> > > altogether?
> > >
> > 
> > I don't think we need this patch. The 'module' line simply prints the
> > VA region that is reserved for modules. The fact that we end up
> > putting them elsewhere when running randomized does not necessarily
> > mean this line should reflect that.
> 
> I was more concerned by other users of MODULES_VADDR tbh, although I see
> now that we don't randomize the module region if kasan is enabled. Still,
> the kcore code adds the modules region as a separate area (distinct from
> vmalloc) if MODULES_VADDR is defined, the page table dumping code uses
> MODULES_VADDR to identify the module region and I think we'll get false
> positives from is_vmalloc_or_module_addr, which again uses the static
> region.
> 
> So, given that MODULES_VADDR never points at the module area, can't we get
> rid of it?

Agreed.MODULES_VADDR should be phased out. Considering the kernel
modules live somewhere between [VMALLOC_START, VMALLOC_END) now:
(arch/arm64/kernel/module.c:module_alloc). I suggest the following
changes:

1. is_vmalloc_or_module_addr() should return is_vmalloc_addr() directly
2. arch/arm64/mm/dump.c does not need MODULES_VADDR and MODULES_END.
3. kasan uses [module_alloc_base, module_alloc_base + MODULES_VSIZE) to
get the shadow memory? (the kernel modules still live in this range when
kasan is enabled)
4. remove modules line in kernel memory layout
(optional, thanks for Ard's feedback)
5. remove MODULE_VADDR, MODULES_END definition


Miles
> 
> Will




Re: [PATCH] arm64: correct modules range of kernel virtual memory layout

2017-08-07 Thread Miles Chen
On Mon, 2017-08-07 at 15:01 +0100, Will Deacon wrote:
> On Mon, Aug 07, 2017 at 02:18:00PM +0100, Ard Biesheuvel wrote:
> > On 7 August 2017 at 14:16, Will Deacon  wrote:
> > > On Mon, Aug 07, 2017 at 07:04:46PM +0800, Miles Chen wrote:
> > >> The commit f80fb3a3d508 ("arm64: add support for kernel ASLR")
> > >> moved module virtual address to
> > >> [module_alloc_base, module_alloc_base + MODULES_VSIZE).
> > >>
> > >> Display module information of the virtual kernel
> > >> memory layout by using module_alloc_base.
> > >>
> > >> testing output:
> > >> 1) Current implementation:
> > >> Virtual kernel memory layout:
> > >>   modules : 0xff80 - 0xff800800   (   128 MB)
> > >> 2) this patch + KASLR:
> > >> Virtual kernel memory layout:
> > >>   modules : 0xff800056 - 0xff800856   (   128 MB)
> > >> 3) this patch + KASLR and a dummy seed:
> > >> Virtual kernel memory layout:
> > >>   modules : 0xffa7df637000 - 0xffa7e7637000   (   128 MB)
> > >>
> > >> Signed-off-by: Miles Chen 
> > >> ---
> > >>  arch/arm64/mm/init.c | 5 +++--
> > >>  1 file changed, 3 insertions(+), 2 deletions(-)
> > >
> > > Does this mean the modules code in our pt dumper is busted
> > > (arch/arm64/mm/dump.c)? Also, what about KASAN, which uses these addresses
> > > too (in kasan_init)? Should we just remove MODULES_VADDR and MODULES_END
> > > altogether?
> > >
> > 
> > I don't think we need this patch. The 'module' line simply prints the
> > VA region that is reserved for modules. The fact that we end up
> > putting them elsewhere when running randomized does not necessarily
> > mean this line should reflect that.
> 
> I was more concerned by other users of MODULES_VADDR tbh, although I see
> now that we don't randomize the module region if kasan is enabled. Still,
> the kcore code adds the modules region as a separate area (distinct from
> vmalloc) if MODULES_VADDR is defined, the page table dumping code uses
> MODULES_VADDR to identify the module region and I think we'll get false
> positives from is_vmalloc_or_module_addr, which again uses the static
> region.
> 
> So, given that MODULES_VADDR never points at the module area, can't we get
> rid of it?

Agreed.MODULES_VADDR should be phased out. Considering the kernel
modules live somewhere between [VMALLOC_START, VMALLOC_END) now:
(arch/arm64/kernel/module.c:module_alloc). I suggest the following
changes:

1. is_vmalloc_or_module_addr() should return is_vmalloc_addr() directly
2. arch/arm64/mm/dump.c does not need MODULES_VADDR and MODULES_END.
3. kasan uses [module_alloc_base, module_alloc_base + MODULES_VSIZE) to
get the shadow memory? (the kernel modules still live in this range when
kasan is enabled)
4. remove modules line in kernel memory layout
(optional, thanks for Ard's feedback)
5. remove MODULE_VADDR, MODULES_END definition


Miles
> 
> Will




[PATCH v2] perf/core: Avoid context switch overheads

2017-08-07 Thread 石祤
From: "leilei.lin" 

A performance issue caused by less strickly check in task
sched when these tasks were once attached by per-task perf_event.

A task will alloc task->perf_event_ctxp[ctxn] when it was called
by perf_event_open, and task->perf_event_ctxp[ctxn] would not
ever be freed to NULL.

__perf_event_task_sched_in()
if (task->perf_event_ctxp[ctxn]) //  here is always true
perf_event_context_sched_in() // operate pmu

50% at most performance overhead was observed under some extreme
test case. Therefor, add a more strick check as to ctx->nr_events,
when ctx->nr_events == 0, it's no need to continue.

Signed-off-by: leilei.lin 
---
 kernel/events/core.c | 9 +
 1 file changed, 9 insertions(+)

diff --git a/kernel/events/core.c b/kernel/events/core.c
index 426c2ff..3d86695 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -3180,6 +3180,13 @@ static void perf_event_context_sched_in(struct 
perf_event_context *ctx,
return;
 
perf_ctx_lock(cpuctx, ctx);
+   /*
+* We must check ctx->nr_events while holding ctx->lock, such
+* that we serialize against perf_install_in_context().
+*/
+   if (!cpuctx->task_ctx && !ctx->nr_events)
+   goto unlock;
+
perf_pmu_disable(ctx->pmu);
/*
 * We want to keep the following priority order:
@@ -3193,6 +3200,8 @@ static void perf_event_context_sched_in(struct 
perf_event_context *ctx,
cpu_ctx_sched_out(cpuctx, EVENT_FLEXIBLE);
perf_event_sched_in(cpuctx, ctx, task);
perf_pmu_enable(ctx->pmu);
+
+unlock:
perf_ctx_unlock(cpuctx, ctx);
 }
 
-- 
2.8.4.31.g9ed660f



[PATCH v2] perf/core: Avoid context switch overheads

2017-08-07 Thread 石祤
From: "leilei.lin" 

A performance issue caused by less strickly check in task
sched when these tasks were once attached by per-task perf_event.

A task will alloc task->perf_event_ctxp[ctxn] when it was called
by perf_event_open, and task->perf_event_ctxp[ctxn] would not
ever be freed to NULL.

__perf_event_task_sched_in()
if (task->perf_event_ctxp[ctxn]) //  here is always true
perf_event_context_sched_in() // operate pmu

50% at most performance overhead was observed under some extreme
test case. Therefor, add a more strick check as to ctx->nr_events,
when ctx->nr_events == 0, it's no need to continue.

Signed-off-by: leilei.lin 
---
 kernel/events/core.c | 9 +
 1 file changed, 9 insertions(+)

diff --git a/kernel/events/core.c b/kernel/events/core.c
index 426c2ff..3d86695 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -3180,6 +3180,13 @@ static void perf_event_context_sched_in(struct 
perf_event_context *ctx,
return;
 
perf_ctx_lock(cpuctx, ctx);
+   /*
+* We must check ctx->nr_events while holding ctx->lock, such
+* that we serialize against perf_install_in_context().
+*/
+   if (!cpuctx->task_ctx && !ctx->nr_events)
+   goto unlock;
+
perf_pmu_disable(ctx->pmu);
/*
 * We want to keep the following priority order:
@@ -3193,6 +3200,8 @@ static void perf_event_context_sched_in(struct 
perf_event_context *ctx,
cpu_ctx_sched_out(cpuctx, EVENT_FLEXIBLE);
perf_event_sched_in(cpuctx, ctx, task);
perf_pmu_enable(ctx->pmu);
+
+unlock:
perf_ctx_unlock(cpuctx, ctx);
 }
 
-- 
2.8.4.31.g9ed660f



[PATCH 2/3] autofs - make disc device user accessible

2017-08-07 Thread Ian Kent
The autofs miscellanous device ioctls that shouldn't require
CAP_SYS_ADMIN need to be accessible to user space applications in
order to be able to get information about autofs mounts.

The module checks capabilities so the miscelaneous device should
be fine with broad permissions.

Signed-off-by: Ian Kent 
Cc: Colin Walters 
Cc: Ondrej Holy 
---
 fs/autofs4/dev-ioctl.c |3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/fs/autofs4/dev-ioctl.c b/fs/autofs4/dev-ioctl.c
index dd9f1bebb5a3..218a4ecc75cc 100644
--- a/fs/autofs4/dev-ioctl.c
+++ b/fs/autofs4/dev-ioctl.c
@@ -733,7 +733,8 @@ static const struct file_operations _dev_ioctl_fops = {
 static struct miscdevice _autofs_dev_ioctl_misc = {
.minor  = AUTOFS_MINOR,
.name   = AUTOFS_DEVICE_NAME,
-   .fops   = &_dev_ioctl_fops
+   .fops   = &_dev_ioctl_fops,
+   .mode   = 0644,
 };
 
 MODULE_ALIAS_MISCDEV(AUTOFS_MINOR);



[PATCH 2/3] autofs - make disc device user accessible

2017-08-07 Thread Ian Kent
The autofs miscellanous device ioctls that shouldn't require
CAP_SYS_ADMIN need to be accessible to user space applications in
order to be able to get information about autofs mounts.

The module checks capabilities so the miscelaneous device should
be fine with broad permissions.

Signed-off-by: Ian Kent 
Cc: Colin Walters 
Cc: Ondrej Holy 
---
 fs/autofs4/dev-ioctl.c |3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/fs/autofs4/dev-ioctl.c b/fs/autofs4/dev-ioctl.c
index dd9f1bebb5a3..218a4ecc75cc 100644
--- a/fs/autofs4/dev-ioctl.c
+++ b/fs/autofs4/dev-ioctl.c
@@ -733,7 +733,8 @@ static const struct file_operations _dev_ioctl_fops = {
 static struct miscdevice _autofs_dev_ioctl_misc = {
.minor  = AUTOFS_MINOR,
.name   = AUTOFS_DEVICE_NAME,
-   .fops   = &_dev_ioctl_fops
+   .fops   = &_dev_ioctl_fops,
+   .mode   = 0644,
 };
 
 MODULE_ALIAS_MISCDEV(AUTOFS_MINOR);



[PATCH 1/3] autofs - fix AT_NO_AUTOMOUNT not being honored

2017-08-07 Thread Ian Kent
The fstatat(2) and statx() calls can pass the flag AT_NO_AUTOMOUNT
which is meant to clear the LOOKUP_AUTOMOUNT flag and prevent triggering
of an automount by the call. But this flag is unconditionally cleared
for all stat family system calls except statx().

stat family system calls have always triggered mount requests for the
negative dentry case in follow_automount() which is intended but prevents
the fstatat(2) and statx() AT_NO_AUTOMOUNT case from being handled.

In order to handle the AT_NO_AUTOMOUNT for both system calls the
negative dentry case in follow_automount() needs to be changed to
return ENOENT when the LOOKUP_AUTOMOUNT flag is clear (and the other
required flags are clear).

AFAICT this change doesn't have any noticable side effects and may,
in some use cases (although I didn't see it in testing) prevent
unnecessary callbacks to the automount daemon.

It's also possible that a stat family call has been made with a
path that is in the process of being mounted by some other process.
But stat family calls should return the automount state of the path
as it is "now" so it shouldn't wait for mount completion.

This is the same semantic as the positive dentry case already
handled.

Signed-off-by: Ian Kent 
Cc: David Howells 
Cc: Colin Walters 
Cc: Ondrej Holy 
---
 fs/namei.c |   15 ---
 include/linux/fs.h |3 +--
 2 files changed, 13 insertions(+), 5 deletions(-)

diff --git a/fs/namei.c b/fs/namei.c
index ddb6a7c2b3d4..1180f9c58093 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -1129,9 +1129,18 @@ static int follow_automount(struct path *path, struct 
nameidata *nd,
 * of the daemon to instantiate them before they can be used.
 */
if (!(nd->flags & (LOOKUP_PARENT | LOOKUP_DIRECTORY |
-  LOOKUP_OPEN | LOOKUP_CREATE | LOOKUP_AUTOMOUNT)) &&
-   path->dentry->d_inode)
-   return -EISDIR;
+  LOOKUP_OPEN | LOOKUP_CREATE |
+  LOOKUP_AUTOMOUNT))) {
+   /* Positive dentry that isn't meant to trigger an
+* automount, EISDIR will allow it to be used,
+* otherwise there's no mount here "now" so return
+* ENOENT.
+*/
+   if (path->dentry->d_inode)
+   return -EISDIR;
+   else
+   return -ENOENT;
+   }
 
if (path->dentry->d_sb->s_user_ns != _user_ns)
return -EACCES;
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 6e1fd5d21248..37c96f52e48e 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -3022,8 +3022,7 @@ static inline int vfs_lstat(const char __user *name, 
struct kstat *stat)
 static inline int vfs_fstatat(int dfd, const char __user *filename,
  struct kstat *stat, int flags)
 {
-   return vfs_statx(dfd, filename, flags | AT_NO_AUTOMOUNT,
-stat, STATX_BASIC_STATS);
+   return vfs_statx(dfd, filename, flags, stat, STATX_BASIC_STATS);
 }
 static inline int vfs_fstat(int fd, struct kstat *stat)
 {



[PATCH 3/3] autofs - make dev ioctl version and ismountpoint user accessible

2017-08-07 Thread Ian Kent
Some of the autofs miscellaneous device ioctls need to be accessable to
user space applications without CAP_SYS_ADMIN to get information about
autofs mounts.

Signed-off-by: Ian Kent 
Cc: Colin Walters 
Cc: Ondrej Holy 
---
 fs/autofs4/dev-ioctl.c  |   12 
 include/uapi/linux/auto_dev-ioctl.h |2 +-
 2 files changed, 9 insertions(+), 5 deletions(-)

diff --git a/fs/autofs4/dev-ioctl.c b/fs/autofs4/dev-ioctl.c
index 218a4ecc75cc..ea8b3a1cddd2 100644
--- a/fs/autofs4/dev-ioctl.c
+++ b/fs/autofs4/dev-ioctl.c
@@ -628,10 +628,6 @@ static int _autofs_dev_ioctl(unsigned int command,
ioctl_fn fn = NULL;
int err = 0;
 
-   /* only root can play with this */
-   if (!capable(CAP_SYS_ADMIN))
-   return -EPERM;
-
cmd_first = _IOC_NR(AUTOFS_DEV_IOCTL_IOC_FIRST);
cmd = _IOC_NR(command);
 
@@ -640,6 +636,14 @@ static int _autofs_dev_ioctl(unsigned int command,
return -ENOTTY;
}
 
+   /* Only root can use ioctls other than AUTOFS_DEV_IOCTL_VERSION_CMD
+* and AUTOFS_DEV_IOCTL_ISMOUNTPOINT_CMD
+*/
+   if (cmd != AUTOFS_DEV_IOCTL_VERSION_CMD &&
+   cmd != AUTOFS_DEV_IOCTL_ISMOUNTPOINT_CMD &&
+   !capable(CAP_SYS_ADMIN))
+   return -EPERM;
+
/* Copy the parameters into kernel space. */
param = copy_dev_ioctl(user);
if (IS_ERR(param))
diff --git a/include/uapi/linux/auto_dev-ioctl.h 
b/include/uapi/linux/auto_dev-ioctl.h
index 744b3d060968..5558db8e6646 100644
--- a/include/uapi/linux/auto_dev-ioctl.h
+++ b/include/uapi/linux/auto_dev-ioctl.h
@@ -16,7 +16,7 @@
 #define AUTOFS_DEVICE_NAME "autofs"
 
 #define AUTOFS_DEV_IOCTL_VERSION_MAJOR 1
-#define AUTOFS_DEV_IOCTL_VERSION_MINOR 0
+#define AUTOFS_DEV_IOCTL_VERSION_MINOR 1
 
 #define AUTOFS_DEV_IOCTL_SIZE  sizeof(struct autofs_dev_ioctl)
 



[PATCH 1/3] autofs - fix AT_NO_AUTOMOUNT not being honored

2017-08-07 Thread Ian Kent
The fstatat(2) and statx() calls can pass the flag AT_NO_AUTOMOUNT
which is meant to clear the LOOKUP_AUTOMOUNT flag and prevent triggering
of an automount by the call. But this flag is unconditionally cleared
for all stat family system calls except statx().

stat family system calls have always triggered mount requests for the
negative dentry case in follow_automount() which is intended but prevents
the fstatat(2) and statx() AT_NO_AUTOMOUNT case from being handled.

In order to handle the AT_NO_AUTOMOUNT for both system calls the
negative dentry case in follow_automount() needs to be changed to
return ENOENT when the LOOKUP_AUTOMOUNT flag is clear (and the other
required flags are clear).

AFAICT this change doesn't have any noticable side effects and may,
in some use cases (although I didn't see it in testing) prevent
unnecessary callbacks to the automount daemon.

It's also possible that a stat family call has been made with a
path that is in the process of being mounted by some other process.
But stat family calls should return the automount state of the path
as it is "now" so it shouldn't wait for mount completion.

This is the same semantic as the positive dentry case already
handled.

Signed-off-by: Ian Kent 
Cc: David Howells 
Cc: Colin Walters 
Cc: Ondrej Holy 
---
 fs/namei.c |   15 ---
 include/linux/fs.h |3 +--
 2 files changed, 13 insertions(+), 5 deletions(-)

diff --git a/fs/namei.c b/fs/namei.c
index ddb6a7c2b3d4..1180f9c58093 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -1129,9 +1129,18 @@ static int follow_automount(struct path *path, struct 
nameidata *nd,
 * of the daemon to instantiate them before they can be used.
 */
if (!(nd->flags & (LOOKUP_PARENT | LOOKUP_DIRECTORY |
-  LOOKUP_OPEN | LOOKUP_CREATE | LOOKUP_AUTOMOUNT)) &&
-   path->dentry->d_inode)
-   return -EISDIR;
+  LOOKUP_OPEN | LOOKUP_CREATE |
+  LOOKUP_AUTOMOUNT))) {
+   /* Positive dentry that isn't meant to trigger an
+* automount, EISDIR will allow it to be used,
+* otherwise there's no mount here "now" so return
+* ENOENT.
+*/
+   if (path->dentry->d_inode)
+   return -EISDIR;
+   else
+   return -ENOENT;
+   }
 
if (path->dentry->d_sb->s_user_ns != _user_ns)
return -EACCES;
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 6e1fd5d21248..37c96f52e48e 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -3022,8 +3022,7 @@ static inline int vfs_lstat(const char __user *name, 
struct kstat *stat)
 static inline int vfs_fstatat(int dfd, const char __user *filename,
  struct kstat *stat, int flags)
 {
-   return vfs_statx(dfd, filename, flags | AT_NO_AUTOMOUNT,
-stat, STATX_BASIC_STATS);
+   return vfs_statx(dfd, filename, flags, stat, STATX_BASIC_STATS);
 }
 static inline int vfs_fstat(int fd, struct kstat *stat)
 {



[PATCH 3/3] autofs - make dev ioctl version and ismountpoint user accessible

2017-08-07 Thread Ian Kent
Some of the autofs miscellaneous device ioctls need to be accessable to
user space applications without CAP_SYS_ADMIN to get information about
autofs mounts.

Signed-off-by: Ian Kent 
Cc: Colin Walters 
Cc: Ondrej Holy 
---
 fs/autofs4/dev-ioctl.c  |   12 
 include/uapi/linux/auto_dev-ioctl.h |2 +-
 2 files changed, 9 insertions(+), 5 deletions(-)

diff --git a/fs/autofs4/dev-ioctl.c b/fs/autofs4/dev-ioctl.c
index 218a4ecc75cc..ea8b3a1cddd2 100644
--- a/fs/autofs4/dev-ioctl.c
+++ b/fs/autofs4/dev-ioctl.c
@@ -628,10 +628,6 @@ static int _autofs_dev_ioctl(unsigned int command,
ioctl_fn fn = NULL;
int err = 0;
 
-   /* only root can play with this */
-   if (!capable(CAP_SYS_ADMIN))
-   return -EPERM;
-
cmd_first = _IOC_NR(AUTOFS_DEV_IOCTL_IOC_FIRST);
cmd = _IOC_NR(command);
 
@@ -640,6 +636,14 @@ static int _autofs_dev_ioctl(unsigned int command,
return -ENOTTY;
}
 
+   /* Only root can use ioctls other than AUTOFS_DEV_IOCTL_VERSION_CMD
+* and AUTOFS_DEV_IOCTL_ISMOUNTPOINT_CMD
+*/
+   if (cmd != AUTOFS_DEV_IOCTL_VERSION_CMD &&
+   cmd != AUTOFS_DEV_IOCTL_ISMOUNTPOINT_CMD &&
+   !capable(CAP_SYS_ADMIN))
+   return -EPERM;
+
/* Copy the parameters into kernel space. */
param = copy_dev_ioctl(user);
if (IS_ERR(param))
diff --git a/include/uapi/linux/auto_dev-ioctl.h 
b/include/uapi/linux/auto_dev-ioctl.h
index 744b3d060968..5558db8e6646 100644
--- a/include/uapi/linux/auto_dev-ioctl.h
+++ b/include/uapi/linux/auto_dev-ioctl.h
@@ -16,7 +16,7 @@
 #define AUTOFS_DEVICE_NAME "autofs"
 
 #define AUTOFS_DEV_IOCTL_VERSION_MAJOR 1
-#define AUTOFS_DEV_IOCTL_VERSION_MINOR 0
+#define AUTOFS_DEV_IOCTL_VERSION_MINOR 1
 
 #define AUTOFS_DEV_IOCTL_SIZE  sizeof(struct autofs_dev_ioctl)
 



[PATCH] thermal: rockchip: fix error return code in rockchip_thermal_probe()

2017-08-07 Thread Gustavo A. R. Silva
platform_get_irq() returns an error code, but the rockchip_thermal driver
ignores it and always returns -EINVAL. This is not correct and, prevents
-EPROBE_DEFER from being propagated properly.

Notice that platform_get_irq() no longer returns 0 on error:
https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=e330b9a6bb35dc7097a4f02cb1ae7b6f96df92af

Print and propagate the return value of platform_get_irq on failure.

This issue was detected with the help of Coccinelle.

Signed-off-by: Gustavo A. R. Silva 
---
 drivers/thermal/rockchip_thermal.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/thermal/rockchip_thermal.c 
b/drivers/thermal/rockchip_thermal.c
index 4c77965..6ca9747 100644
--- a/drivers/thermal/rockchip_thermal.c
+++ b/drivers/thermal/rockchip_thermal.c
@@ -1068,8 +1068,8 @@ static int rockchip_thermal_probe(struct platform_device 
*pdev)
 
irq = platform_get_irq(pdev, 0);
if (irq < 0) {
-   dev_err(>dev, "no irq resource?\n");
-   return -EINVAL;
+   dev_err(>dev, "no irq resource: %d\n", irq);
+   return irq;
}
 
thermal = devm_kzalloc(>dev, sizeof(struct rockchip_thermal_data),
-- 
2.5.0



[PATCH] thermal: rockchip: fix error return code in rockchip_thermal_probe()

2017-08-07 Thread Gustavo A. R. Silva
platform_get_irq() returns an error code, but the rockchip_thermal driver
ignores it and always returns -EINVAL. This is not correct and, prevents
-EPROBE_DEFER from being propagated properly.

Notice that platform_get_irq() no longer returns 0 on error:
https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=e330b9a6bb35dc7097a4f02cb1ae7b6f96df92af

Print and propagate the return value of platform_get_irq on failure.

This issue was detected with the help of Coccinelle.

Signed-off-by: Gustavo A. R. Silva 
---
 drivers/thermal/rockchip_thermal.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/thermal/rockchip_thermal.c 
b/drivers/thermal/rockchip_thermal.c
index 4c77965..6ca9747 100644
--- a/drivers/thermal/rockchip_thermal.c
+++ b/drivers/thermal/rockchip_thermal.c
@@ -1068,8 +1068,8 @@ static int rockchip_thermal_probe(struct platform_device 
*pdev)
 
irq = platform_get_irq(pdev, 0);
if (irq < 0) {
-   dev_err(>dev, "no irq resource?\n");
-   return -EINVAL;
+   dev_err(>dev, "no irq resource: %d\n", irq);
+   return irq;
}
 
thermal = devm_kzalloc(>dev, sizeof(struct rockchip_thermal_data),
-- 
2.5.0



Re: [PATCH 3/3] IPI: Avoid to use 2 cache lines for one call_single_data

2017-08-07 Thread Huang, Ying
Peter Zijlstra  writes:

> On Sat, Aug 05, 2017 at 08:47:02AM +0800, Huang, Ying wrote:
>> Yes.  That looks good.  So you will prepare the final patch?  Or you
>> hope me to do that?
>
> I was hoping you'd do it ;-)

Thanks!  Here is the updated patch

Best Regards,
Huang, Ying

-->8--
>From 957735e9ff3922368286540dab852986fc7b23b5 Mon Sep 17 00:00:00 2001
From: Huang Ying 
Date: Mon, 7 Aug 2017 16:55:33 +0800
Subject: [PATCH -v3] IPI: Avoid to use 2 cache lines for one
 call_single_data

struct call_single_data is used in IPI to transfer information between
CPUs.  Its size is bigger than sizeof(unsigned long) and less than
cache line size.  Now, it is allocated with no explicit alignment
requirement.  This makes it possible for allocated call_single_data to
cross 2 cache lines.  So that double the number of the cache lines
that need to be transferred among CPUs.

This is resolved by requiring call_single_data to be aligned with the
size of call_single_data.  Now the size of call_single_data is the
power of 2.  If we add new fields to call_single_data, we may need to
add pads to make sure the size of new definition is the power of 2.
Fortunately, this is enforced by gcc, which will report error for not
power of 2 alignment requirement.

To set alignment requirement of call_single_data to the size of
call_single_data, a struct definition and a typedef is used.

To test the effect of the patch, we use the vm-scalability multiple
thread swap test case (swap-w-seq-mt).  The test will create multiple
threads and each thread will eat memory until all RAM and part of swap
is used, so that huge number of IPI will be triggered when unmapping
memory.  In the test, the throughput of memory writing improves ~5%
compared with misaligned call_single_data because of faster IPI.

[Add call_single_data_t and align with size of call_single_data]
Suggested-by: Peter Zijlstra 
Signed-off-by: "Huang, Ying" 
Cc: Peter Zijlstra 
Cc: Ingo Molnar 
Cc: Michael Ellerman 
Cc: Borislav Petkov 
Cc: Thomas Gleixner 
Cc: Juergen Gross 
Cc: Aaron Lu 
---
 arch/mips/kernel/smp.c |  6 ++--
 block/blk-softirq.c|  2 +-
 drivers/block/null_blk.c   |  2 +-
 drivers/cpuidle/coupled.c  | 10 +++
 drivers/net/ethernet/cavium/liquidio/lio_main.c|  2 +-
 drivers/net/ethernet/cavium/liquidio/octeon_droq.h |  2 +-
 include/linux/blkdev.h |  2 +-
 include/linux/netdevice.h  |  2 +-
 include/linux/smp.h|  8 --
 kernel/sched/sched.h   |  2 +-
 kernel/smp.c   | 32 --
 kernel/up.c|  2 +-
 12 files changed, 39 insertions(+), 33 deletions(-)

diff --git a/arch/mips/kernel/smp.c b/arch/mips/kernel/smp.c
index 770d4d1516cb..bd8ba5472bca 100644
--- a/arch/mips/kernel/smp.c
+++ b/arch/mips/kernel/smp.c
@@ -648,12 +648,12 @@ EXPORT_SYMBOL(flush_tlb_one);
 #ifdef CONFIG_GENERIC_CLOCKEVENTS_BROADCAST
 
 static DEFINE_PER_CPU(atomic_t, tick_broadcast_count);
-static DEFINE_PER_CPU(struct call_single_data, tick_broadcast_csd);
+static DEFINE_PER_CPU(call_single_data_t, tick_broadcast_csd);
 
 void tick_broadcast(const struct cpumask *mask)
 {
atomic_t *count;
-   struct call_single_data *csd;
+   call_single_data_t *csd;
int cpu;
 
for_each_cpu(cpu, mask) {
@@ -674,7 +674,7 @@ static void tick_broadcast_callee(void *info)
 
 static int __init tick_broadcast_init(void)
 {
-   struct call_single_data *csd;
+   call_single_data_t *csd;
int cpu;
 
for (cpu = 0; cpu < NR_CPUS; cpu++) {
diff --git a/block/blk-softirq.c b/block/blk-softirq.c
index 87b7df4851bf..07125e7941f4 100644
--- a/block/blk-softirq.c
+++ b/block/blk-softirq.c
@@ -60,7 +60,7 @@ static void trigger_softirq(void *data)
 static int raise_blk_irq(int cpu, struct request *rq)
 {
if (cpu_online(cpu)) {
-   struct call_single_data *data = >csd;
+   call_single_data_t *data = >csd;
 
data->func = trigger_softirq;
data->info = rq;
diff --git a/drivers/block/null_blk.c b/drivers/block/null_blk.c
index 85c24cace973..81142ce781da 100644
--- a/drivers/block/null_blk.c
+++ b/drivers/block/null_blk.c
@@ -13,7 +13,7 @@
 struct nullb_cmd {
struct list_head list;
struct llist_node ll_list;
-   struct call_single_data csd;
+   call_single_data_t csd;
struct request *rq;
struct bio *bio;
unsigned int tag;
diff --git a/drivers/cpuidle/coupled.c b/drivers/cpuidle/coupled.c
index 71e586d7df71..147f38ea0fcd 

Re: [PATCH 3/3] IPI: Avoid to use 2 cache lines for one call_single_data

2017-08-07 Thread Huang, Ying
Peter Zijlstra  writes:

> On Sat, Aug 05, 2017 at 08:47:02AM +0800, Huang, Ying wrote:
>> Yes.  That looks good.  So you will prepare the final patch?  Or you
>> hope me to do that?
>
> I was hoping you'd do it ;-)

Thanks!  Here is the updated patch

Best Regards,
Huang, Ying

-->8--
>From 957735e9ff3922368286540dab852986fc7b23b5 Mon Sep 17 00:00:00 2001
From: Huang Ying 
Date: Mon, 7 Aug 2017 16:55:33 +0800
Subject: [PATCH -v3] IPI: Avoid to use 2 cache lines for one
 call_single_data

struct call_single_data is used in IPI to transfer information between
CPUs.  Its size is bigger than sizeof(unsigned long) and less than
cache line size.  Now, it is allocated with no explicit alignment
requirement.  This makes it possible for allocated call_single_data to
cross 2 cache lines.  So that double the number of the cache lines
that need to be transferred among CPUs.

This is resolved by requiring call_single_data to be aligned with the
size of call_single_data.  Now the size of call_single_data is the
power of 2.  If we add new fields to call_single_data, we may need to
add pads to make sure the size of new definition is the power of 2.
Fortunately, this is enforced by gcc, which will report error for not
power of 2 alignment requirement.

To set alignment requirement of call_single_data to the size of
call_single_data, a struct definition and a typedef is used.

To test the effect of the patch, we use the vm-scalability multiple
thread swap test case (swap-w-seq-mt).  The test will create multiple
threads and each thread will eat memory until all RAM and part of swap
is used, so that huge number of IPI will be triggered when unmapping
memory.  In the test, the throughput of memory writing improves ~5%
compared with misaligned call_single_data because of faster IPI.

[Add call_single_data_t and align with size of call_single_data]
Suggested-by: Peter Zijlstra 
Signed-off-by: "Huang, Ying" 
Cc: Peter Zijlstra 
Cc: Ingo Molnar 
Cc: Michael Ellerman 
Cc: Borislav Petkov 
Cc: Thomas Gleixner 
Cc: Juergen Gross 
Cc: Aaron Lu 
---
 arch/mips/kernel/smp.c |  6 ++--
 block/blk-softirq.c|  2 +-
 drivers/block/null_blk.c   |  2 +-
 drivers/cpuidle/coupled.c  | 10 +++
 drivers/net/ethernet/cavium/liquidio/lio_main.c|  2 +-
 drivers/net/ethernet/cavium/liquidio/octeon_droq.h |  2 +-
 include/linux/blkdev.h |  2 +-
 include/linux/netdevice.h  |  2 +-
 include/linux/smp.h|  8 --
 kernel/sched/sched.h   |  2 +-
 kernel/smp.c   | 32 --
 kernel/up.c|  2 +-
 12 files changed, 39 insertions(+), 33 deletions(-)

diff --git a/arch/mips/kernel/smp.c b/arch/mips/kernel/smp.c
index 770d4d1516cb..bd8ba5472bca 100644
--- a/arch/mips/kernel/smp.c
+++ b/arch/mips/kernel/smp.c
@@ -648,12 +648,12 @@ EXPORT_SYMBOL(flush_tlb_one);
 #ifdef CONFIG_GENERIC_CLOCKEVENTS_BROADCAST
 
 static DEFINE_PER_CPU(atomic_t, tick_broadcast_count);
-static DEFINE_PER_CPU(struct call_single_data, tick_broadcast_csd);
+static DEFINE_PER_CPU(call_single_data_t, tick_broadcast_csd);
 
 void tick_broadcast(const struct cpumask *mask)
 {
atomic_t *count;
-   struct call_single_data *csd;
+   call_single_data_t *csd;
int cpu;
 
for_each_cpu(cpu, mask) {
@@ -674,7 +674,7 @@ static void tick_broadcast_callee(void *info)
 
 static int __init tick_broadcast_init(void)
 {
-   struct call_single_data *csd;
+   call_single_data_t *csd;
int cpu;
 
for (cpu = 0; cpu < NR_CPUS; cpu++) {
diff --git a/block/blk-softirq.c b/block/blk-softirq.c
index 87b7df4851bf..07125e7941f4 100644
--- a/block/blk-softirq.c
+++ b/block/blk-softirq.c
@@ -60,7 +60,7 @@ static void trigger_softirq(void *data)
 static int raise_blk_irq(int cpu, struct request *rq)
 {
if (cpu_online(cpu)) {
-   struct call_single_data *data = >csd;
+   call_single_data_t *data = >csd;
 
data->func = trigger_softirq;
data->info = rq;
diff --git a/drivers/block/null_blk.c b/drivers/block/null_blk.c
index 85c24cace973..81142ce781da 100644
--- a/drivers/block/null_blk.c
+++ b/drivers/block/null_blk.c
@@ -13,7 +13,7 @@
 struct nullb_cmd {
struct list_head list;
struct llist_node ll_list;
-   struct call_single_data csd;
+   call_single_data_t csd;
struct request *rq;
struct bio *bio;
unsigned int tag;
diff --git a/drivers/cpuidle/coupled.c b/drivers/cpuidle/coupled.c
index 71e586d7df71..147f38ea0fcd 100644
--- a/drivers/cpuidle/coupled.c
+++ b/drivers/cpuidle/coupled.c
@@ -119,13 +119,13 @@ struct cpuidle_coupled {
 
 #define CPUIDLE_COUPLED_NOT_IDLE   (-1)
 
-static DEFINE_PER_CPU(struct call_single_data, 

[PATCH v5 1/1] usb:host:xhci support option to disable the xHCI USB2 HW LPM

2017-08-07 Thread Thang Q. Nguyen
XHCI specification 1.1 does not require xHCI-compliant controllers
to always enable hardware USB2 LPM. However, the current xHCI
driver always enable it when seeing HLC=1.
This patch supports an option for users to control disabling
USB2 Hardware LPM via DT/ACPI attribute.
This option is needed in case user would like to disable this
feature. For example, their xHCI controller has its USB2 HW LPM
broken.

Signed-off-by: Tung Nguyen 
Signed-off-by: Thang Q. Nguyen 
Acked-by: Rob Herring 
---
Changes since v4:
 - When HW LPM is optionally disabled, explicitly disable HLE, RWE, ...
 - Update codes to work with kernel 4.13-rc4
 - Add Acked-By from Rob Herring 
Changes since v3:
 - Bypass updating LPM parameters when HW LPM is optionally disabled.
Changes since v2:
 - Change code to disable HW LPM as an option for user which
   is set via ACPI/DT.
Changes since v1:
 - Update DT/ACPI attribute and corresponding codes from HLE to LPM to
   be consistent with other attribute names.
---
 Documentation/devicetree/bindings/usb/usb-xhci.txt |1 +
 drivers/usb/host/xhci-plat.c   |3 +++
 drivers/usb/host/xhci.c|2 +-
 drivers/usb/host/xhci.h|1 +
 4 files changed, 6 insertions(+), 1 deletions(-)

diff --git a/Documentation/devicetree/bindings/usb/usb-xhci.txt 
b/Documentation/devicetree/bindings/usb/usb-xhci.txt
index 2d80b60..ae6e484 100644
--- a/Documentation/devicetree/bindings/usb/usb-xhci.txt
+++ b/Documentation/devicetree/bindings/usb/usb-xhci.txt
@@ -26,6 +26,7 @@ Required properties:
 
 Optional properties:
   - clocks: reference to a clock
+  - usb2-lpm-disable: indicate if we don't want to enable USB2 HW LPM
   - usb3-lpm-capable: determines if platform is USB3 LPM capable
   - quirk-broken-port-ped: set if the controller has broken port disable 
mechanism
 
diff --git a/drivers/usb/host/xhci-plat.c b/drivers/usb/host/xhci-plat.c
index c04144b..9028fb5 100644
--- a/drivers/usb/host/xhci-plat.c
+++ b/drivers/usb/host/xhci-plat.c
@@ -267,6 +267,9 @@ static int xhci_plat_probe(struct platform_device *pdev)
goto disable_clk;
}
 
+   if (device_property_read_bool(sysdev, "usb2-lpm-disable"))
+   xhci->quirks |= XHCI_HW_LPM_DISABLE;
+
if (device_property_read_bool(sysdev, "usb3-lpm-capable"))
xhci->quirks |= XHCI_LPM_SUPPORT;
 
diff --git a/drivers/usb/host/xhci.c b/drivers/usb/host/xhci.c
index b2ff1ff..3a8e75f 100644
--- a/drivers/usb/host/xhci.c
+++ b/drivers/usb/host/xhci.c
@@ -4087,7 +4087,7 @@ static int xhci_set_usb2_hardware_lpm(struct usb_hcd *hcd,
xhci_dbg(xhci, "%s port %d USB2 hardware LPM\n",
enable ? "enable" : "disable", port_num + 1);
 
-   if (enable) {
+   if (enable && !(xhci->quirks & XHCI_HW_LPM_DISABLE)) {
/* Host supports BESL timeout instead of HIRD */
if (udev->usb2_hw_lpm_besl_capable) {
/* if device doesn't have a preferred BESL value use a
diff --git a/drivers/usb/host/xhci.h b/drivers/usb/host/xhci.h
index e3e9352..5d89c51 100644
--- a/drivers/usb/host/xhci.h
+++ b/drivers/usb/host/xhci.h
@@ -1821,6 +1821,7 @@ struct xhci_hcd {
 #define XHCI_LIMIT_ENDPOINT_INTERVAL_7 (1 << 26)
 #define XHCI_U2_DISABLE_WAKE   (1 << 27)
 #define XHCI_ASMEDIA_MODIFY_FLOWCONTROL(1 << 28)
+#define XHCI_HW_LPM_DISABLE(1 << 29)
 
unsigned intnum_active_eps;
unsigned intlimit_active_eps;
-- 
1.7.1



[PATCH v5 1/1] usb:host:xhci support option to disable the xHCI USB2 HW LPM

2017-08-07 Thread Thang Q. Nguyen
XHCI specification 1.1 does not require xHCI-compliant controllers
to always enable hardware USB2 LPM. However, the current xHCI
driver always enable it when seeing HLC=1.
This patch supports an option for users to control disabling
USB2 Hardware LPM via DT/ACPI attribute.
This option is needed in case user would like to disable this
feature. For example, their xHCI controller has its USB2 HW LPM
broken.

Signed-off-by: Tung Nguyen 
Signed-off-by: Thang Q. Nguyen 
Acked-by: Rob Herring 
---
Changes since v4:
 - When HW LPM is optionally disabled, explicitly disable HLE, RWE, ...
 - Update codes to work with kernel 4.13-rc4
 - Add Acked-By from Rob Herring 
Changes since v3:
 - Bypass updating LPM parameters when HW LPM is optionally disabled.
Changes since v2:
 - Change code to disable HW LPM as an option for user which
   is set via ACPI/DT.
Changes since v1:
 - Update DT/ACPI attribute and corresponding codes from HLE to LPM to
   be consistent with other attribute names.
---
 Documentation/devicetree/bindings/usb/usb-xhci.txt |1 +
 drivers/usb/host/xhci-plat.c   |3 +++
 drivers/usb/host/xhci.c|2 +-
 drivers/usb/host/xhci.h|1 +
 4 files changed, 6 insertions(+), 1 deletions(-)

diff --git a/Documentation/devicetree/bindings/usb/usb-xhci.txt 
b/Documentation/devicetree/bindings/usb/usb-xhci.txt
index 2d80b60..ae6e484 100644
--- a/Documentation/devicetree/bindings/usb/usb-xhci.txt
+++ b/Documentation/devicetree/bindings/usb/usb-xhci.txt
@@ -26,6 +26,7 @@ Required properties:
 
 Optional properties:
   - clocks: reference to a clock
+  - usb2-lpm-disable: indicate if we don't want to enable USB2 HW LPM
   - usb3-lpm-capable: determines if platform is USB3 LPM capable
   - quirk-broken-port-ped: set if the controller has broken port disable 
mechanism
 
diff --git a/drivers/usb/host/xhci-plat.c b/drivers/usb/host/xhci-plat.c
index c04144b..9028fb5 100644
--- a/drivers/usb/host/xhci-plat.c
+++ b/drivers/usb/host/xhci-plat.c
@@ -267,6 +267,9 @@ static int xhci_plat_probe(struct platform_device *pdev)
goto disable_clk;
}
 
+   if (device_property_read_bool(sysdev, "usb2-lpm-disable"))
+   xhci->quirks |= XHCI_HW_LPM_DISABLE;
+
if (device_property_read_bool(sysdev, "usb3-lpm-capable"))
xhci->quirks |= XHCI_LPM_SUPPORT;
 
diff --git a/drivers/usb/host/xhci.c b/drivers/usb/host/xhci.c
index b2ff1ff..3a8e75f 100644
--- a/drivers/usb/host/xhci.c
+++ b/drivers/usb/host/xhci.c
@@ -4087,7 +4087,7 @@ static int xhci_set_usb2_hardware_lpm(struct usb_hcd *hcd,
xhci_dbg(xhci, "%s port %d USB2 hardware LPM\n",
enable ? "enable" : "disable", port_num + 1);
 
-   if (enable) {
+   if (enable && !(xhci->quirks & XHCI_HW_LPM_DISABLE)) {
/* Host supports BESL timeout instead of HIRD */
if (udev->usb2_hw_lpm_besl_capable) {
/* if device doesn't have a preferred BESL value use a
diff --git a/drivers/usb/host/xhci.h b/drivers/usb/host/xhci.h
index e3e9352..5d89c51 100644
--- a/drivers/usb/host/xhci.h
+++ b/drivers/usb/host/xhci.h
@@ -1821,6 +1821,7 @@ struct xhci_hcd {
 #define XHCI_LIMIT_ENDPOINT_INTERVAL_7 (1 << 26)
 #define XHCI_U2_DISABLE_WAKE   (1 << 27)
 #define XHCI_ASMEDIA_MODIFY_FLOWCONTROL(1 << 28)
+#define XHCI_HW_LPM_DISABLE(1 << 29)
 
unsigned intnum_active_eps;
unsigned intlimit_active_eps;
-- 
1.7.1



Re: [PATCH -mm] mm: Clear to access sub-page last when clearing huge page

2017-08-07 Thread Huang, Ying
Mike Kravetz  writes:

> On 08/07/2017 12:21 AM, Huang, Ying wrote:
>> From: Huang Ying 
>> 
>> Huge page helps to reduce TLB miss rate, but it has higher cache
>> footprint, sometimes this may cause some issue.  For example, when
>> clearing huge page on x86_64 platform, the cache footprint is 2M.  But
>> on a Xeon E5 v3 2699 CPU, there are 18 cores, 36 threads, and only 45M
>> LLC (last level cache).  That is, in average, there are 2.5M LLC for
>> each core and 1.25M LLC for each thread.  If the cache pressure is
>> heavy when clearing the huge page, and we clear the huge page from the
>> begin to the end, it is possible that the begin of huge page is
>> evicted from the cache after we finishing clearing the end of the huge
>> page.  And it is possible for the application to access the begin of
>> the huge page after clearing the huge page.
>> 
>> To help the above situation, in this patch, when we clear a huge page,
>> the order to clear sub-pages is changed.  In quite some situation, we
>> can get the address that the application will access after we clear
>> the huge page, for example, in a page fault handler.  Instead of
>> clearing the huge page from begin to end, we will clear the sub-pages
>> farthest from the the sub-page to access firstly, and clear the
>> sub-page to access last.  This will make the sub-page to access most
>> cache-hot and sub-pages around it more cache-hot too.  If we cannot
>> know the address the application will access, the begin of the huge
>> page is assumed to be the the address the application will access.
>> 
>> With this patch, the throughput increases ~28.3% in vm-scalability
>> anon-w-seq test case with 72 processes on a 2 socket Xeon E5 v3 2699
>> system (36 cores, 72 threads).  The test case creates 72 processes,
>> each process mmap a big anonymous memory area and writes to it from
>> the begin to the end.  For each process, other processes could be seen
>> as other workload which generates heavy cache pressure.  At the same
>> time, the cache miss rate reduced from ~33.4% to ~31.7%, the
>> IPC (instruction per cycle) increased from 0.56 to 0.74, and the time
>> spent in user space is reduced ~7.9%
>> 
>> Thanks Andi Kleen to propose to use address to access to determine the
>> order of sub-pages to clear.
>> 
>> The hugetlbfs access address could be improved, will do that in
>> another patch.
>
> hugetlb_fault masks off the actual faulting address with,
> address &= huge_page_mask(h);
> before calling hugetlb_no_page.
>
> But, we could pass down the actual (unmasked) address to take advantage
> of this optimization for hugetlb faults as well.  hugetlb_fault is the
> only caller of hugetlb_no_page, so this should be pretty straight forward.
>
> Were you thinking of additional improvements?

No.  I am thinking of something like this.  If the basic idea is
accepted, I plan to add better support like this for hugetlbfs in
another patch.

Best Regards,
Huang, Ying


Re: [PATCH -mm] mm: Clear to access sub-page last when clearing huge page

2017-08-07 Thread Huang, Ying
Mike Kravetz  writes:

> On 08/07/2017 12:21 AM, Huang, Ying wrote:
>> From: Huang Ying 
>> 
>> Huge page helps to reduce TLB miss rate, but it has higher cache
>> footprint, sometimes this may cause some issue.  For example, when
>> clearing huge page on x86_64 platform, the cache footprint is 2M.  But
>> on a Xeon E5 v3 2699 CPU, there are 18 cores, 36 threads, and only 45M
>> LLC (last level cache).  That is, in average, there are 2.5M LLC for
>> each core and 1.25M LLC for each thread.  If the cache pressure is
>> heavy when clearing the huge page, and we clear the huge page from the
>> begin to the end, it is possible that the begin of huge page is
>> evicted from the cache after we finishing clearing the end of the huge
>> page.  And it is possible for the application to access the begin of
>> the huge page after clearing the huge page.
>> 
>> To help the above situation, in this patch, when we clear a huge page,
>> the order to clear sub-pages is changed.  In quite some situation, we
>> can get the address that the application will access after we clear
>> the huge page, for example, in a page fault handler.  Instead of
>> clearing the huge page from begin to end, we will clear the sub-pages
>> farthest from the the sub-page to access firstly, and clear the
>> sub-page to access last.  This will make the sub-page to access most
>> cache-hot and sub-pages around it more cache-hot too.  If we cannot
>> know the address the application will access, the begin of the huge
>> page is assumed to be the the address the application will access.
>> 
>> With this patch, the throughput increases ~28.3% in vm-scalability
>> anon-w-seq test case with 72 processes on a 2 socket Xeon E5 v3 2699
>> system (36 cores, 72 threads).  The test case creates 72 processes,
>> each process mmap a big anonymous memory area and writes to it from
>> the begin to the end.  For each process, other processes could be seen
>> as other workload which generates heavy cache pressure.  At the same
>> time, the cache miss rate reduced from ~33.4% to ~31.7%, the
>> IPC (instruction per cycle) increased from 0.56 to 0.74, and the time
>> spent in user space is reduced ~7.9%
>> 
>> Thanks Andi Kleen to propose to use address to access to determine the
>> order of sub-pages to clear.
>> 
>> The hugetlbfs access address could be improved, will do that in
>> another patch.
>
> hugetlb_fault masks off the actual faulting address with,
> address &= huge_page_mask(h);
> before calling hugetlb_no_page.
>
> But, we could pass down the actual (unmasked) address to take advantage
> of this optimization for hugetlb faults as well.  hugetlb_fault is the
> only caller of hugetlb_no_page, so this should be pretty straight forward.
>
> Were you thinking of additional improvements?

No.  I am thinking of something like this.  If the basic idea is
accepted, I plan to add better support like this for hugetlbfs in
another patch.

Best Regards,
Huang, Ying


Re: [lkp-robot] [mm] 7674270022: will-it-scale.per_process_ops -19.3% regression

2017-08-07 Thread Nadav Amit
Minchan Kim  wrote:

> Hi,
> 
> On Tue, Aug 08, 2017 at 09:19:23AM +0800, kernel test robot wrote:
>> Greeting,
>> 
>> FYI, we noticed a -19.3% regression of will-it-scale.per_process_ops due to 
>> commit:
>> 
>> 
>> commit: 76742700225cad9df49f05399381ac3f1ec3dc60 ("mm: fix 
>> MADV_[FREE|DONTNEED] TLB flush miss problem")
>> url: 
>> https://github.com/0day-ci/linux/commits/Nadav-Amit/mm-migrate-prevent-racy-access-to-tlb_flush_pending/20170802-205715
>> 
>> 
>> in testcase: will-it-scale
>> on test machine: 88 threads Intel(R) Xeon(R) CPU E5-2699 v4 @ 2.20GHz with 
>> 64G memory
>> with following parameters:
>> 
>>  nr_task: 16
>>  mode: process
>>  test: brk1
>>  cpufreq_governor: performance
>> 
>> test-description: Will It Scale takes a testcase and runs it from 1 through 
>> to n parallel copies to see if the testcase will scale. It builds both a 
>> process and threads based test in order to see any differences between the 
>> two.
>> test-url: https://github.com/antonblanchard/will-it-scale
> 
> Thanks for the report.
> Could you explain what kinds of workload you are testing?
> 
> Does it calls frequently madvise(MADV_DONTNEED) in parallel on multiple
> threads?

According to the description it is "testcase:brk increase/decrease of one
page”. According to the mode it spawns multiple processes, not threads.

Since a single page is unmapped each time, and the iTLB-loads increase
dramatically, I would suspect that for some reason a full TLB flush is
caused during do_munmap().

If I find some free time, I’ll try to profile the workload - but feel free
to beat me to it.

Nadav 



Re: [lkp-robot] [mm] 7674270022: will-it-scale.per_process_ops -19.3% regression

2017-08-07 Thread Nadav Amit
Minchan Kim  wrote:

> Hi,
> 
> On Tue, Aug 08, 2017 at 09:19:23AM +0800, kernel test robot wrote:
>> Greeting,
>> 
>> FYI, we noticed a -19.3% regression of will-it-scale.per_process_ops due to 
>> commit:
>> 
>> 
>> commit: 76742700225cad9df49f05399381ac3f1ec3dc60 ("mm: fix 
>> MADV_[FREE|DONTNEED] TLB flush miss problem")
>> url: 
>> https://github.com/0day-ci/linux/commits/Nadav-Amit/mm-migrate-prevent-racy-access-to-tlb_flush_pending/20170802-205715
>> 
>> 
>> in testcase: will-it-scale
>> on test machine: 88 threads Intel(R) Xeon(R) CPU E5-2699 v4 @ 2.20GHz with 
>> 64G memory
>> with following parameters:
>> 
>>  nr_task: 16
>>  mode: process
>>  test: brk1
>>  cpufreq_governor: performance
>> 
>> test-description: Will It Scale takes a testcase and runs it from 1 through 
>> to n parallel copies to see if the testcase will scale. It builds both a 
>> process and threads based test in order to see any differences between the 
>> two.
>> test-url: https://github.com/antonblanchard/will-it-scale
> 
> Thanks for the report.
> Could you explain what kinds of workload you are testing?
> 
> Does it calls frequently madvise(MADV_DONTNEED) in parallel on multiple
> threads?

According to the description it is "testcase:brk increase/decrease of one
page”. According to the mode it spawns multiple processes, not threads.

Since a single page is unmapped each time, and the iTLB-loads increase
dramatically, I would suspect that for some reason a full TLB flush is
caused during do_munmap().

If I find some free time, I’ll try to profile the workload - but feel free
to beat me to it.

Nadav 



Re: [v5] wlcore: add missing nvs file name info for wilink8

2017-08-07 Thread Tony Lindgren
* Reizer, Eyal  [170807 00:47]:
> Hi Tony,
> > 
> > * Reizer, Eyal  [170807 00:32]:
> > > The following commits:
> > > c815fde wlcore: spi: Populate config firmware data
> > > d776fc8 wlcore: sdio: Populate config firmware data
> > >
> > > Populated the nvs entry for wilink6 and wilink7 only while it is
> > > still needed for wilink8 as well.
> > > This broke user space backward compatibility when upgrading from older
> > > kernels, as the alternate mac address would not be read from the nvs that
> > is
> > > present in the file system (lib/firmware/ti-connectivity/wl1271-nvs.bin)
> > > causing mac address change of the wlan interface.
> > >
> > > This patch fix this and update the structure field with the same default
> > > nvs file name that has been used before.
> > >
> > > In addition, some distros hold a default wl1271-nvs.bin in the file
> > > system with a bogus mac address (deadbeef...) that for a wl18xx device
> > > also overrides the mac address that is stored inside the device.
> > > Warn users about this bogus mac address and use a random mac instead
> > 
> > Hmm looks pretty good to me except for one more thing I just noticed.
> > 
> > Why don't you just use the hardware mac address instead of a random
> > mac address on wl18xx device when you see a bogus nvs file?
> > 
> 
> I agree that this would have been better but the problem is that hardware 
> mac address is available for wilink8 onlyWilink6/7 don't have one stored.
> The wlcore code responsible for handling mac address is common code 
> and there is method for detecting between them in this module.

Care to clarify a bit.. Are you saying wilink8 will use the hardware
mac address in case of bogus nvs file?

Regards,

Tony


Re: [v5] wlcore: add missing nvs file name info for wilink8

2017-08-07 Thread Tony Lindgren
* Reizer, Eyal  [170807 00:47]:
> Hi Tony,
> > 
> > * Reizer, Eyal  [170807 00:32]:
> > > The following commits:
> > > c815fde wlcore: spi: Populate config firmware data
> > > d776fc8 wlcore: sdio: Populate config firmware data
> > >
> > > Populated the nvs entry for wilink6 and wilink7 only while it is
> > > still needed for wilink8 as well.
> > > This broke user space backward compatibility when upgrading from older
> > > kernels, as the alternate mac address would not be read from the nvs that
> > is
> > > present in the file system (lib/firmware/ti-connectivity/wl1271-nvs.bin)
> > > causing mac address change of the wlan interface.
> > >
> > > This patch fix this and update the structure field with the same default
> > > nvs file name that has been used before.
> > >
> > > In addition, some distros hold a default wl1271-nvs.bin in the file
> > > system with a bogus mac address (deadbeef...) that for a wl18xx device
> > > also overrides the mac address that is stored inside the device.
> > > Warn users about this bogus mac address and use a random mac instead
> > 
> > Hmm looks pretty good to me except for one more thing I just noticed.
> > 
> > Why don't you just use the hardware mac address instead of a random
> > mac address on wl18xx device when you see a bogus nvs file?
> > 
> 
> I agree that this would have been better but the problem is that hardware 
> mac address is available for wilink8 onlyWilink6/7 don't have one stored.
> The wlcore code responsible for handling mac address is common code 
> and there is method for detecting between them in this module.

Care to clarify a bit.. Are you saying wilink8 will use the hardware
mac address in case of bogus nvs file?

Regards,

Tony


Re: [PATCH] gpio: uniphier: add UniPhier GPIO controller driver

2017-08-07 Thread Keerthy


On Tuesday 08 August 2017 06:36 AM, Masahiro Yamada wrote:
> Hi Linus,
> 
> 2017-08-08 0:37 GMT+09:00 Linus Walleij :
>> On Mon, Aug 7, 2017 at 3:50 PM, Masahiro Yamada
>>  wrote:
>>
>>> Adding "interrupts" property in DT causes
>>> of_pupulate_default_populate() to assign virtual IRQ numbers
>>> before driver probing.  So it does not work well with IRQ domain hierarchy.
>>
>> I think I heard some noise about this the week before.
>>
>>> For pinctrl/stm32/pinctrl-stm32.c,
>>> I do not see "interrupts", so it just straight maps the irq numbers.
>>
>> I think OMAP and DaVinci does someting similar too. This is from a recent
>> DaVinci patch from Keerthy:
>>
>> +Example for 66AK2G:
>> +
>> +gpio0: gpio@2603000 {
>> +   compatible = "ti,k2g-gpio", "ti,keystone-gpio";
>> +   reg = <0x02603000 0x100>;
>> +   gpio-controller;
>> +   #gpio-cells = <2>;
>> +   interrupts = ,
>> +   ,
>> +   ,
>> +   ,
>> +   ,
>> +   ,
>> +   ,
>> +   ,
>> +   ;
>> +   interrupt-controller;
>> +   #interrupt-cells = <2>;
>> +   ti,ngpio = <144>;
>> +   ti,davinci-gpio-unbanked = <0>;
>> +   clocks = <_clks 0x001b 0x0>;
>> +   clock-names = "gpio";
>> +};
>>
>>
>> That looks fairly similar.
>>
> 
> I do not think so.
> 
> 
> I do not see .alloc hook in drivers/gpio/gpio-davinci.c
> so this driver is unrelated to IRQ domain hierarchy.

Hi Masahiro,

Yes CONFIG_IRQ_DOMAIN_HIERARCHY is not enabled in keystone_defconfig or
davinci_all_defconfig.

Regards,
Keerthy

> 
> 
> 
> 
> 
> 


Re: [PATCH] gpio: uniphier: add UniPhier GPIO controller driver

2017-08-07 Thread Keerthy


On Tuesday 08 August 2017 06:36 AM, Masahiro Yamada wrote:
> Hi Linus,
> 
> 2017-08-08 0:37 GMT+09:00 Linus Walleij :
>> On Mon, Aug 7, 2017 at 3:50 PM, Masahiro Yamada
>>  wrote:
>>
>>> Adding "interrupts" property in DT causes
>>> of_pupulate_default_populate() to assign virtual IRQ numbers
>>> before driver probing.  So it does not work well with IRQ domain hierarchy.
>>
>> I think I heard some noise about this the week before.
>>
>>> For pinctrl/stm32/pinctrl-stm32.c,
>>> I do not see "interrupts", so it just straight maps the irq numbers.
>>
>> I think OMAP and DaVinci does someting similar too. This is from a recent
>> DaVinci patch from Keerthy:
>>
>> +Example for 66AK2G:
>> +
>> +gpio0: gpio@2603000 {
>> +   compatible = "ti,k2g-gpio", "ti,keystone-gpio";
>> +   reg = <0x02603000 0x100>;
>> +   gpio-controller;
>> +   #gpio-cells = <2>;
>> +   interrupts = ,
>> +   ,
>> +   ,
>> +   ,
>> +   ,
>> +   ,
>> +   ,
>> +   ,
>> +   ;
>> +   interrupt-controller;
>> +   #interrupt-cells = <2>;
>> +   ti,ngpio = <144>;
>> +   ti,davinci-gpio-unbanked = <0>;
>> +   clocks = <_clks 0x001b 0x0>;
>> +   clock-names = "gpio";
>> +};
>>
>>
>> That looks fairly similar.
>>
> 
> I do not think so.
> 
> 
> I do not see .alloc hook in drivers/gpio/gpio-davinci.c
> so this driver is unrelated to IRQ domain hierarchy.

Hi Masahiro,

Yes CONFIG_IRQ_DOMAIN_HIERARCHY is not enabled in keystone_defconfig or
davinci_all_defconfig.

Regards,
Keerthy

> 
> 
> 
> 
> 
> 


Re: [PATCH 13/18] power: supply: bq24190_charger: Export 5V boost converter as regulator

2017-08-07 Thread Tony Lindgren
* Hans de Goede  [170806 05:37]:
> Register the 5V boost converter as a regulator named
> "regulator-bq24190-usb-vbus". Note the name includes "bq24190" because
> the bq24190 family is also used on ACPI devices where there are no
> device-tree phandles, so regulator_get will fallback to the name and thus
> it must be unique on the system.

Nice, this makes VBUS easy to use for USB PHY drivers :)

Tony


Re: [PATCH 13/18] power: supply: bq24190_charger: Export 5V boost converter as regulator

2017-08-07 Thread Tony Lindgren
* Hans de Goede  [170806 05:37]:
> Register the 5V boost converter as a regulator named
> "regulator-bq24190-usb-vbus". Note the name includes "bq24190" because
> the bq24190 family is also used on ACPI devices where there are no
> device-tree phandles, so regulator_get will fallback to the name and thus
> it must be unique on the system.

Nice, this makes VBUS easy to use for USB PHY drivers :)

Tony


[PATCH] usb: imx21-hcd: fix error return code in imx21_probe()

2017-08-07 Thread Gustavo A. R. Silva
platform_get_irq() returns an error code, but the imx21-hcd driver
ignores it and always returns -ENXIO. This is not correct, and
prevents -EPROBE_DEFER from being propagated properly.

Notice that platform_get_irq() no longer returns 0 on error:
https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=e330b9a6bb35dc7097a4f02cb1ae7b6f96df92af

Print error message and propagate the return value of platform_get_irq
on failure.

This issue was detected with the help of Coccinelle.

Signed-off-by: Gustavo A. R. Silva 
---
 drivers/usb/host/imx21-hcd.c | 6 --
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/drivers/usb/host/imx21-hcd.c b/drivers/usb/host/imx21-hcd.c
index f542045..e25d72e 100644
--- a/drivers/usb/host/imx21-hcd.c
+++ b/drivers/usb/host/imx21-hcd.c
@@ -1849,8 +1849,10 @@ static int imx21_probe(struct platform_device *pdev)
if (!res)
return -ENODEV;
irq = platform_get_irq(pdev, 0);
-   if (irq < 0)
-   return -ENXIO;
+   if (irq < 0) {
+   dev_err(>dev, "Failed to get IRQ: %d\n", irq);
+   return irq;
+   }
 
hcd = usb_create_hcd(_hc_driver,
>dev, dev_name(>dev));
-- 
2.5.0



Re: [RESEND PATCH] bcache: Don't reinvent the wheel but use existing llist API

2017-08-07 Thread Byungchul Park
On Mon, Aug 07, 2017 at 06:18:35PM +0800, Coly Li wrote:
> On 2017/8/7 下午4:38, Byungchul Park wrote:
> > Although llist provides proper APIs, they are not used. Make them used.
> > 
> > Signed-off-by: Byungchul Park  Only have a question about why not using llist_for_each_entry(), it's

Hello,

The reason is to keep the original logic unchanged. The logic already
does as if it's the safe version against removal.

> still OK with llist_for_each_entry_safe(). The rested part is good to me.
> 
> Acked-by: Coly Li 
> 
> > ---
> >  drivers/md/bcache/closure.c | 17 +++--
> >  1 file changed, 3 insertions(+), 14 deletions(-)
> > 
> > diff --git a/drivers/md/bcache/closure.c b/drivers/md/bcache/closure.c
> > index 864e673..1841d03 100644
> > --- a/drivers/md/bcache/closure.c
> > +++ b/drivers/md/bcache/closure.c
> > @@ -64,27 +64,16 @@ void closure_put(struct closure *cl)
> >  void __closure_wake_up(struct closure_waitlist *wait_list)
> >  {
> > struct llist_node *list;
> > -   struct closure *cl;
> > +   struct closure *cl, *t;
> > struct llist_node *reverse = NULL;
> >  
> > list = llist_del_all(_list->list);
> >  
> > /* We first reverse the list to preserve FIFO ordering and fairness */
> > -
> > -   while (list) {
> > -   struct llist_node *t = list;
> > -   list = llist_next(list);
> > -
> > -   t->next = reverse;
> > -   reverse = t;
> > -   }
> > +   reverse = llist_reverse_order(list);
> >  
> > /* Then do the wakeups */
> > -
> > -   while (reverse) {
> > -   cl = container_of(reverse, struct closure, list);
> > -   reverse = llist_next(reverse);
> > -
> > +   llist_for_each_entry_safe(cl, t, reverse, list) {
> 
> Just wondering why not using llist_for_each_entry(), or you use the
> _safe version on purpose ?

If I use llist_for_each_entry(), then it would change the original
behavior. Is it ok?

Thank you,
Byungchul



[PATCH] usb: imx21-hcd: fix error return code in imx21_probe()

2017-08-07 Thread Gustavo A. R. Silva
platform_get_irq() returns an error code, but the imx21-hcd driver
ignores it and always returns -ENXIO. This is not correct, and
prevents -EPROBE_DEFER from being propagated properly.

Notice that platform_get_irq() no longer returns 0 on error:
https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=e330b9a6bb35dc7097a4f02cb1ae7b6f96df92af

Print error message and propagate the return value of platform_get_irq
on failure.

This issue was detected with the help of Coccinelle.

Signed-off-by: Gustavo A. R. Silva 
---
 drivers/usb/host/imx21-hcd.c | 6 --
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/drivers/usb/host/imx21-hcd.c b/drivers/usb/host/imx21-hcd.c
index f542045..e25d72e 100644
--- a/drivers/usb/host/imx21-hcd.c
+++ b/drivers/usb/host/imx21-hcd.c
@@ -1849,8 +1849,10 @@ static int imx21_probe(struct platform_device *pdev)
if (!res)
return -ENODEV;
irq = platform_get_irq(pdev, 0);
-   if (irq < 0)
-   return -ENXIO;
+   if (irq < 0) {
+   dev_err(>dev, "Failed to get IRQ: %d\n", irq);
+   return irq;
+   }
 
hcd = usb_create_hcd(_hc_driver,
>dev, dev_name(>dev));
-- 
2.5.0



Re: [RESEND PATCH] bcache: Don't reinvent the wheel but use existing llist API

2017-08-07 Thread Byungchul Park
On Mon, Aug 07, 2017 at 06:18:35PM +0800, Coly Li wrote:
> On 2017/8/7 下午4:38, Byungchul Park wrote:
> > Although llist provides proper APIs, they are not used. Make them used.
> > 
> > Signed-off-by: Byungchul Park  Only have a question about why not using llist_for_each_entry(), it's

Hello,

The reason is to keep the original logic unchanged. The logic already
does as if it's the safe version against removal.

> still OK with llist_for_each_entry_safe(). The rested part is good to me.
> 
> Acked-by: Coly Li 
> 
> > ---
> >  drivers/md/bcache/closure.c | 17 +++--
> >  1 file changed, 3 insertions(+), 14 deletions(-)
> > 
> > diff --git a/drivers/md/bcache/closure.c b/drivers/md/bcache/closure.c
> > index 864e673..1841d03 100644
> > --- a/drivers/md/bcache/closure.c
> > +++ b/drivers/md/bcache/closure.c
> > @@ -64,27 +64,16 @@ void closure_put(struct closure *cl)
> >  void __closure_wake_up(struct closure_waitlist *wait_list)
> >  {
> > struct llist_node *list;
> > -   struct closure *cl;
> > +   struct closure *cl, *t;
> > struct llist_node *reverse = NULL;
> >  
> > list = llist_del_all(_list->list);
> >  
> > /* We first reverse the list to preserve FIFO ordering and fairness */
> > -
> > -   while (list) {
> > -   struct llist_node *t = list;
> > -   list = llist_next(list);
> > -
> > -   t->next = reverse;
> > -   reverse = t;
> > -   }
> > +   reverse = llist_reverse_order(list);
> >  
> > /* Then do the wakeups */
> > -
> > -   while (reverse) {
> > -   cl = container_of(reverse, struct closure, list);
> > -   reverse = llist_next(reverse);
> > -
> > +   llist_for_each_entry_safe(cl, t, reverse, list) {
> 
> Just wondering why not using llist_for_each_entry(), or you use the
> _safe version on purpose ?

If I use llist_for_each_entry(), then it would change the original
behavior. Is it ok?

Thank you,
Byungchul



Re: [PATCH 3.18 00/50] 3.18.64-stable review

2017-08-07 Thread Guenter Roeck

On 08/07/2017 12:34 PM, Greg Kroah-Hartman wrote:

On Sat, Aug 05, 2017 at 12:11:19PM -0700, Guenter Roeck wrote:

On 08/05/2017 08:43 AM, Greg Kroah-Hartman wrote:

On Sat, Aug 05, 2017 at 08:02:17AM +0200, Willy Tarreau wrote:

On Sat, Aug 05, 2017 at 07:55:11AM +0200, Willy Tarreau wrote:

On Fri, Aug 04, 2017 at 07:51:07PM -0700, Greg Kroah-Hartman wrote:

On Fri, Aug 04, 2017 at 07:46:57PM -0700, Greg Kroah-Hartman wrote:

On Fri, Aug 04, 2017 at 06:43:50PM -0700, Guenter Roeck wrote:

On 08/04/2017 04:15 PM, Greg Kroah-Hartman wrote:

This is the start of the stable review cycle for the 3.18.64 release.
There are 50 patches in this series, all will be posted as a response
to this one.  If anyone has any issues with these being applied, please
let me know.

Responses should be made by Sun Aug  6 23:15:34 UTC 2017.
Anything received after that time might be too late.



Preliminary:

Lots of

lib/string.c:31:32: fatal error: asm/word-at-a-time.h

affecting several architectures.

alpha:

lib/string.c:217:4: error: implicit declaration of function 'zero_bytemask'


Hm, I think I need to add c753bf34c94e ("word-at-a-time.h: support
zero_bytemask() on alpha and tile"), right?  Any other arches failing?


Hm, that doesn't work, do we care about tile? :)

Let me see how deep this hole is, I just wanted to get strscpy into 3.18
to fix a bug...


I suspect you'll need this one which came as part of the strscpy() series
between 4.2 and 4.3 (though I have not tested) :

commit a6e2f029ae34f41adb6ae3812c32c5d326e1abd2
Author: Chris Metcalf 
Date:   Wed Apr 29 12:48:40 2015 -0400

  Make asm/word-at-a-time.h available on all architectures
  Added the x86 implementation of word-at-a-time to the
  generic version, which previously only supported big-endian.
  (...)


OK I just applied it on top of 3.18.64-rc1 and it allowed me to build mips
which previously broke. It will not apply as-is, you'll need to drop the
change for arch/nios2/include/asm/Kbuild, and after that it's OK.


Thanks for that, I've now queued that patch up.



Better, but there are still some errors.

powerpc:
lib/string.c: In function 'strscpy':
lib/string.c:217:4: error: implicit declaration of function 'zero_bytemask'

tile:
arch/tile/gxio/mpipe.c:46:15: error: conflicting types for 'strscpy'
include/linux/string.h:29:22: note: previous declaration of 'strscpy' was here

Missing patches:

7a5692e6e533 ("arch/powerpc: provide zero_bytemask() for big-endian")
30059d494a72 ("tile: use global strscpy() rather than private copy")


Thanks for these, I'll queue them up.  And do a -rc2 in a few days as
this was a mess...



Getting there. With v3.18.63-62-gc7d9ae0:

Build results:
total: 136 pass: 136 fail: 0
Qemu test results:
total: 111 pass: 111 fail: 0

Details are available at http://kerneltests.org/builders.

Guenter


Re: [PATCH 3.18 00/50] 3.18.64-stable review

2017-08-07 Thread Guenter Roeck

On 08/07/2017 12:34 PM, Greg Kroah-Hartman wrote:

On Sat, Aug 05, 2017 at 12:11:19PM -0700, Guenter Roeck wrote:

On 08/05/2017 08:43 AM, Greg Kroah-Hartman wrote:

On Sat, Aug 05, 2017 at 08:02:17AM +0200, Willy Tarreau wrote:

On Sat, Aug 05, 2017 at 07:55:11AM +0200, Willy Tarreau wrote:

On Fri, Aug 04, 2017 at 07:51:07PM -0700, Greg Kroah-Hartman wrote:

On Fri, Aug 04, 2017 at 07:46:57PM -0700, Greg Kroah-Hartman wrote:

On Fri, Aug 04, 2017 at 06:43:50PM -0700, Guenter Roeck wrote:

On 08/04/2017 04:15 PM, Greg Kroah-Hartman wrote:

This is the start of the stable review cycle for the 3.18.64 release.
There are 50 patches in this series, all will be posted as a response
to this one.  If anyone has any issues with these being applied, please
let me know.

Responses should be made by Sun Aug  6 23:15:34 UTC 2017.
Anything received after that time might be too late.



Preliminary:

Lots of

lib/string.c:31:32: fatal error: asm/word-at-a-time.h

affecting several architectures.

alpha:

lib/string.c:217:4: error: implicit declaration of function 'zero_bytemask'


Hm, I think I need to add c753bf34c94e ("word-at-a-time.h: support
zero_bytemask() on alpha and tile"), right?  Any other arches failing?


Hm, that doesn't work, do we care about tile? :)

Let me see how deep this hole is, I just wanted to get strscpy into 3.18
to fix a bug...


I suspect you'll need this one which came as part of the strscpy() series
between 4.2 and 4.3 (though I have not tested) :

commit a6e2f029ae34f41adb6ae3812c32c5d326e1abd2
Author: Chris Metcalf 
Date:   Wed Apr 29 12:48:40 2015 -0400

  Make asm/word-at-a-time.h available on all architectures
  Added the x86 implementation of word-at-a-time to the
  generic version, which previously only supported big-endian.
  (...)


OK I just applied it on top of 3.18.64-rc1 and it allowed me to build mips
which previously broke. It will not apply as-is, you'll need to drop the
change for arch/nios2/include/asm/Kbuild, and after that it's OK.


Thanks for that, I've now queued that patch up.



Better, but there are still some errors.

powerpc:
lib/string.c: In function 'strscpy':
lib/string.c:217:4: error: implicit declaration of function 'zero_bytemask'

tile:
arch/tile/gxio/mpipe.c:46:15: error: conflicting types for 'strscpy'
include/linux/string.h:29:22: note: previous declaration of 'strscpy' was here

Missing patches:

7a5692e6e533 ("arch/powerpc: provide zero_bytemask() for big-endian")
30059d494a72 ("tile: use global strscpy() rather than private copy")


Thanks for these, I'll queue them up.  And do a -rc2 in a few days as
this was a mess...



Getting there. With v3.18.63-62-gc7d9ae0:

Build results:
total: 136 pass: 136 fail: 0
Qemu test results:
total: 111 pass: 111 fail: 0

Details are available at http://kerneltests.org/builders.

Guenter


[PATCH] f2fs: fix some cases with reserved_blocks

2017-08-07 Thread Yunlong Song
Signed-off-by: Yunlong Song 
---
 fs/f2fs/recovery.c | 3 ++-
 fs/f2fs/super.c| 9 +
 2 files changed, 7 insertions(+), 5 deletions(-)

diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c
index a3d0261..e288319 100644
--- a/fs/f2fs/recovery.c
+++ b/fs/f2fs/recovery.c
@@ -51,7 +51,8 @@ bool space_for_roll_forward(struct f2fs_sb_info *sbi)
 {
s64 nalloc = percpu_counter_sum_positive(>alloc_valid_block_count);
 
-   if (sbi->last_valid_block_count + nalloc > sbi->user_block_count)
+   if (sbi->last_valid_block_count + nalloc +
+   sbi->reserved_blocks > sbi->user_block_count)
return false;
return true;
 }
diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
index 4c1bdcb..c644bf5 100644
--- a/fs/f2fs/super.c
+++ b/fs/f2fs/super.c
@@ -946,6 +946,7 @@ static int f2fs_statfs(struct dentry *dentry, struct 
kstatfs *buf)
u64 id = huge_encode_dev(sb->s_bdev->bd_dev);
block_t total_count, user_block_count, start_count, ovp_count;
u64 avail_node_count;
+   block_t avail_user_block_count;
 
total_count = le64_to_cpu(sbi->raw_super->block_count);
user_block_count = sbi->user_block_count;
@@ -953,16 +954,16 @@ static int f2fs_statfs(struct dentry *dentry, struct 
kstatfs *buf)
ovp_count = SM_I(sbi)->ovp_segments << sbi->log_blocks_per_seg;
buf->f_type = F2FS_SUPER_MAGIC;
buf->f_bsize = sbi->blocksize;
+   avail_user_block_count = user_block_count - sbi->reserved_blocks;
 
buf->f_blocks = total_count - start_count;
buf->f_bfree = user_block_count - valid_user_blocks(sbi) + ovp_count;
-   buf->f_bavail = user_block_count - valid_user_blocks(sbi) -
-   sbi->reserved_blocks;
+   buf->f_bavail = avail_user_block_count - valid_user_blocks(sbi);
 
avail_node_count = sbi->total_node_count - F2FS_RESERVED_NODE_NUM;
 
-   if (avail_node_count > user_block_count) {
-   buf->f_files = user_block_count;
+   if (avail_node_count > avail_user_block_count) {
+   buf->f_files = avail_user_block_count;
buf->f_ffree = buf->f_bavail;
} else {
buf->f_files = avail_node_count;
-- 
1.8.5.2



[PATCH] f2fs: fix some cases with reserved_blocks

2017-08-07 Thread Yunlong Song
Signed-off-by: Yunlong Song 
---
 fs/f2fs/recovery.c | 3 ++-
 fs/f2fs/super.c| 9 +
 2 files changed, 7 insertions(+), 5 deletions(-)

diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c
index a3d0261..e288319 100644
--- a/fs/f2fs/recovery.c
+++ b/fs/f2fs/recovery.c
@@ -51,7 +51,8 @@ bool space_for_roll_forward(struct f2fs_sb_info *sbi)
 {
s64 nalloc = percpu_counter_sum_positive(>alloc_valid_block_count);
 
-   if (sbi->last_valid_block_count + nalloc > sbi->user_block_count)
+   if (sbi->last_valid_block_count + nalloc +
+   sbi->reserved_blocks > sbi->user_block_count)
return false;
return true;
 }
diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
index 4c1bdcb..c644bf5 100644
--- a/fs/f2fs/super.c
+++ b/fs/f2fs/super.c
@@ -946,6 +946,7 @@ static int f2fs_statfs(struct dentry *dentry, struct 
kstatfs *buf)
u64 id = huge_encode_dev(sb->s_bdev->bd_dev);
block_t total_count, user_block_count, start_count, ovp_count;
u64 avail_node_count;
+   block_t avail_user_block_count;
 
total_count = le64_to_cpu(sbi->raw_super->block_count);
user_block_count = sbi->user_block_count;
@@ -953,16 +954,16 @@ static int f2fs_statfs(struct dentry *dentry, struct 
kstatfs *buf)
ovp_count = SM_I(sbi)->ovp_segments << sbi->log_blocks_per_seg;
buf->f_type = F2FS_SUPER_MAGIC;
buf->f_bsize = sbi->blocksize;
+   avail_user_block_count = user_block_count - sbi->reserved_blocks;
 
buf->f_blocks = total_count - start_count;
buf->f_bfree = user_block_count - valid_user_blocks(sbi) + ovp_count;
-   buf->f_bavail = user_block_count - valid_user_blocks(sbi) -
-   sbi->reserved_blocks;
+   buf->f_bavail = avail_user_block_count - valid_user_blocks(sbi);
 
avail_node_count = sbi->total_node_count - F2FS_RESERVED_NODE_NUM;
 
-   if (avail_node_count > user_block_count) {
-   buf->f_files = user_block_count;
+   if (avail_node_count > avail_user_block_count) {
+   buf->f_files = avail_user_block_count;
buf->f_ffree = buf->f_bavail;
} else {
buf->f_files = avail_node_count;
-- 
1.8.5.2



Re: Possible race in pc87413_wdt.ko

2017-08-07 Thread Guenter Roeck

On 08/07/2017 06:22 AM, Anton Volkov wrote:

Hello.

While searching for races in the Linux kernel I've come across 
"drivers/watchdog/pc87413_wdt.ko" module. Here is a question that I came up 
with while analyzing results. Lines are given using the info from Linux v4.12.

Consider the following case:

Thread 1:  Thread 2:
pc87413_init
misc_register(_miscdev)
-> pc87413_get_swc_base_addr   pc87413_open
-> pc87413_refresh
   -> pc87413_swc_bank3
  swc_base_addr = ...  
  (pc87413_wdt.c: line 133)(pc87413_wdt.c: line 146)

So in this case preemptive registration of the device leads to a possibility of 
race between the initialization process and a callback to the registered device.

Is this race feasible from your point of view? And if it is, is it possible to 
move the device registration a bit further down in the pc87413_init function?



Yes, the race is feasible, and it is possible to move the device registration 
function
(though the preferred solution would be to convert the driver to use the 
watchdog
subsystem). The code looks pretty bad as written.

Just not sure if it is worth bothering about it. I suspect no on is using that 
driver
anymore (the datasheet is from 2001). Might as well just declare it obsolete and
wait for someone to scream.

Guenter


Re: Possible race in pc87413_wdt.ko

2017-08-07 Thread Guenter Roeck

On 08/07/2017 06:22 AM, Anton Volkov wrote:

Hello.

While searching for races in the Linux kernel I've come across 
"drivers/watchdog/pc87413_wdt.ko" module. Here is a question that I came up 
with while analyzing results. Lines are given using the info from Linux v4.12.

Consider the following case:

Thread 1:  Thread 2:
pc87413_init
misc_register(_miscdev)
-> pc87413_get_swc_base_addr   pc87413_open
-> pc87413_refresh
   -> pc87413_swc_bank3
  swc_base_addr = ...  
  (pc87413_wdt.c: line 133)(pc87413_wdt.c: line 146)

So in this case preemptive registration of the device leads to a possibility of 
race between the initialization process and a callback to the registered device.

Is this race feasible from your point of view? And if it is, is it possible to 
move the device registration a bit further down in the pc87413_init function?



Yes, the race is feasible, and it is possible to move the device registration 
function
(though the preferred solution would be to convert the driver to use the 
watchdog
subsystem). The code looks pretty bad as written.

Just not sure if it is worth bothering about it. I suspect no on is using that 
driver
anymore (the datasheet is from 2001). Might as well just declare it obsolete and
wait for someone to scream.

Guenter


Re: [PATCH -mm] mm: Clear to access sub-page last when clearing huge page

2017-08-07 Thread Mike Kravetz
On 08/07/2017 12:21 AM, Huang, Ying wrote:
> From: Huang Ying 
> 
> Huge page helps to reduce TLB miss rate, but it has higher cache
> footprint, sometimes this may cause some issue.  For example, when
> clearing huge page on x86_64 platform, the cache footprint is 2M.  But
> on a Xeon E5 v3 2699 CPU, there are 18 cores, 36 threads, and only 45M
> LLC (last level cache).  That is, in average, there are 2.5M LLC for
> each core and 1.25M LLC for each thread.  If the cache pressure is
> heavy when clearing the huge page, and we clear the huge page from the
> begin to the end, it is possible that the begin of huge page is
> evicted from the cache after we finishing clearing the end of the huge
> page.  And it is possible for the application to access the begin of
> the huge page after clearing the huge page.
> 
> To help the above situation, in this patch, when we clear a huge page,
> the order to clear sub-pages is changed.  In quite some situation, we
> can get the address that the application will access after we clear
> the huge page, for example, in a page fault handler.  Instead of
> clearing the huge page from begin to end, we will clear the sub-pages
> farthest from the the sub-page to access firstly, and clear the
> sub-page to access last.  This will make the sub-page to access most
> cache-hot and sub-pages around it more cache-hot too.  If we cannot
> know the address the application will access, the begin of the huge
> page is assumed to be the the address the application will access.
> 
> With this patch, the throughput increases ~28.3% in vm-scalability
> anon-w-seq test case with 72 processes on a 2 socket Xeon E5 v3 2699
> system (36 cores, 72 threads).  The test case creates 72 processes,
> each process mmap a big anonymous memory area and writes to it from
> the begin to the end.  For each process, other processes could be seen
> as other workload which generates heavy cache pressure.  At the same
> time, the cache miss rate reduced from ~33.4% to ~31.7%, the
> IPC (instruction per cycle) increased from 0.56 to 0.74, and the time
> spent in user space is reduced ~7.9%
> 
> Thanks Andi Kleen to propose to use address to access to determine the
> order of sub-pages to clear.
> 
> The hugetlbfs access address could be improved, will do that in
> another patch.

hugetlb_fault masks off the actual faulting address with,
address &= huge_page_mask(h);
before calling hugetlb_no_page.

But, we could pass down the actual (unmasked) address to take advantage
of this optimization for hugetlb faults as well.  hugetlb_fault is the
only caller of hugetlb_no_page, so this should be pretty straight forward.

Were you thinking of additional improvements?
-- 
Mike Kravetz


Re: [PATCH -mm] mm: Clear to access sub-page last when clearing huge page

2017-08-07 Thread Mike Kravetz
On 08/07/2017 12:21 AM, Huang, Ying wrote:
> From: Huang Ying 
> 
> Huge page helps to reduce TLB miss rate, but it has higher cache
> footprint, sometimes this may cause some issue.  For example, when
> clearing huge page on x86_64 platform, the cache footprint is 2M.  But
> on a Xeon E5 v3 2699 CPU, there are 18 cores, 36 threads, and only 45M
> LLC (last level cache).  That is, in average, there are 2.5M LLC for
> each core and 1.25M LLC for each thread.  If the cache pressure is
> heavy when clearing the huge page, and we clear the huge page from the
> begin to the end, it is possible that the begin of huge page is
> evicted from the cache after we finishing clearing the end of the huge
> page.  And it is possible for the application to access the begin of
> the huge page after clearing the huge page.
> 
> To help the above situation, in this patch, when we clear a huge page,
> the order to clear sub-pages is changed.  In quite some situation, we
> can get the address that the application will access after we clear
> the huge page, for example, in a page fault handler.  Instead of
> clearing the huge page from begin to end, we will clear the sub-pages
> farthest from the the sub-page to access firstly, and clear the
> sub-page to access last.  This will make the sub-page to access most
> cache-hot and sub-pages around it more cache-hot too.  If we cannot
> know the address the application will access, the begin of the huge
> page is assumed to be the the address the application will access.
> 
> With this patch, the throughput increases ~28.3% in vm-scalability
> anon-w-seq test case with 72 processes on a 2 socket Xeon E5 v3 2699
> system (36 cores, 72 threads).  The test case creates 72 processes,
> each process mmap a big anonymous memory area and writes to it from
> the begin to the end.  For each process, other processes could be seen
> as other workload which generates heavy cache pressure.  At the same
> time, the cache miss rate reduced from ~33.4% to ~31.7%, the
> IPC (instruction per cycle) increased from 0.56 to 0.74, and the time
> spent in user space is reduced ~7.9%
> 
> Thanks Andi Kleen to propose to use address to access to determine the
> order of sub-pages to clear.
> 
> The hugetlbfs access address could be improved, will do that in
> another patch.

hugetlb_fault masks off the actual faulting address with,
address &= huge_page_mask(h);
before calling hugetlb_no_page.

But, we could pass down the actual (unmasked) address to take advantage
of this optimization for hugetlb faults as well.  hugetlb_fault is the
only caller of hugetlb_no_page, so this should be pretty straight forward.

Were you thinking of additional improvements?
-- 
Mike Kravetz


[PATCH v2 3/4] KVM: s390: implements the kvm_arch_vcpu_in_kernel()

2017-08-07 Thread Longpeng(Mike)
This implements the kvm_arch_vcpu_in_kernel() for s390.

Signed-off-by: Longpeng(Mike) 
---
 arch/s390/kvm/kvm-s390.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
index 0b0c689..e46177b 100644
--- a/arch/s390/kvm/kvm-s390.c
+++ b/arch/s390/kvm/kvm-s390.c
@@ -2449,7 +2449,7 @@ int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
 
 bool kvm_arch_vcpu_in_kernel(struct kvm_vcpu *vcpu)
 {
-   return false;
+   return !(vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE);
 }
 EXPORT_SYMBOL_GPL(kvm_arch_vcpu_in_kernel);
 
-- 
1.8.3.1




[PATCH v2 2/4] KVM: X86: implement the logic for spinlock optimization

2017-08-07 Thread Longpeng(Mike)
1. Implements the kvm_arch_vcpu_in_kernel(), because get_cpl requires
vcpu_load, so we must cache the result(whether the vcpu was preempted
when its cpl=0) in kvm_vcpu_arch.

2. Add ->spin_in_kernel hook, because we can get benefit from VMX.

Signed-off-by: Longpeng(Mike) 
---
 arch/x86/include/asm/kvm_host.h |  5 +
 arch/x86/kvm/hyperv.c   |  2 +-
 arch/x86/kvm/svm.c  |  8 +++-
 arch/x86/kvm/vmx.c  | 16 +++-
 arch/x86/kvm/x86.c  |  7 ++-
 5 files changed, 34 insertions(+), 4 deletions(-)

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 87ac4fb..d2b2d57 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -688,6 +688,9 @@ struct kvm_vcpu_arch {
 
/* GPA available (AMD only) */
bool gpa_available;
+
+   /* be preempted when it's in kernel-mode(cpl=0) */
+   bool preempted_in_kernel;
 };
 
 struct kvm_lpage_info {
@@ -1057,6 +1060,8 @@ struct kvm_x86_ops {
void (*cancel_hv_timer)(struct kvm_vcpu *vcpu);
 
void (*setup_mce)(struct kvm_vcpu *vcpu);
+
+   bool (*spin_in_kernel)(struct kvm_vcpu *vcpu);
 };
 
 struct kvm_arch_async_pf {
diff --git a/arch/x86/kvm/hyperv.c b/arch/x86/kvm/hyperv.c
index cd0e6e6..dec5e8a 100644
--- a/arch/x86/kvm/hyperv.c
+++ b/arch/x86/kvm/hyperv.c
@@ -1268,7 +1268,7 @@ int kvm_hv_hypercall(struct kvm_vcpu *vcpu)
 
switch (code) {
case HVCALL_NOTIFY_LONG_SPIN_WAIT:
-   kvm_vcpu_on_spin(vcpu, kvm_arch_vcpu_in_kernel(vcpu));
+   kvm_vcpu_on_spin(vcpu, kvm_x86_ops->spin_in_kernel(vcpu));
break;
case HVCALL_POST_MESSAGE:
case HVCALL_SIGNAL_EVENT:
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index e6ed24e..ccb6df7 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -3751,7 +3751,7 @@ static int pause_interception(struct vcpu_svm *svm)
 {
struct kvm_vcpu *vcpu = &(svm->vcpu);
 
-   kvm_vcpu_on_spin(vcpu, kvm_arch_vcpu_in_kernel(vcpu));
+   kvm_vcpu_on_spin(vcpu, kvm_x86_ops->spin_in_kernel(vcpu));
return 1;
 }
 
@@ -5364,6 +5364,11 @@ static void svm_setup_mce(struct kvm_vcpu *vcpu)
vcpu->arch.mcg_cap &= 0x1ff;
 }
 
+static bool svm_spin_in_kernel(struct kvm_vcpu *vcpu)
+{
+   return svm_get_cpl(vcpu) == 0;
+}
+
 static struct kvm_x86_ops svm_x86_ops __ro_after_init = {
.cpu_has_kvm_support = has_svm,
.disabled_by_bios = is_disabled,
@@ -5476,6 +5481,7 @@ static void svm_setup_mce(struct kvm_vcpu *vcpu)
.deliver_posted_interrupt = svm_deliver_avic_intr,
.update_pi_irte = svm_update_pi_irte,
.setup_mce = svm_setup_mce,
+   .spin_in_kernel = svm_spin_in_kernel,
 };
 
 static int __init svm_init(void)
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 9d6223a..297a158 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -6761,7 +6761,8 @@ static int handle_pause(struct kvm_vcpu *vcpu)
if (ple_gap)
grow_ple_window(vcpu);
 
-   kvm_vcpu_on_spin(vcpu, kvm_arch_vcpu_in_kernel(vcpu));
+   /* See comments in vmx_spin_in_kernel() */
+   kvm_vcpu_on_spin(vcpu, true);
return kvm_skip_emulated_instruction(vcpu);
 }
 
@@ -11636,6 +11637,17 @@ static void vmx_setup_mce(struct kvm_vcpu *vcpu)
~FEATURE_CONTROL_LMCE;
 }
 
+static bool vmx_spin_in_kernel(struct kvm_vcpu *vcpu)
+{
+   /*
+* Intel sdm vol3 ch-25.1.3 says: The “PAUSE-loop exiting”
+* VM-execution control is ignored if CPL > 0. OTOH, KVM
+* never set PAUSE_EXITING and just set PLE if supported,
+* so the vcpu must be CPL=0 if it gets a PAUSE exit.
+*/
+   return true;
+}
+
 static struct kvm_x86_ops vmx_x86_ops __ro_after_init = {
.cpu_has_kvm_support = cpu_has_kvm_support,
.disabled_by_bios = vmx_disabled_by_bios,
@@ -11763,6 +11775,8 @@ static void vmx_setup_mce(struct kvm_vcpu *vcpu)
 #endif
 
.setup_mce = vmx_setup_mce,
+
+   .spin_in_kernel = vmx_spin_in_kernel,
 };
 
 static int __init vmx_init(void)
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 4430be6..28299b9 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -2881,6 +2881,10 @@ static void kvm_steal_time_set_preempted(struct kvm_vcpu 
*vcpu)
 void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
 {
int idx;
+
+   if (vcpu->preempted)
+   vcpu->arch.preempted_in_kernel = !kvm_x86_ops->get_cpl(vcpu);
+
/*
 * Disable page faults because we're in atomic context here.
 * kvm_write_guest_offset_cached() would call might_fault()
@@ -7992,6 +7996,7 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
kvm_pmu_init(vcpu);
 
vcpu->arch.pending_external_vector = -1;
+   vcpu->arch.preempted_in_kernel = false;
 
kvm_hv_vcpu_init(vcpu);
 
@@ -8441,7 +8446,7 @@ int 

[PATCH v2 3/4] KVM: s390: implements the kvm_arch_vcpu_in_kernel()

2017-08-07 Thread Longpeng(Mike)
This implements the kvm_arch_vcpu_in_kernel() for s390.

Signed-off-by: Longpeng(Mike) 
---
 arch/s390/kvm/kvm-s390.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
index 0b0c689..e46177b 100644
--- a/arch/s390/kvm/kvm-s390.c
+++ b/arch/s390/kvm/kvm-s390.c
@@ -2449,7 +2449,7 @@ int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
 
 bool kvm_arch_vcpu_in_kernel(struct kvm_vcpu *vcpu)
 {
-   return false;
+   return !(vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE);
 }
 EXPORT_SYMBOL_GPL(kvm_arch_vcpu_in_kernel);
 
-- 
1.8.3.1




[PATCH v2 2/4] KVM: X86: implement the logic for spinlock optimization

2017-08-07 Thread Longpeng(Mike)
1. Implements the kvm_arch_vcpu_in_kernel(), because get_cpl requires
vcpu_load, so we must cache the result(whether the vcpu was preempted
when its cpl=0) in kvm_vcpu_arch.

2. Add ->spin_in_kernel hook, because we can get benefit from VMX.

Signed-off-by: Longpeng(Mike) 
---
 arch/x86/include/asm/kvm_host.h |  5 +
 arch/x86/kvm/hyperv.c   |  2 +-
 arch/x86/kvm/svm.c  |  8 +++-
 arch/x86/kvm/vmx.c  | 16 +++-
 arch/x86/kvm/x86.c  |  7 ++-
 5 files changed, 34 insertions(+), 4 deletions(-)

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 87ac4fb..d2b2d57 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -688,6 +688,9 @@ struct kvm_vcpu_arch {
 
/* GPA available (AMD only) */
bool gpa_available;
+
+   /* be preempted when it's in kernel-mode(cpl=0) */
+   bool preempted_in_kernel;
 };
 
 struct kvm_lpage_info {
@@ -1057,6 +1060,8 @@ struct kvm_x86_ops {
void (*cancel_hv_timer)(struct kvm_vcpu *vcpu);
 
void (*setup_mce)(struct kvm_vcpu *vcpu);
+
+   bool (*spin_in_kernel)(struct kvm_vcpu *vcpu);
 };
 
 struct kvm_arch_async_pf {
diff --git a/arch/x86/kvm/hyperv.c b/arch/x86/kvm/hyperv.c
index cd0e6e6..dec5e8a 100644
--- a/arch/x86/kvm/hyperv.c
+++ b/arch/x86/kvm/hyperv.c
@@ -1268,7 +1268,7 @@ int kvm_hv_hypercall(struct kvm_vcpu *vcpu)
 
switch (code) {
case HVCALL_NOTIFY_LONG_SPIN_WAIT:
-   kvm_vcpu_on_spin(vcpu, kvm_arch_vcpu_in_kernel(vcpu));
+   kvm_vcpu_on_spin(vcpu, kvm_x86_ops->spin_in_kernel(vcpu));
break;
case HVCALL_POST_MESSAGE:
case HVCALL_SIGNAL_EVENT:
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index e6ed24e..ccb6df7 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -3751,7 +3751,7 @@ static int pause_interception(struct vcpu_svm *svm)
 {
struct kvm_vcpu *vcpu = &(svm->vcpu);
 
-   kvm_vcpu_on_spin(vcpu, kvm_arch_vcpu_in_kernel(vcpu));
+   kvm_vcpu_on_spin(vcpu, kvm_x86_ops->spin_in_kernel(vcpu));
return 1;
 }
 
@@ -5364,6 +5364,11 @@ static void svm_setup_mce(struct kvm_vcpu *vcpu)
vcpu->arch.mcg_cap &= 0x1ff;
 }
 
+static bool svm_spin_in_kernel(struct kvm_vcpu *vcpu)
+{
+   return svm_get_cpl(vcpu) == 0;
+}
+
 static struct kvm_x86_ops svm_x86_ops __ro_after_init = {
.cpu_has_kvm_support = has_svm,
.disabled_by_bios = is_disabled,
@@ -5476,6 +5481,7 @@ static void svm_setup_mce(struct kvm_vcpu *vcpu)
.deliver_posted_interrupt = svm_deliver_avic_intr,
.update_pi_irte = svm_update_pi_irte,
.setup_mce = svm_setup_mce,
+   .spin_in_kernel = svm_spin_in_kernel,
 };
 
 static int __init svm_init(void)
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 9d6223a..297a158 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -6761,7 +6761,8 @@ static int handle_pause(struct kvm_vcpu *vcpu)
if (ple_gap)
grow_ple_window(vcpu);
 
-   kvm_vcpu_on_spin(vcpu, kvm_arch_vcpu_in_kernel(vcpu));
+   /* See comments in vmx_spin_in_kernel() */
+   kvm_vcpu_on_spin(vcpu, true);
return kvm_skip_emulated_instruction(vcpu);
 }
 
@@ -11636,6 +11637,17 @@ static void vmx_setup_mce(struct kvm_vcpu *vcpu)
~FEATURE_CONTROL_LMCE;
 }
 
+static bool vmx_spin_in_kernel(struct kvm_vcpu *vcpu)
+{
+   /*
+* Intel sdm vol3 ch-25.1.3 says: The “PAUSE-loop exiting”
+* VM-execution control is ignored if CPL > 0. OTOH, KVM
+* never set PAUSE_EXITING and just set PLE if supported,
+* so the vcpu must be CPL=0 if it gets a PAUSE exit.
+*/
+   return true;
+}
+
 static struct kvm_x86_ops vmx_x86_ops __ro_after_init = {
.cpu_has_kvm_support = cpu_has_kvm_support,
.disabled_by_bios = vmx_disabled_by_bios,
@@ -11763,6 +11775,8 @@ static void vmx_setup_mce(struct kvm_vcpu *vcpu)
 #endif
 
.setup_mce = vmx_setup_mce,
+
+   .spin_in_kernel = vmx_spin_in_kernel,
 };
 
 static int __init vmx_init(void)
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 4430be6..28299b9 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -2881,6 +2881,10 @@ static void kvm_steal_time_set_preempted(struct kvm_vcpu 
*vcpu)
 void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
 {
int idx;
+
+   if (vcpu->preempted)
+   vcpu->arch.preempted_in_kernel = !kvm_x86_ops->get_cpl(vcpu);
+
/*
 * Disable page faults because we're in atomic context here.
 * kvm_write_guest_offset_cached() would call might_fault()
@@ -7992,6 +7996,7 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
kvm_pmu_init(vcpu);
 
vcpu->arch.pending_external_vector = -1;
+   vcpu->arch.preempted_in_kernel = false;
 
kvm_hv_vcpu_init(vcpu);
 
@@ -8441,7 +8446,7 @@ int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
 
 

  1   2   3   4   5   6   7   8   9   10   >