[lkp] [blk] e7b81af035: BUG: unable to handle kernel NULL pointer dereference at 00000000000000a0

2016-04-17 Thread kernel test robot
FYI, we noticed the below changes on

https://git.kernel.org/pub/scm/linux/kernel/git/axboe/linux-block.git 
wb-buf-throttle
commit e7b81af035ddc1323090d86350627881fcf9b1b0 ("blk-wb: updates")


++++
|   
 | 2adccca124 | e7b81af035 |
++++
| boot_successes
 | 3  | 2  |
| boot_failures 
 | 1  | 6  |
| 
page_allocation_failure:order:#,mode:#(GFP_NOWAIT|__GFP_HIGH|__GFP_COMP|__GFP_NOTRACK)
 | 1  ||
| warn_alloc_failed+0x  
 | 1  ||
| Mem-Info  
 | 1  ||
| backtrace:do_execveat_common  
 | 1  ||
| backtrace:compat_SyS_execve   
 | 1  ||
| backtrace:compat_process_vm_rw
 | 1  ||
| backtrace:compat_SyS_process_vm_writev
 | 1  ||
| BUG:unable_to_handle_kernel   
 | 0  | 6  |
| Oops  
 | 0  | 6  |
| RIP:blk_wb_done   
 | 0  | 6  |
| Kernel_panic-not_syncing:Fatal_exception_in_interrupt 
 | 0  | 6  |
| backtrace:cpu_startup_entry   
 | 0  | 3  |
| backtrace:schedule_preempt_disabled   
 | 0  | 1  |
++++



[   17.486381] FDC 0 is a S82078B
[   17.557276] brd: module loaded
[   17.602156] loop: module loaded
[   17.612220] BUG: unable to handle kernel NULL pointer dereference at 00000000000000a0
[   17.614651] IP: [] blk_wb_done+0x18/0x8e
[   17.616135] PGD 0 
[   17.617105] Oops:  [#1] SMP 
[   17.618343] Modules linked in:
[   17.619459] CPU: 0 PID: 0 Comm: swapper/0 Not tainted 
4.6.0-rc1-00096-ge7b81af #1
[   17.621643] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 
Debian-1.8.2-1 04/01/2014
[   17.624034] task: 8241d540 ti: 8240 task.ti: 
8240
[   17.626211] RIP: 0010:[]  [] 
blk_wb_done+0x18/0x8e
[   17.628556] RSP: 0018:880038803df8  EFLAGS: 00010046
[   17.629923] RAX: 88003e550008 RBX:  RCX: 88003880cf20
[   17.631540] RDX: ef7bdef7bdef7bdf RSI: 88003e4a RDI: 
[   17.633175] RBP: 880038803e08 R08: e272d5c3 R09: 0004
[   17.634807] R10: 880038803d78 R11: 880038808000 R12: 88003e4a
[   17.636426] R13: e8c07800 R14:  R15: 0001
[   17.638053] FS:  () GS:88003880() 
knlGS:
[   17.640324] CS:  0010 DS:  ES:  CR0: 80050033
[   17.641740] CR2: 00a0 CR3: 02418000 CR4: 06f0
[   17.643381] Stack:
[   17.644273]  88003e4a  880038803e28 
8152fa30
[   17.646792]  88003e4a  880038803e38 
81755005
[   17.649300]  880038803e68 8152fcef  
88003e974548
[   17.651805] Call Trace:
[   17.652740]   
[   17.653108]  [] blk_mq_end_request+0x38/0x6c
[   17.655190]  [] virtblk_request_done+0x5e/0x60
[   17.656660]  [] __blk_mq_complete_request+0x122/0x132
[   17.658222]  [] blk_mq_complete_request+0x1c/0x1e
[   17.659724]  [] virtblk_done+0x74/0xce
[   17.661118]  [] vring_interrupt+0x32/0x39
[   17.662525]  [] handle_irq_event_percpu+0x146/0x3d7
[   17.664060]  [] handle_irq_event+0x38/0x56
[   17.665463]  [] handle_edge_irq+0xd9/0xfb
[   17.666872]  [] handle_irq+0x101/0x109
[   17.668246]  [] do_IRQ+0x85/0x101
[   17.669550]  [] common_interrupt+0x8c/0x8c
[   17.671071]   
[   17.671434]  [] ? native_safe_halt+0x6/0x8
[   17.673485]  [] default_idle+0x60/0x1a8
[   17.674935]  [] arch_cpu_idle+0xf/0x11
[   17.676307]  [] default_idle_call+0x3d/0x44
[   17.677736]  [] cpu_startup_entry+0x218/0x3e6
[   17.679209]  [] rest_init+0x135/0x13b
[   17.680585]  [] 

[lkp] [blk] e7b81af035: BUG: unable to handle kernel NULL pointer dereference at 00000000000000a0

2016-04-17 Thread kernel test robot
FYI, we noticed the below changes on

https://git.kernel.org/pub/scm/linux/kernel/git/axboe/linux-block.git 
wb-buf-throttle
commit e7b81af035ddc1323090d86350627881fcf9b1b0 ("blk-wb: updates")


++++
|   
 | 2adccca124 | e7b81af035 |
++++
| boot_successes
 | 3  | 2  |
| boot_failures 
 | 1  | 6  |
| 
page_allocation_failure:order:#,mode:#(GFP_NOWAIT|__GFP_HIGH|__GFP_COMP|__GFP_NOTRACK)
 | 1  ||
| warn_alloc_failed+0x  
 | 1  ||
| Mem-Info  
 | 1  ||
| backtrace:do_execveat_common  
 | 1  ||
| backtrace:compat_SyS_execve   
 | 1  ||
| backtrace:compat_process_vm_rw
 | 1  ||
| backtrace:compat_SyS_process_vm_writev
 | 1  ||
| BUG:unable_to_handle_kernel   
 | 0  | 6  |
| Oops  
 | 0  | 6  |
| RIP:blk_wb_done   
 | 0  | 6  |
| Kernel_panic-not_syncing:Fatal_exception_in_interrupt 
 | 0  | 6  |
| backtrace:cpu_startup_entry   
 | 0  | 3  |
| backtrace:schedule_preempt_disabled   
 | 0  | 1  |
++++



[   17.486381] FDC 0 is a S82078B
[   17.557276] brd: module loaded
[   17.602156] loop: module loaded
[   17.612220] BUG: unable to handle kernel NULL pointer dereference at 00000000000000a0
[   17.614651] IP: [] blk_wb_done+0x18/0x8e
[   17.616135] PGD 0 
[   17.617105] Oops:  [#1] SMP 
[   17.618343] Modules linked in:
[   17.619459] CPU: 0 PID: 0 Comm: swapper/0 Not tainted 
4.6.0-rc1-00096-ge7b81af #1
[   17.621643] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 
Debian-1.8.2-1 04/01/2014
[   17.624034] task: 8241d540 ti: 8240 task.ti: 
8240
[   17.626211] RIP: 0010:[]  [] 
blk_wb_done+0x18/0x8e
[   17.628556] RSP: 0018:880038803df8  EFLAGS: 00010046
[   17.629923] RAX: 88003e550008 RBX:  RCX: 88003880cf20
[   17.631540] RDX: ef7bdef7bdef7bdf RSI: 88003e4a RDI: 
[   17.633175] RBP: 880038803e08 R08: e272d5c3 R09: 0004
[   17.634807] R10: 880038803d78 R11: 880038808000 R12: 88003e4a
[   17.636426] R13: e8c07800 R14:  R15: 0001
[   17.638053] FS:  () GS:88003880() 
knlGS:
[   17.640324] CS:  0010 DS:  ES:  CR0: 80050033
[   17.641740] CR2: 00a0 CR3: 02418000 CR4: 06f0
[   17.643381] Stack:
[   17.644273]  88003e4a  880038803e28 
8152fa30
[   17.646792]  88003e4a  880038803e38 
81755005
[   17.649300]  880038803e68 8152fcef  
88003e974548
[   17.651805] Call Trace:
[   17.652740]   
[   17.653108]  [] blk_mq_end_request+0x38/0x6c
[   17.655190]  [] virtblk_request_done+0x5e/0x60
[   17.656660]  [] __blk_mq_complete_request+0x122/0x132
[   17.658222]  [] blk_mq_complete_request+0x1c/0x1e
[   17.659724]  [] virtblk_done+0x74/0xce
[   17.661118]  [] vring_interrupt+0x32/0x39
[   17.662525]  [] handle_irq_event_percpu+0x146/0x3d7
[   17.664060]  [] handle_irq_event+0x38/0x56
[   17.665463]  [] handle_edge_irq+0xd9/0xfb
[   17.666872]  [] handle_irq+0x101/0x109
[   17.668246]  [] do_IRQ+0x85/0x101
[   17.669550]  [] common_interrupt+0x8c/0x8c
[   17.671071]   
[   17.671434]  [] ? native_safe_halt+0x6/0x8
[   17.673485]  [] default_idle+0x60/0x1a8
[   17.674935]  [] arch_cpu_idle+0xf/0x11
[   17.676307]  [] default_idle_call+0x3d/0x44
[   17.677736]  [] cpu_startup_entry+0x218/0x3e6
[   17.679209]  [] rest_init+0x135/0x13b
[   17.680585]  [] 

Re: [PATCH 1/3] ARM: DTS: da850: add node for spi0

2016-04-17 Thread Sekhar Nori
On Friday 15 April 2016 09:47 PM, David Lechner wrote:
> On 04/15/2016 05:24 AM, Sekhar Nori wrote:
> 
>>
>> This made me notice that num-cs is populated wrongly for spi1. It
>> actually has 8 chip selects. This is fine though.
> 
> I might as well fix it since I have to make changes anyway. Don't
> remember how I came up with 6.

In section 3.7.7 of datasheet, there are 6 possible chip selects listed
for SPI0 and 8 possible chipselects for SPI1.

If you are fixing SPI1, please make that a separate patch.

>> Also, it will be nice to add pinctrl entries for spi0 like it is done
>> for spi1. You will need those anyway for using the interface.
> 
> I omitted this on purpose. For my use case, I am using the SPI as
> write-only, so not using the SOMI pin, which is actually muxed as a GPIO
> for something else. So having a pinctrl like spi1 is of no use to me. I
> figured if someone needs it, they can add it, otherwise it just is
> wasted space to me.

Alright, makes sense.

Regards,
Sekhar



Re: [PATCH 1/3] ARM: DTS: da850: add node for spi0

2016-04-17 Thread Sekhar Nori
On Friday 15 April 2016 09:47 PM, David Lechner wrote:
> On 04/15/2016 05:24 AM, Sekhar Nori wrote:
> 
>>
>> This made me notice that num-cs is populated wrongly for spi1. It
>> actually has 8 chip selects. This is fine though.
> 
> I might as well fix it since I have to make changes anyway. Don't
> remember how I came up with 6.

In section 3.7.7 of datasheet, there are 6 possible chip selects listed
for SPI0 and 8 possible chipselects for SPI1.

If you are fixing SPI1, please make that a separate patch.

>> Also, it will be nice to add pinctrl entries for spi0 like it is done
>> for spi1. You will need those anyway for using the interface.
> 
> I omitted this on purpose. For my use case, I am using the SPI as
> write-only, so not using the SOMI pin, which is actually muxed as a GPIO
> for something else. So having a pinctrl like spi1 is of no use to me. I
> figured if someone needs it, they can add it, otherwise it just is
> wasted space to me.

Alright, makes sense.

Regards,
Sekhar



RE: [PATCHv2] wlcore: spi: add wl18xx support

2016-04-17 Thread Reizer, Eyal
> >
> > - all wilink family needs special init command for entering wspi mode.
> >   extra clock cycles should be sent after the spi init command while the
> >   cs pin is high.
> > - switch to controlling the cs pin from the spi driver for achieving the
> >   above.
> > - the selected cs gpio is read from the spi device-tree node using the
> >   cs-gpios field and setup as a gpio.
> > - See the example below for specifying the cs gpio using the cs-gpios entry
> >{
> > ...
> > cs-gpios = < 5 0>;
> > ...
> > wlcore: wlcore@0 {
> > compatible = "ti,wl1835";
> > ...
> > ...
> > };
> > };
> >
> > Signed-off-by: Eyal Reizer 
> 
> I don't think this can work in general: not all SPI hosts use GPIOs for
> controlling CS, so the logic can't work, and it's also a layering violation 
> for the
> driver to look at the parent.
> 
> I would suggest fixing this using a new API function from the SPI core, if we
> don't already have a generic way to do it.
>
Originally this is what I had done, until I was pointed to the generic cs-gpio
mechanism in the SPI core.
It is a generic mechanism already in the SPI core driver.
See: Documentation/devicetree/bindings/spi/spi-bus.txt

It is also part of the generic spi.h (include/linux/spi/spi.h), already part of
"struct spi_device", so it seemed redundant to add another mechanism for
implementing the same thing.
Platforms that interact with a wilink need to use it, and platforms that don't
have this capability will probably not interact with a wilink device using SPI.

Best Regards,
Eyal





RE: [PATCHv2] wlcore: spi: add wl18xx support

2016-04-17 Thread Reizer, Eyal
> >
> > - all wilink family needs special init command for entering wspi mode.
> >   extra clock cycles should be sent after the spi init command while the
> >   cs pin is high.
> > - switch to controlling the cs pin from the spi driver for achieving the
> >   above.
> > - the selected cs gpio is read from the spi device-tree node using the
> >   cs-gpios field and setup as a gpio.
> > - See the example below for specifying the cs gpio using the cs-gpios entry
> >{
> > ...
> > cs-gpios = < 5 0>;
> > ...
> > wlcore: wlcore@0 {
> > compatible = "ti,wl1835";
> > ...
> > ...
> > };
> > };
> >
> > Signed-off-by: Eyal Reizer 
> 
> I don't think this can work in general: not all SPI hosts use GPIOs for
> controlling CS, so the logic can't work, and it's also a layering violation 
> for the
> driver to look at the parent.
> 
> I would suggest fixing this using a new API function from the SPI core, if we
> don't already have a generic way to do it.
>
Originally this is what I had done, until I was pointed to the generic cs-gpio
mechanism in the SPI core.
It is a generic mechanism already in the SPI core driver.
See: Documentation/devicetree/bindings/spi/spi-bus.txt

It is also part of the generic spi.h (include/linux/spi/spi.h), already part of
"struct spi_device", so it seemed redundant to add another mechanism for
implementing the same thing.
Platforms that interact with a wilink need to use it, and platforms that don't
have this capability will probably not interact with a wilink device using SPI.

Best Regards,
Eyal





[PATCH v2 1/3] PCI: imx6: Use enum instead of bool for variant indicator

2016-04-17 Thread Andrey Smirnov
Use enumerated type instead of a boolean flag to specify the variant of
the PCIe IP block (6Q, 6SX, etc). This patch has zero functional impact,
however it makes the code easier to extend for the case of more than 2
possible variants of an IP block (of which there are).

Signed-off-by: Andrey Smirnov 
---

Changes since v1:

- Patchset is rebased against
  
https://git.kernel.org/cgit/linux/kernel/git/helgaas/pci.git/log/?h=pci/host-imx6

- DTS files changes moved into a separate patch


 drivers/pci/host/pci-imx6.c | 126 +---
 1 file changed, 71 insertions(+), 55 deletions(-)

diff --git a/drivers/pci/host/pci-imx6.c b/drivers/pci/host/pci-imx6.c
index 0f6d630..c570bbb 100644
--- a/drivers/pci/host/pci-imx6.c
+++ b/drivers/pci/host/pci-imx6.c
@@ -31,6 +31,11 @@
 
 #define to_imx6_pcie(x)container_of(x, struct imx6_pcie, pp)
 
+enum imx6_pcie_variants {
+   IMX6Q,
+   IMX6SX
+};
+
 struct imx6_pcie {
struct gpio_desc*reset_gpio;
struct clk  *pcie_bus;
@@ -39,7 +44,7 @@ struct imx6_pcie {
struct clk  *pcie;
struct pcie_portpp;
struct regmap   *iomuxc_gpr;
-   boolis_imx6sx;
+   enum imx6_pcie_variants variant;
void __iomem*mem_base;
u32 tx_deemph_gen1;
u32 tx_deemph_gen2_3p5db;
@@ -238,7 +243,8 @@ static int imx6_pcie_assert_core_reset(struct pcie_port *pp)
struct imx6_pcie *imx6_pcie = to_imx6_pcie(pp);
u32 val, gpr1, gpr12;
 
-   if (imx6_pcie->is_imx6sx) {
+   switch (imx6_pcie->variant) {
+   case IMX6SX:
regmap_update_bits(imx6_pcie->iomuxc_gpr, IOMUXC_GPR12,
   IMX6SX_GPR12_PCIE_TEST_POWERDOWN,
   IMX6SX_GPR12_PCIE_TEST_POWERDOWN);
@@ -246,72 +252,80 @@ static int imx6_pcie_assert_core_reset(struct pcie_port 
*pp)
regmap_update_bits(imx6_pcie->iomuxc_gpr, IOMUXC_GPR5,
   IMX6SX_GPR5_PCIE_BTNRST_RESET,
   IMX6SX_GPR5_PCIE_BTNRST_RESET);
-   return 0;
-   }
-
-   /*
-* If the bootloader already enabled the link we need some special
-* handling to get the core back into a state where it is safe to
-* touch it for configuration.  As there is no dedicated reset signal
-* wired up for MX6QDL, we need to manually force LTSSM into "detect"
-* state before completely disabling LTSSM, which is a prerequisite
-* for core configuration.
-*
-* If both LTSSM_ENABLE and REF_SSP_ENABLE are active we have a strong
-* indication that the bootloader activated the link.
-*/
-   regmap_read(imx6_pcie->iomuxc_gpr, IOMUXC_GPR1, );
-   regmap_read(imx6_pcie->iomuxc_gpr, IOMUXC_GPR12, );
-
-   if ((gpr1 & IMX6Q_GPR1_PCIE_REF_CLK_EN) &&
-   (gpr12 & IMX6Q_GPR12_PCIE_CTL_2)) {
-   val = readl(pp->dbi_base + PCIE_PL_PFLR);
-   val &= ~PCIE_PL_PFLR_LINK_STATE_MASK;
-   val |= PCIE_PL_PFLR_FORCE_LINK;
-   writel(val, pp->dbi_base + PCIE_PL_PFLR);
+   break;
+   case IMX6Q:
+   /*
+* If the bootloader already enabled the link we need some 
special
+* handling to get the core back into a state where it is safe 
to
+* touch it for configuration.  As there is no dedicated reset 
signal
+* wired up for MX6QDL, we need to manually force LTSSM into 
"detect"
+* state before completely disabling LTSSM, which is a 
prerequisite
+* for core configuration.
+*
+* If both LTSSM_ENABLE and REF_SSP_ENABLE are active we have a 
strong
+* indication that the bootloader activated the link.
+*/
+   regmap_read(imx6_pcie->iomuxc_gpr, IOMUXC_GPR1, );
+   regmap_read(imx6_pcie->iomuxc_gpr, IOMUXC_GPR12, );
+
+   if ((gpr1 & IMX6Q_GPR1_PCIE_REF_CLK_EN) &&
+   (gpr12 & IMX6Q_GPR12_PCIE_CTL_2)) {
+   val = readl(pp->dbi_base + PCIE_PL_PFLR);
+   val &= ~PCIE_PL_PFLR_LINK_STATE_MASK;
+   val |= PCIE_PL_PFLR_FORCE_LINK;
+   writel(val, pp->dbi_base + PCIE_PL_PFLR);
+
+   regmap_update_bits(imx6_pcie->iomuxc_gpr, IOMUXC_GPR12,
+  IMX6Q_GPR12_PCIE_CTL_2, 0 << 10);
+   }
 
-   regmap_update_bits(imx6_pcie->iomuxc_gpr, IOMUXC_GPR12,
-   IMX6Q_GPR12_PCIE_CTL_2, 0 << 10);
+   regmap_update_bits(imx6_pcie->iomuxc_gpr, IOMUXC_GPR1,
+  

[PATCH v2 3/3] ARM: dts: imx6qp: Specify imx6qp version of PCIe core

2016-04-17 Thread Andrey Smirnov
i.MX6Quad Plus has a slightly different version of the PCIe core than
regular i.MX6Quad.

Signed-off-by: Andrey Smirnov 
---

Changes since v1:

- Patchset is rebased against
  
https://git.kernel.org/cgit/linux/kernel/git/helgaas/pci.git/log/?h=pci/host-imx6

- DTS files changes moved into a separate patch

 arch/arm/boot/dts/imx6qp.dtsi | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/arch/arm/boot/dts/imx6qp.dtsi b/arch/arm/boot/dts/imx6qp.dtsi
index 1ada714..886dbf2 100644
--- a/arch/arm/boot/dts/imx6qp.dtsi
+++ b/arch/arm/boot/dts/imx6qp.dtsi
@@ -82,5 +82,8 @@
  "ldb_di0", "ldb_di1", "prg";
};
 
+   pcie: pcie@0x0100 {
+   compatible = "fsl,imx6qp-pcie", "snps,dw-pcie";
+   };
};
 };
-- 
2.5.5



[PATCH v2 3/3] ARM: dts: imx6qp: Specify imx6qp version of PCIe core

2016-04-17 Thread Andrey Smirnov
i.MX6Quad Plus has a slightly different version of the PCIe core than
regular i.MX6Quad.

Signed-off-by: Andrey Smirnov 
---

Changes since v1:

- Patchset is rebased against
  
https://git.kernel.org/cgit/linux/kernel/git/helgaas/pci.git/log/?h=pci/host-imx6

- DTS files changes moved into a separate patch

 arch/arm/boot/dts/imx6qp.dtsi | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/arch/arm/boot/dts/imx6qp.dtsi b/arch/arm/boot/dts/imx6qp.dtsi
index 1ada714..886dbf2 100644
--- a/arch/arm/boot/dts/imx6qp.dtsi
+++ b/arch/arm/boot/dts/imx6qp.dtsi
@@ -82,5 +82,8 @@
  "ldb_di0", "ldb_di1", "prg";
};
 
+   pcie: pcie@0x0100 {
+   compatible = "fsl,imx6qp-pcie", "snps,dw-pcie";
+   };
};
 };
-- 
2.5.5



[PATCH v2 1/3] PCI: imx6: Use enum instead of bool for variant indicator

2016-04-17 Thread Andrey Smirnov
Use enumerated type instead of a boolean flag to specify the variant of
the PCIe IP block (6Q, 6SX, etc). This patch has zero functional impact,
however it makes the code easier to extend for the case of more than 2
possible variants of an IP block (of which there are).

Signed-off-by: Andrey Smirnov 
---

Changes since v1:

- Patchset is rebased against
  
https://git.kernel.org/cgit/linux/kernel/git/helgaas/pci.git/log/?h=pci/host-imx6

- DTS files changes moved into a separate patch


 drivers/pci/host/pci-imx6.c | 126 +---
 1 file changed, 71 insertions(+), 55 deletions(-)

diff --git a/drivers/pci/host/pci-imx6.c b/drivers/pci/host/pci-imx6.c
index 0f6d630..c570bbb 100644
--- a/drivers/pci/host/pci-imx6.c
+++ b/drivers/pci/host/pci-imx6.c
@@ -31,6 +31,11 @@
 
 #define to_imx6_pcie(x)container_of(x, struct imx6_pcie, pp)
 
+enum imx6_pcie_variants {
+   IMX6Q,
+   IMX6SX
+};
+
 struct imx6_pcie {
struct gpio_desc*reset_gpio;
struct clk  *pcie_bus;
@@ -39,7 +44,7 @@ struct imx6_pcie {
struct clk  *pcie;
struct pcie_portpp;
struct regmap   *iomuxc_gpr;
-   boolis_imx6sx;
+   enum imx6_pcie_variants variant;
void __iomem*mem_base;
u32 tx_deemph_gen1;
u32 tx_deemph_gen2_3p5db;
@@ -238,7 +243,8 @@ static int imx6_pcie_assert_core_reset(struct pcie_port *pp)
struct imx6_pcie *imx6_pcie = to_imx6_pcie(pp);
u32 val, gpr1, gpr12;
 
-   if (imx6_pcie->is_imx6sx) {
+   switch (imx6_pcie->variant) {
+   case IMX6SX:
regmap_update_bits(imx6_pcie->iomuxc_gpr, IOMUXC_GPR12,
   IMX6SX_GPR12_PCIE_TEST_POWERDOWN,
   IMX6SX_GPR12_PCIE_TEST_POWERDOWN);
@@ -246,72 +252,80 @@ static int imx6_pcie_assert_core_reset(struct pcie_port 
*pp)
regmap_update_bits(imx6_pcie->iomuxc_gpr, IOMUXC_GPR5,
   IMX6SX_GPR5_PCIE_BTNRST_RESET,
   IMX6SX_GPR5_PCIE_BTNRST_RESET);
-   return 0;
-   }
-
-   /*
-* If the bootloader already enabled the link we need some special
-* handling to get the core back into a state where it is safe to
-* touch it for configuration.  As there is no dedicated reset signal
-* wired up for MX6QDL, we need to manually force LTSSM into "detect"
-* state before completely disabling LTSSM, which is a prerequisite
-* for core configuration.
-*
-* If both LTSSM_ENABLE and REF_SSP_ENABLE are active we have a strong
-* indication that the bootloader activated the link.
-*/
-   regmap_read(imx6_pcie->iomuxc_gpr, IOMUXC_GPR1, );
-   regmap_read(imx6_pcie->iomuxc_gpr, IOMUXC_GPR12, );
-
-   if ((gpr1 & IMX6Q_GPR1_PCIE_REF_CLK_EN) &&
-   (gpr12 & IMX6Q_GPR12_PCIE_CTL_2)) {
-   val = readl(pp->dbi_base + PCIE_PL_PFLR);
-   val &= ~PCIE_PL_PFLR_LINK_STATE_MASK;
-   val |= PCIE_PL_PFLR_FORCE_LINK;
-   writel(val, pp->dbi_base + PCIE_PL_PFLR);
+   break;
+   case IMX6Q:
+   /*
+* If the bootloader already enabled the link we need some 
special
+* handling to get the core back into a state where it is safe 
to
+* touch it for configuration.  As there is no dedicated reset 
signal
+* wired up for MX6QDL, we need to manually force LTSSM into 
"detect"
+* state before completely disabling LTSSM, which is a 
prerequisite
+* for core configuration.
+*
+* If both LTSSM_ENABLE and REF_SSP_ENABLE are active we have a 
strong
+* indication that the bootloader activated the link.
+*/
+   regmap_read(imx6_pcie->iomuxc_gpr, IOMUXC_GPR1, );
+   regmap_read(imx6_pcie->iomuxc_gpr, IOMUXC_GPR12, );
+
+   if ((gpr1 & IMX6Q_GPR1_PCIE_REF_CLK_EN) &&
+   (gpr12 & IMX6Q_GPR12_PCIE_CTL_2)) {
+   val = readl(pp->dbi_base + PCIE_PL_PFLR);
+   val &= ~PCIE_PL_PFLR_LINK_STATE_MASK;
+   val |= PCIE_PL_PFLR_FORCE_LINK;
+   writel(val, pp->dbi_base + PCIE_PL_PFLR);
+
+   regmap_update_bits(imx6_pcie->iomuxc_gpr, IOMUXC_GPR12,
+  IMX6Q_GPR12_PCIE_CTL_2, 0 << 10);
+   }
 
-   regmap_update_bits(imx6_pcie->iomuxc_gpr, IOMUXC_GPR12,
-   IMX6Q_GPR12_PCIE_CTL_2, 0 << 10);
+   regmap_update_bits(imx6_pcie->iomuxc_gpr, IOMUXC_GPR1,
+  IMX6Q_GPR1_PCIE_TEST_PD, 1 << 18);
+

[PATCH] sched/cpufreq: don't trigger cpufreq update w/o real rt/deadline tasks running

2016-04-17 Thread Wanpeng Li
Sometimes update_curr() is called w/o tasks actually running; this is
captured by:
u64 delta_exec = rq_clock_task(rq) - curr->se.exec_start;
We should not trigger a cpufreq update in this case for rt/deadline
classes, and this patch fixes it.

Signed-off-by: Wanpeng Li 
---
 kernel/sched/deadline.c | 8 
 kernel/sched/rt.c   | 8 
 2 files changed, 8 insertions(+), 8 deletions(-)

diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c
index affd97e..8f9b5af 100644
--- a/kernel/sched/deadline.c
+++ b/kernel/sched/deadline.c
@@ -717,10 +717,6 @@ static void update_curr_dl(struct rq *rq)
if (!dl_task(curr) || !on_dl_rq(dl_se))
return;
 
-   /* Kick cpufreq (see the comment in linux/cpufreq.h). */
-   if (cpu_of(rq) == smp_processor_id())
-   cpufreq_trigger_update(rq_clock(rq));
-
/*
 * Consumed budget is computed considering the time as
 * observed by schedulable tasks (excluding time spent
@@ -736,6 +732,10 @@ static void update_curr_dl(struct rq *rq)
return;
}
 
+   /* kick cpufreq (see the comment in linux/cpufreq.h). */
+   if (cpu_of(rq) == smp_processor_id())
+   cpufreq_trigger_update(rq_clock(rq));
+
schedstat_set(curr->se.statistics.exec_max,
  max(curr->se.statistics.exec_max, delta_exec));
 
diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c
index c41ea7a..19e1306 100644
--- a/kernel/sched/rt.c
+++ b/kernel/sched/rt.c
@@ -953,14 +953,14 @@ static void update_curr_rt(struct rq *rq)
if (curr->sched_class != _sched_class)
return;
 
-   /* Kick cpufreq (see the comment in linux/cpufreq.h). */
-   if (cpu_of(rq) == smp_processor_id())
-   cpufreq_trigger_update(rq_clock(rq));
-
delta_exec = rq_clock_task(rq) - curr->se.exec_start;
if (unlikely((s64)delta_exec <= 0))
return;
 
+   /* Kick cpufreq (see the comment in linux/cpufreq.h). */
+   if (cpu_of(rq) == smp_processor_id())
+   cpufreq_trigger_update(rq_clock(rq));
+
schedstat_set(curr->se.statistics.exec_max,
  max(curr->se.statistics.exec_max, delta_exec));
 
-- 
1.9.1



[PATCH] sched/cpufreq: don't trigger cpufreq update w/o real rt/deadline tasks running

2016-04-17 Thread Wanpeng Li
Sometimes update_curr() is called w/o tasks actually running; this is
captured by:
u64 delta_exec = rq_clock_task(rq) - curr->se.exec_start;
We should not trigger a cpufreq update in this case for rt/deadline
classes, and this patch fixes it.

Signed-off-by: Wanpeng Li 
---
 kernel/sched/deadline.c | 8 
 kernel/sched/rt.c   | 8 
 2 files changed, 8 insertions(+), 8 deletions(-)

diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c
index affd97e..8f9b5af 100644
--- a/kernel/sched/deadline.c
+++ b/kernel/sched/deadline.c
@@ -717,10 +717,6 @@ static void update_curr_dl(struct rq *rq)
if (!dl_task(curr) || !on_dl_rq(dl_se))
return;
 
-   /* Kick cpufreq (see the comment in linux/cpufreq.h). */
-   if (cpu_of(rq) == smp_processor_id())
-   cpufreq_trigger_update(rq_clock(rq));
-
/*
 * Consumed budget is computed considering the time as
 * observed by schedulable tasks (excluding time spent
@@ -736,6 +732,10 @@ static void update_curr_dl(struct rq *rq)
return;
}
 
+   /* kick cpufreq (see the comment in linux/cpufreq.h). */
+   if (cpu_of(rq) == smp_processor_id())
+   cpufreq_trigger_update(rq_clock(rq));
+
schedstat_set(curr->se.statistics.exec_max,
  max(curr->se.statistics.exec_max, delta_exec));
 
diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c
index c41ea7a..19e1306 100644
--- a/kernel/sched/rt.c
+++ b/kernel/sched/rt.c
@@ -953,14 +953,14 @@ static void update_curr_rt(struct rq *rq)
if (curr->sched_class != _sched_class)
return;
 
-   /* Kick cpufreq (see the comment in linux/cpufreq.h). */
-   if (cpu_of(rq) == smp_processor_id())
-   cpufreq_trigger_update(rq_clock(rq));
-
delta_exec = rq_clock_task(rq) - curr->se.exec_start;
if (unlikely((s64)delta_exec <= 0))
return;
 
+   /* Kick cpufreq (see the comment in linux/cpufreq.h). */
+   if (cpu_of(rq) == smp_processor_id())
+   cpufreq_trigger_update(rq_clock(rq));
+
schedstat_set(curr->se.statistics.exec_max,
  max(curr->se.statistics.exec_max, delta_exec));
 
-- 
1.9.1



[PATCH v2 2/3] PCI: imx6: Implement reset sequence for i.MX6+

2016-04-17 Thread Andrey Smirnov
i.MX6+ has a dedicated bit for resetting the PCIe core, which should be used
instead of a regular reset sequence since using the latter will hang the
SoC.

This commit is based on c34068d48273e24d392d9a49a38be807954420ed from
http://git.freescale.com/git/cgit.cgi/imx/linux-2.6-imx.git

Signed-off-by: Andrey Smirnov 
---

Changes since v1:

- Patchset is rebased against
  
https://git.kernel.org/cgit/linux/kernel/git/helgaas/pci.git/log/?h=pci/host-imx6

- DTS files changes moved into a separate patch

 drivers/pci/host/pci-imx6.c | 28 ++--
 include/linux/mfd/syscon/imx6q-iomuxc-gpr.h |  1 +
 2 files changed, 27 insertions(+), 2 deletions(-)

diff --git a/drivers/pci/host/pci-imx6.c b/drivers/pci/host/pci-imx6.c
index c570bbb..834c5b8 100644
--- a/drivers/pci/host/pci-imx6.c
+++ b/drivers/pci/host/pci-imx6.c
@@ -33,7 +33,8 @@
 
 enum imx6_pcie_variants {
IMX6Q,
-   IMX6SX
+   IMX6SX,
+   IMX6QP,
 };
 
 struct imx6_pcie {
@@ -253,6 +254,11 @@ static int imx6_pcie_assert_core_reset(struct pcie_port 
*pp)
   IMX6SX_GPR5_PCIE_BTNRST_RESET,
   IMX6SX_GPR5_PCIE_BTNRST_RESET);
break;
+   case IMX6QP:
+   regmap_update_bits(imx6_pcie->iomuxc_gpr, IOMUXC_GPR1,
+  IMX6Q_GPR1_PCIE_SW_RST,
+  IMX6Q_GPR1_PCIE_SW_RST);
+   break;
case IMX6Q:
/*
 * If the bootloader already enabled the link we need some 
special
@@ -307,6 +313,7 @@ static int imx6_pcie_enable_ref_clk(struct imx6_pcie 
*imx6_pcie)
regmap_update_bits(imx6_pcie->iomuxc_gpr, IOMUXC_GPR12,
   IMX6SX_GPR12_PCIE_TEST_POWERDOWN, 0);
break;
+   case IMX6QP:/* FALLTHROUGH */
case IMX6Q:
/* power up core phy and enable ref clock */
regmap_update_bits(imx6_pcie->iomuxc_gpr, IOMUXC_GPR1,
@@ -367,9 +374,22 @@ static int imx6_pcie_deassert_core_reset(struct pcie_port 
*pp)
gpiod_set_value_cansleep(imx6_pcie->reset_gpio, 1);
}
 
-   if (imx6_pcie->variant == IMX6SX)
+   switch (imx6_pcie->variant) {
+   case IMX6SX:
regmap_update_bits(imx6_pcie->iomuxc_gpr, IOMUXC_GPR5,
   IMX6SX_GPR5_PCIE_BTNRST_RESET, 0);
+   break;
+   case IMX6QP:
+   regmap_update_bits(imx6_pcie->iomuxc_gpr, IOMUXC_GPR1,
+  IMX6Q_GPR1_PCIE_SW_RST, 0);
+
+   usleep_range(200, 500);
+   break;
+   case IMX6Q: /* Nothing to do */
+   break;
+   default:
+   BUG();
+   }
 
return 0;
 
@@ -601,6 +621,9 @@ static int __init imx6_pcie_probe(struct platform_device 
*pdev)
if (of_device_is_compatible(pp->dev->of_node,
"fsl,imx6sx-pcie"))
imx6_pcie->variant = IMX6SX;
+   else if (of_device_is_compatible(pp->dev->of_node,
+"fsl,imx6qp-pcie"))
+   imx6_pcie->variant = IMX6QP;
else
imx6_pcie->variant = IMX6Q;
 
@@ -697,6 +720,7 @@ static void imx6_pcie_shutdown(struct platform_device *pdev)
 static const struct of_device_id imx6_pcie_of_match[] = {
{ .compatible = "fsl,imx6q-pcie", },
{ .compatible = "fsl,imx6sx-pcie", },
+   { .compatible = "fsl,imx6qp-pcie", },
{},
 };
 MODULE_DEVICE_TABLE(of, imx6_pcie_of_match);
diff --git a/include/linux/mfd/syscon/imx6q-iomuxc-gpr.h 
b/include/linux/mfd/syscon/imx6q-iomuxc-gpr.h
index 238c8db..5b08e3c 100644
--- a/include/linux/mfd/syscon/imx6q-iomuxc-gpr.h
+++ b/include/linux/mfd/syscon/imx6q-iomuxc-gpr.h
@@ -95,6 +95,7 @@
 #define IMX6Q_GPR0_DMAREQ_MUX_SEL0_IOMUX   BIT(0)
 
 #define IMX6Q_GPR1_PCIE_REQ_MASK   (0x3 << 30)
+#define IMX6Q_GPR1_PCIE_SW_RST BIT(29)
 #define IMX6Q_GPR1_PCIE_EXIT_L1BIT(28)
 #define IMX6Q_GPR1_PCIE_RDY_L23BIT(27)
 #define IMX6Q_GPR1_PCIE_ENTER_L1   BIT(26)
-- 
2.5.5



Re: [patch] bnx2i: silence uninitialized variable warnings

2016-04-17 Thread Nilesh Javali


On 14/04/16 3:10 PM, "Dan Carpenter"  wrote:

>Presumably it isn't possible to have empty lists here, but my static
>checker doesn't know that and complains that "ep" can be used
>uninitialized.
>
>Signed-off-by: Dan Carpenter 
>
>diff --git a/drivers/scsi/bnx2i/bnx2i_iscsi.c
>b/drivers/scsi/bnx2i/bnx2i_iscsi.c
>index 7289437..133901f 100644
>--- a/drivers/scsi/bnx2i/bnx2i_iscsi.c
>+++ b/drivers/scsi/bnx2i/bnx2i_iscsi.c
>@@ -675,7 +675,7 @@ bnx2i_find_ep_in_ofld_list(struct bnx2i_hba *hba, u32
>iscsi_cid)
> {
>   struct list_head *list;
>   struct list_head *tmp;
>-  struct bnx2i_endpoint *ep;
>+  struct bnx2i_endpoint *ep = NULL;
> 
>   read_lock_bh(>ep_rdwr_lock);
>   list_for_each_safe(list, tmp, >ep_ofld_list) {
>@@ -703,7 +703,7 @@ bnx2i_find_ep_in_destroy_list(struct bnx2i_hba *hba,
>u32 iscsi_cid)
> {
>   struct list_head *list;
>   struct list_head *tmp;
>-  struct bnx2i_endpoint *ep;
>+  struct bnx2i_endpoint *ep = NULL;
> 
>   read_lock_bh(>ep_rdwr_lock);
>   list_for_each_safe(list, tmp, >ep_destroy_list) {

Acked-by: Nilesh Javali 



[PATCH v2 2/3] PCI: imx6: Implement reset sequence for i.MX6+

2016-04-17 Thread Andrey Smirnov
i.MX6+ has a dedicated bit for resetting the PCIe core, which should be used
instead of a regular reset sequence since using the latter will hang the
SoC.

This commit is based on c34068d48273e24d392d9a49a38be807954420ed from
http://git.freescale.com/git/cgit.cgi/imx/linux-2.6-imx.git

Signed-off-by: Andrey Smirnov 
---

Changes since v1:

- Patchset is rebased against
  
https://git.kernel.org/cgit/linux/kernel/git/helgaas/pci.git/log/?h=pci/host-imx6

- DTS files changes moved into a separate patch

 drivers/pci/host/pci-imx6.c | 28 ++--
 include/linux/mfd/syscon/imx6q-iomuxc-gpr.h |  1 +
 2 files changed, 27 insertions(+), 2 deletions(-)

diff --git a/drivers/pci/host/pci-imx6.c b/drivers/pci/host/pci-imx6.c
index c570bbb..834c5b8 100644
--- a/drivers/pci/host/pci-imx6.c
+++ b/drivers/pci/host/pci-imx6.c
@@ -33,7 +33,8 @@
 
 enum imx6_pcie_variants {
IMX6Q,
-   IMX6SX
+   IMX6SX,
+   IMX6QP,
 };
 
 struct imx6_pcie {
@@ -253,6 +254,11 @@ static int imx6_pcie_assert_core_reset(struct pcie_port 
*pp)
   IMX6SX_GPR5_PCIE_BTNRST_RESET,
   IMX6SX_GPR5_PCIE_BTNRST_RESET);
break;
+   case IMX6QP:
+   regmap_update_bits(imx6_pcie->iomuxc_gpr, IOMUXC_GPR1,
+  IMX6Q_GPR1_PCIE_SW_RST,
+  IMX6Q_GPR1_PCIE_SW_RST);
+   break;
case IMX6Q:
/*
 * If the bootloader already enabled the link we need some 
special
@@ -307,6 +313,7 @@ static int imx6_pcie_enable_ref_clk(struct imx6_pcie 
*imx6_pcie)
regmap_update_bits(imx6_pcie->iomuxc_gpr, IOMUXC_GPR12,
   IMX6SX_GPR12_PCIE_TEST_POWERDOWN, 0);
break;
+   case IMX6QP:/* FALLTHROUGH */
case IMX6Q:
/* power up core phy and enable ref clock */
regmap_update_bits(imx6_pcie->iomuxc_gpr, IOMUXC_GPR1,
@@ -367,9 +374,22 @@ static int imx6_pcie_deassert_core_reset(struct pcie_port 
*pp)
gpiod_set_value_cansleep(imx6_pcie->reset_gpio, 1);
}
 
-   if (imx6_pcie->variant == IMX6SX)
+   switch (imx6_pcie->variant) {
+   case IMX6SX:
regmap_update_bits(imx6_pcie->iomuxc_gpr, IOMUXC_GPR5,
   IMX6SX_GPR5_PCIE_BTNRST_RESET, 0);
+   break;
+   case IMX6QP:
+   regmap_update_bits(imx6_pcie->iomuxc_gpr, IOMUXC_GPR1,
+  IMX6Q_GPR1_PCIE_SW_RST, 0);
+
+   usleep_range(200, 500);
+   break;
+   case IMX6Q: /* Nothing to do */
+   break;
+   default:
+   BUG();
+   }
 
return 0;
 
@@ -601,6 +621,9 @@ static int __init imx6_pcie_probe(struct platform_device 
*pdev)
if (of_device_is_compatible(pp->dev->of_node,
"fsl,imx6sx-pcie"))
imx6_pcie->variant = IMX6SX;
+   else if (of_device_is_compatible(pp->dev->of_node,
+"fsl,imx6qp-pcie"))
+   imx6_pcie->variant = IMX6QP;
else
imx6_pcie->variant = IMX6Q;
 
@@ -697,6 +720,7 @@ static void imx6_pcie_shutdown(struct platform_device *pdev)
 static const struct of_device_id imx6_pcie_of_match[] = {
{ .compatible = "fsl,imx6q-pcie", },
{ .compatible = "fsl,imx6sx-pcie", },
+   { .compatible = "fsl,imx6qp-pcie", },
{},
 };
 MODULE_DEVICE_TABLE(of, imx6_pcie_of_match);
diff --git a/include/linux/mfd/syscon/imx6q-iomuxc-gpr.h 
b/include/linux/mfd/syscon/imx6q-iomuxc-gpr.h
index 238c8db..5b08e3c 100644
--- a/include/linux/mfd/syscon/imx6q-iomuxc-gpr.h
+++ b/include/linux/mfd/syscon/imx6q-iomuxc-gpr.h
@@ -95,6 +95,7 @@
 #define IMX6Q_GPR0_DMAREQ_MUX_SEL0_IOMUX   BIT(0)
 
 #define IMX6Q_GPR1_PCIE_REQ_MASK   (0x3 << 30)
+#define IMX6Q_GPR1_PCIE_SW_RST BIT(29)
 #define IMX6Q_GPR1_PCIE_EXIT_L1    BIT(28)
 #define IMX6Q_GPR1_PCIE_RDY_L23    BIT(27)
 #define IMX6Q_GPR1_PCIE_ENTER_L1   BIT(26)
-- 
2.5.5



Re: [patch] bnx2i: silence uninitialized variable warnings

2016-04-17 Thread Nilesh Javali


On 14/04/16 3:10 PM, "Dan Carpenter"  wrote:

>Presumably it isn't possible to have empty lists here, but my static
>checker doesn't know that and complains that "ep" can be used
>uninitialized.
>
>Signed-off-by: Dan Carpenter 
>
>diff --git a/drivers/scsi/bnx2i/bnx2i_iscsi.c
>b/drivers/scsi/bnx2i/bnx2i_iscsi.c
>index 7289437..133901f 100644
>--- a/drivers/scsi/bnx2i/bnx2i_iscsi.c
>+++ b/drivers/scsi/bnx2i/bnx2i_iscsi.c
>@@ -675,7 +675,7 @@ bnx2i_find_ep_in_ofld_list(struct bnx2i_hba *hba, u32
>iscsi_cid)
> {
>   struct list_head *list;
>   struct list_head *tmp;
>-  struct bnx2i_endpoint *ep;
>+  struct bnx2i_endpoint *ep = NULL;
> 
>   read_lock_bh(&hba->ep_rdwr_lock);
>   list_for_each_safe(list, tmp, &hba->ep_ofld_list) {
>@@ -703,7 +703,7 @@ bnx2i_find_ep_in_destroy_list(struct bnx2i_hba *hba,
>u32 iscsi_cid)
> {
>   struct list_head *list;
>   struct list_head *tmp;
>-  struct bnx2i_endpoint *ep;
>+  struct bnx2i_endpoint *ep = NULL;
> 
>   read_lock_bh(&hba->ep_rdwr_lock);
>   list_for_each_safe(list, tmp, &hba->ep_destroy_list) {

Acked-by: Nilesh Javali 



Re: [PATCH] x86/entry/x32: Check top 32 bits of syscall number on the fast path

2016-04-17 Thread Andy Lutomirski
On Sun, Apr 17, 2016 at 10:45 PM, H. Peter Anvin  wrote:
> On 04/17/16 22:39, Andy Lutomirski wrote:
>>>
>>> I'm reasonably confident they have, because we have had security bugs
>>> TWICE when someone has tried to "optimize" the code.  The masking was
>>> generally done with a movl instruction, which confused people.
>>>
>>>> So the type of the syscall nr is a bit confused.  If there was an
>>>> installed base of programs that leaved garbage in the high bits, we
>>>> would have noticed *years* ago.  On the other hand, the 32-bit ptrace
>>>> ABI and the seccomp ABI both think it's 32-bits.
>>>
>>> Incorrect.  We have seen these failures in real life.
>>
>> What kind of failure?  Programs that accidentally set rax to
>> 0xbaadf00d00000003 get -ENOSYS in most cases, not close().  If we'd
>> broken programs like this, I assume we would have had to fix it a long
>> time ago.
>>
>>>> If we were designing the x86_64 ABI and everything around it from
>>>> scratch, I'd suggest that that either the high bits must be zero or
>>>> that the number actually be 64 bits (which are more or less the same
>>>> thing).  That would let us use the high bits for something interesting
>>>> in the future.
>>>
>>> Not really all that useful.  What we have is a C ABI.
>>
>> And we've already stolen a bit once for x32.  Maybe we'll want more.
>> For example, if we added a cancellable bit, if x86_32 didn't want it,
>> we could steal a high bit for ie.
>>
>
> I think we're worrying about the wrong thing here... we skipped bit 31
> to avoid signedness issues, and with bit 30 for x32 we now "only" have
> 20 bits that haven't been used for anything at all.
>
>>>
>>>> In practice, we can probably still declare that the thing is a 64-bit
>>>> number, given that most kernels in the wild currently fail syscalls
>>>> that have the high bits set.
>>>
>>> They don't, and we can prove it...
>>
>> I'm confused.
>>
>>   asm volatile ("syscall" :
>> "=a" (ret) :
>> "a" (SYS_getpid | 0xbaadf00d00000000ULL) :
>> "memory", "cc", "rcx", "r11");
>>
>> gets -ENOSYS on the kernel I'm running on my laptop and on Fedora 23's
>> stock kernel.
>>
>> I'm not terribly worried about nasty security issues in here because
>> all the nasty stuff is in C now.
>>
>> What kernel had the other behavior?  In 2.6.11, I see:
>>
>> ENTRY(system_call)
>> CFI_STARTPROC
>> swapgs
>> movq %rsp,%gs:pda_oldrsp
>> movq %gs:pda_kernelstack,%rsp
>> sti
>> SAVE_ARGS 8,1
>> movq  %rax,ORIG_RAX-ARGOFFSET(%rsp)
>> movq  %rcx,RIP-ARGOFFSET(%rsp)
>> GET_THREAD_INFO(%rcx)
>> testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT),threadinfo_flags(%rcx)
>> jnz tracesys
>> cmpq $__NR_syscall_max,%rax
>>
>
> I can't remember what versions.  What I do know is that this was a bug
> which was introduced, fixed, re-introduced, and fixed again, and both
> resulted in CVEs.  The fact that you're seeing the cmpq indicates that
> it at least was not one of the security-buggy kernels.
>
> I do agree we should make the behavior consistent, and follow the
> documented behavior of treating the syscall argument as an int.
>

I think I prefer the "reject weird input" behavior over the "accept
and normalize weird input" if we can get away with it, and I'm fairly
confident that we can get away with "reject weird input" given that
distro kernels do exactly that already.

So I like Ben's patch.

--Andy

> -hpa
>
>



-- 
Andy Lutomirski
AMA Capital Management, LLC


Re: [PATCH] x86/entry/x32: Check top 32 bits of syscall number on the fast path

2016-04-17 Thread Andy Lutomirski
On Sun, Apr 17, 2016 at 10:45 PM, H. Peter Anvin  wrote:
> On 04/17/16 22:39, Andy Lutomirski wrote:
>>>
>>> I'm reasonably confident they have, because we have had security bugs
>>> TWICE when someone has tried to "optimize" the code.  The masking was
>>> generally done with a movl instruction, which confused people.
>>>
>>>> So the type of the syscall nr is a bit confused.  If there was an
>>>> installed base of programs that leaved garbage in the high bits, we
>>>> would have noticed *years* ago.  On the other hand, the 32-bit ptrace
>>>> ABI and the seccomp ABI both think it's 32-bits.
>>>
>>> Incorrect.  We have seen these failures in real life.
>>
>> What kind of failure?  Programs that accidentally set rax to
>> 0xbaadf00d00000003 get -ENOSYS in most cases, not close().  If we'd
>> broken programs like this, I assume we would have had to fix it a long
>> time ago.
>>
>>>> If we were designing the x86_64 ABI and everything around it from
>>>> scratch, I'd suggest that that either the high bits must be zero or
>>>> that the number actually be 64 bits (which are more or less the same
>>>> thing).  That would let us use the high bits for something interesting
>>>> in the future.
>>>
>>> Not really all that useful.  What we have is a C ABI.
>>
>> And we've already stolen a bit once for x32.  Maybe we'll want more.
>> For example, if we added a cancellable bit, if x86_32 didn't want it,
>> we could steal a high bit for ie.
>>
>
> I think we're worrying about the wrong thing here... we skipped bit 31
> to avoid signedness issues, and with bit 30 for x32 we now "only" have
> 20 bits that haven't been used for anything at all.
>
>>>
>>>> In practice, we can probably still declare that the thing is a 64-bit
>>>> number, given that most kernels in the wild currently fail syscalls
>>>> that have the high bits set.
>>>
>>> They don't, and we can prove it...
>>
>> I'm confused.
>>
>>   asm volatile ("syscall" :
>> "=a" (ret) :
>> "a" (SYS_getpid | 0xbaadf00d00000000ULL) :
>> "memory", "cc", "rcx", "r11");
>>
>> gets -ENOSYS on the kernel I'm running on my laptop and on Fedora 23's
>> stock kernel.
>>
>> I'm not terribly worried about nasty security issues in here because
>> all the nasty stuff is in C now.
>>
>> What kernel had the other behavior?  In 2.6.11, I see:
>>
>> ENTRY(system_call)
>> CFI_STARTPROC
>> swapgs
>> movq %rsp,%gs:pda_oldrsp
>> movq %gs:pda_kernelstack,%rsp
>> sti
>> SAVE_ARGS 8,1
>> movq  %rax,ORIG_RAX-ARGOFFSET(%rsp)
>> movq  %rcx,RIP-ARGOFFSET(%rsp)
>> GET_THREAD_INFO(%rcx)
>> testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT),threadinfo_flags(%rcx)
>> jnz tracesys
>> cmpq $__NR_syscall_max,%rax
>>
>
> I can't remember what versions.  What I do know is that this was a bug
> which was introduced, fixed, re-introduced, and fixed again, and both
> resulted in CVEs.  The fact that you're seeing the cmpq indicates that
> it at least was not one of the security-buggy kernels.
>
> I do agree we should make the behavior consistent, and follow the
> documented behavior of treating the syscall argument as an int.
>

I think I prefer the "reject weird input" behavior over the "accept
and normalize weird input" if we can get away with it, and I'm fairly
confident that we can get away with "reject weird input" given that
distro kernels do exactly that already.

So I like Ben's patch.

--Andy

> -hpa
>
>



-- 
Andy Lutomirski
AMA Capital Management, LLC


Re: [PATCH v10 0/4] Introduce usb charger framework to deal with the usb gadget power negotation

2016-04-17 Thread Baolin Wang
Hi Felipe,

What do you think of this version of the patchset? Could you review it
when you have time? I would really like to move the usb charger work
forward with your suggestions. Thanks.

On 7 April 2016 at 19:48, Baolin Wang  wrote:
> Currently the Linux kernel does not provide any standard integration of this
> feature that integrates the USB subsystem with the system power regulation
> provided by PMICs meaning that either vendors must add this in their kernels
> or USB gadget devices based on Linux (such as mobile phones) may not behave
> as they should. Thus provide a standard framework for doing this in kernel.
>
> Now introduce one user with wm831x_power to support and test the usb charger,
> which is pending testing. Moreover there may be other potential users will use
> it in future.
>
> Changes since v9:
>  - Remove some redundant sysfs attributes.
>  - Change the SDP charger default current if gadget is SS.
>  - Remove the 'get_charger_type' callback in gadget->ops.
>
> Baolin Wang (4):
>   gadget: Introduce the usb charger framework
>   gadget: Support for the usb charger framework
>   gadget: Integrate with the usb gadget supporting for usb charger
>   power: wm831x_power: Support USB charger current limit management
>
>  drivers/power/wm831x_power.c  |   69 
>  drivers/usb/gadget/Kconfig|7 +
>  drivers/usb/gadget/udc/Makefile   |1 +
>  drivers/usb/gadget/udc/charger.c  |  766 
> +
>  drivers/usb/gadget/udc/udc-core.c |   11 +
>  include/linux/mfd/wm831x/pdata.h  |3 +
>  include/linux/usb/charger.h   |  173 +
>  include/linux/usb/gadget.h|   13 +
>  include/uapi/linux/usb/charger.h  |   31 ++
>  9 files changed, 1074 insertions(+)
>  create mode 100644 drivers/usb/gadget/udc/charger.c
>  create mode 100644 include/linux/usb/charger.h
>  create mode 100644 include/uapi/linux/usb/charger.h
>
> --
> 1.7.9.5
>



-- 
Baolin.wang
Best Regards


Re: [PATCH v10 0/4] Introduce usb charger framework to deal with the usb gadget power negotation

2016-04-17 Thread Baolin Wang
Hi Felipe,

What do you think of this version of the patchset? Could you review it
when you have time? I would really like to move the usb charger work
forward with your suggestions. Thanks.

On 7 April 2016 at 19:48, Baolin Wang  wrote:
> Currently the Linux kernel does not provide any standard integration of this
> feature that integrates the USB subsystem with the system power regulation
> provided by PMICs meaning that either vendors must add this in their kernels
> or USB gadget devices based on Linux (such as mobile phones) may not behave
> as they should. Thus provide a standard framework for doing this in kernel.
>
> Now introduce one user with wm831x_power to support and test the usb charger,
> which is pending testing. Moreover there may be other potential users will use
> it in future.
>
> Changes since v9:
>  - Remove some redundant sysfs attributes.
>  - Change the SDP charger default current if gadget is SS.
>  - Remove the 'get_charger_type' callback in gadget->ops.
>
> Baolin Wang (4):
>   gadget: Introduce the usb charger framework
>   gadget: Support for the usb charger framework
>   gadget: Integrate with the usb gadget supporting for usb charger
>   power: wm831x_power: Support USB charger current limit management
>
>  drivers/power/wm831x_power.c  |   69 
>  drivers/usb/gadget/Kconfig|7 +
>  drivers/usb/gadget/udc/Makefile   |1 +
>  drivers/usb/gadget/udc/charger.c  |  766 
> +
>  drivers/usb/gadget/udc/udc-core.c |   11 +
>  include/linux/mfd/wm831x/pdata.h  |3 +
>  include/linux/usb/charger.h   |  173 +
>  include/linux/usb/gadget.h|   13 +
>  include/uapi/linux/usb/charger.h  |   31 ++
>  9 files changed, 1074 insertions(+)
>  create mode 100644 drivers/usb/gadget/udc/charger.c
>  create mode 100644 include/linux/usb/charger.h
>  create mode 100644 include/uapi/linux/usb/charger.h
>
> --
> 1.7.9.5
>



-- 
Baolin.wang
Best Regards


Re: [PATCH v2 0/4] Introduce bulk mode for crypto engine framework

2016-04-17 Thread Herbert Xu
On Mon, Apr 18, 2016 at 01:31:09PM +0800, Baolin Wang wrote:
> 
> We've tried to do this in dm-crypt, but it failed.
> The dm-crypt maintainer explained to me that I should optimize the
> driver, not add strange hw-dependent crypto modes to dm-crypt, this is
> not the first crypto accelerator that is just not suited for this kind
> of use.
> He thought if it can process batch of chunks of data each with own IV,
> then it can work with dm-crypt, but he thought such optimized code
> should be inside crypto API, not in dmcrypt.

That's a completely bogus argument.  The user always has more
information available than the underlying API.  So it is totally
stupid to have the API try to extract information that the user
could have provided in the first place.

I'm not taking this patch-set.

Cheers,
-- 
Email: Herbert Xu 
Home Page: http://gondor.apana.org.au/~herbert/
PGP Key: http://gondor.apana.org.au/~herbert/pubkey.txt


Re: [PATCH v2 0/4] Introduce bulk mode for crypto engine framework

2016-04-17 Thread Herbert Xu
On Mon, Apr 18, 2016 at 01:31:09PM +0800, Baolin Wang wrote:
> 
> We've tried to do this in dm-crypt, but it failed.
> The dm-crypt maintainer explained to me that I should optimize the
> driver, not add strange hw-dependent crypto modes to dm-crypt, this is
> not the first crypto accelerator that is just not suited for this kind
> of use.
> He thought if it can process batch of chunks of data each with own IV,
> then it can work with dm-crypt, but he thought such optimized code
> should be inside crypto API, not in dmcrypt.

That's a completely bogus argument.  The user always has more
information available than the underlying API.  So it is totally
stupid to have the API try to extract information that the user
could have provided in the first place.

I'm not taking this patch-set.

Cheers,
-- 
Email: Herbert Xu 
Home Page: http://gondor.apana.org.au/~herbert/
PGP Key: http://gondor.apana.org.au/~herbert/pubkey.txt


Re: [PATCH] x86/entry/x32: Check top 32 bits of syscall number on the fast path

2016-04-17 Thread H. Peter Anvin
On 04/17/16 22:39, Andy Lutomirski wrote:
>>
>> I'm reasonably confident they have, because we have had security bugs
>> TWICE when someone has tried to "optimize" the code.  The masking was
>> generally done with a movl instruction, which confused people.
>>
>>> So the type of the syscall nr is a bit confused.  If there was an
>>> installed base of programs that leaved garbage in the high bits, we
>>> would have noticed *years* ago.  On the other hand, the 32-bit ptrace
>>> ABI and the seccomp ABI both think it's 32-bits.
>>
>> Incorrect.  We have seen these failures in real life.
> 
> What kind of failure?  Programs that accidentally set rax to
> 0xbaadf00d00000003 get -ENOSYS in most cases, not close().  If we'd
> broken programs like this, I assume we would have had to fix it a long
> time ago.
> 
>>> If we were designing the x86_64 ABI and everything around it from
>>> scratch, I'd suggest that that either the high bits must be zero or
>>> that the number actually be 64 bits (which are more or less the same
>>> thing).  That would let us use the high bits for something interesting
>>> in the future.
>>
>> Not really all that useful.  What we have is a C ABI.
> 
> And we've already stolen a bit once for x32.  Maybe we'll want more.
> For example, if we added a cancellable bit, if x86_32 didn't want it,
> we could steal a high bit for ie.
> 

I think we're worrying about the wrong thing here... we skipped bit 31
to avoid signedness issues, and with bit 30 for x32 we now "only" have
20 bits that haven't been used for anything at all.

>>
>>> In practice, we can probably still declare that the thing is a 64-bit
>>> number, given that most kernels in the wild currently fail syscalls
>>> that have the high bits set.
>>
>> They don't, and we can prove it...
> 
> I'm confused.
> 
>   asm volatile ("syscall" :
> "=a" (ret) :
> "a" (SYS_getpid | 0xbaadf00d00000000ULL) :
> "memory", "cc", "rcx", "r11");
> 
> gets -ENOSYS on the kernel I'm running on my laptop and on Fedora 23's
> stock kernel.
> 
> I'm not terribly worried about nasty security issues in here because
> all the nasty stuff is in C now.
> 
> What kernel had the other behavior?  In 2.6.11, I see:
> 
> ENTRY(system_call)
> CFI_STARTPROC
> swapgs
> movq %rsp,%gs:pda_oldrsp
> movq %gs:pda_kernelstack,%rsp
> sti
> SAVE_ARGS 8,1
> movq  %rax,ORIG_RAX-ARGOFFSET(%rsp)
> movq  %rcx,RIP-ARGOFFSET(%rsp)
> GET_THREAD_INFO(%rcx)
> testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT),threadinfo_flags(%rcx)
> jnz tracesys
> cmpq $__NR_syscall_max,%rax
> 

I can't remember what versions.  What I do know is that this was a bug
which was introduced, fixed, re-introduced, and fixed again, and both
resulted in CVEs.  The fact that you're seeing the cmpq indicates that
it at least was not one of the security-buggy kernels.

I do agree we should make the behavior consistent, and follow the
documented behavior of treating the syscall argument as an int.

-hpa




Re: [PATCH] x86/entry/x32: Check top 32 bits of syscall number on the fast path

2016-04-17 Thread H. Peter Anvin
On 04/17/16 22:39, Andy Lutomirski wrote:
>>
>> I'm reasonably confident they have, because we have had security bugs
>> TWICE when someone has tried to "optimize" the code.  The masking was
>> generally done with a movl instruction, which confused people.
>>
>>> So the type of the syscall nr is a bit confused.  If there was an
>>> installed base of programs that leaved garbage in the high bits, we
>>> would have noticed *years* ago.  On the other hand, the 32-bit ptrace
>>> ABI and the seccomp ABI both think it's 32-bits.
>>
>> Incorrect.  We have seen these failures in real life.
> 
> What kind of failure?  Programs that accidentally set rax to
> 0xbaadf00d00000003 get -ENOSYS in most cases, not close().  If we'd
> broken programs like this, I assume we would have had to fix it a long
> time ago.
> 
>>> If we were designing the x86_64 ABI and everything around it from
>>> scratch, I'd suggest that that either the high bits must be zero or
>>> that the number actually be 64 bits (which are more or less the same
>>> thing).  That would let us use the high bits for something interesting
>>> in the future.
>>
>> Not really all that useful.  What we have is a C ABI.
> 
> And we've already stolen a bit once for x32.  Maybe we'll want more.
> For example, if we added a cancellable bit, if x86_32 didn't want it,
> we could steal a high bit for ie.
> 

I think we're worrying about the wrong thing here... we skipped bit 31
to avoid signedness issues, and with bit 30 for x32 we now "only" have
20 bits that haven't been used for anything at all.

>>
>>> In practice, we can probably still declare that the thing is a 64-bit
>>> number, given that most kernels in the wild currently fail syscalls
>>> that have the high bits set.
>>
>> They don't, and we can prove it...
> 
> I'm confused.
> 
>   asm volatile ("syscall" :
> "=a" (ret) :
> "a" (SYS_getpid | 0xbaadf00d00000000ULL) :
> "memory", "cc", "rcx", "r11");
> 
> gets -ENOSYS on the kernel I'm running on my laptop and on Fedora 23's
> stock kernel.
> 
> I'm not terribly worried about nasty security issues in here because
> all the nasty stuff is in C now.
> 
> What kernel had the other behavior?  In 2.6.11, I see:
> 
> ENTRY(system_call)
> CFI_STARTPROC
> swapgs
> movq %rsp,%gs:pda_oldrsp
> movq %gs:pda_kernelstack,%rsp
> sti
> SAVE_ARGS 8,1
> movq  %rax,ORIG_RAX-ARGOFFSET(%rsp)
> movq  %rcx,RIP-ARGOFFSET(%rsp)
> GET_THREAD_INFO(%rcx)
> testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT),threadinfo_flags(%rcx)
> jnz tracesys
> cmpq $__NR_syscall_max,%rax
> 

I can't remember what versions.  What I do know is that this was a bug
which was introduced, fixed, re-introduced, and fixed again, and both
resulted in CVEs.  The fact that you're seeing the cmpq indicates that
it at least was not one of the security-buggy kernels.

I do agree we should make the behavior consistent, and follow the
documented behavior of treating the syscall argument as an int.

-hpa




Re: [RESEND PATCH 1/3] power: charger-manager: Replace deprecatd API of extcon

2016-04-17 Thread Chanwoo Choi
Hi Sebastian,

On 2016년 04월 15일 23:13, Sebastian Reichel wrote:
> Hi,
> 
> On Fri, Apr 15, 2016 at 09:43:34AM +0900, Chanwoo Choi wrote:
>> This patch removes the deprecated notifier API of extcon framework and then 
>> use
>> the new extcon API[2] with the unique id[1] to indicate the each external
>> connector. Alter deprecated API as following:
>> - extcon_register_interest() -> extcon_register_notifier()
>> - extcon_unregister_interest() -> extcon_unregister_notifier()
>>
>> And, extcon alters the name of USB charger connector in patch[3] as 
>> following:
>> - EXTCON_CHG_USB_SDP /* Standard Downstream Port */
>> - EXTCON_CHG_USB_DCP /* Dedicated Charging Port */
>> - EXTCON_CHG_USB_CDP /* Charging Downstream Port */
>> - EXTCON_CHG_USB_ACA /* Accessory Charger Adapter */
>>
>> So, the name of external charger connector are changed as following:
>> - "USB" -> "SDP"
>> - "TA" -> "DCP"
>>
>> [1] Commit 2a9de9c0f08d61
>> - ("extcon: Use the unique id for external connector instead of string)
>> [2] Commit 046050f6e623e4
>> - ("extcon: Update the prototype of extcon_register_notifier() with enum 
>> extcon
>> [3] Commit 11eecf910bd81d
>> - ("extcon: Modify the id and name of external connector")
>>
>> Signed-off-by: Chanwoo Choi 
>> ---
>>  .../bindings/power_supply/charger-manager.txt  |  4 +--
>>  drivers/power/charger-manager.c| 31 
>> ++
>>  include/linux/power/charger-manager.h  |  4 +--
>>  3 files changed, 24 insertions(+), 15 deletions(-)
>>
>> diff --git 
>> a/Documentation/devicetree/bindings/power_supply/charger-manager.txt 
>> b/Documentation/devicetree/bindings/power_supply/charger-manager.txt
>> index ec4fe9de3137..73193e380dc2 100644
>> --- a/Documentation/devicetree/bindings/power_supply/charger-manager.txt
>> +++ b/Documentation/devicetree/bindings/power_supply/charger-manager.txt
>> @@ -65,13 +65,13 @@ Example :
>>  regulator@0 {
>>  cm-regulator-name = "chg-reg";
>>  cable@0 {
>> -cm-cable-name = "USB";
>> +cm-cable-id = 5; /* EXTCON_CHG_USB_SDP */
>>  cm-cable-extcon = "extcon-dev.0";
>>  cm-cable-min = <475000>;
>>  cm-cable-max = <500000>;
>>  };
>>  cable@1 {
>> -cm-cable-name = "TA";
>> +cm-cable-id = 6; /* EXTCON_CHG_USB_DCP */
>>  cm-cable-extcon = "extcon-dev.0";
>>  cm-cable-min = <650000>;
>>  cm-cable-max = <675000>;
> 
> This breaks DT ABI. Looks like charger-manager is not used in
> mainline, but I guess there should be an explicit Acked-By from
> a DT binding maintainer.

As I mentioned in my other mail to Rob, the purpose of this patch
is to remove the deprecated EXTCON APIs. That is why I touched the DT binding.

About DT ABI about charger-manager, we should handle it on separate patches.

> Also I think the defines should be in
> some header includable from DTS, so that something like this
> can be done:
> 
> cm-cable-id = <EXTCON_CHG_USB_SDP>;


I agree to use some definition for the kind of charger cable.
But, Not yet. I'm preparing the EXTCON update to use the definitions
on Device Tree file. I'll send some separate patches in the near future.
> 
> Apart from that:
> 
> Acked-By: Sebastian Reichel 

Thanks for review.

Best Regards,
Chanwoo Choi



Re: [RESEND PATCH 1/3] power: charger-manager: Replace deprecatd API of extcon

2016-04-17 Thread Chanwoo Choi
Hi Sebastian,

On 2016년 04월 15일 23:13, Sebastian Reichel wrote:
> Hi,
> 
> On Fri, Apr 15, 2016 at 09:43:34AM +0900, Chanwoo Choi wrote:
>> This patch removes the deprecated notifier API of extcon framework and then 
>> use
>> the new extcon API[2] with the unique id[1] to indicate the each external
>> connector. Alter deprecated API as following:
>> - extcon_register_interest() -> extcon_register_notifier()
>> - extcon_unregister_interest() -> extcon_unregister_notifier()
>>
>> And, extcon alters the name of USB charger connector in patch[3] as 
>> following:
>> - EXTCON_CHG_USB_SDP /* Standard Downstream Port */
>> - EXTCON_CHG_USB_DCP /* Dedicated Charging Port */
>> - EXTCON_CHG_USB_CDP /* Charging Downstream Port */
>> - EXTCON_CHG_USB_ACA /* Accessory Charger Adapter */
>>
>> So, the name of external charger connector are changed as following:
>> - "USB" -> "SDP"
>> - "TA" -> "DCP"
>>
>> [1] Commit 2a9de9c0f08d61
>> - ("extcon: Use the unique id for external connector instead of string)
>> [2] Commit 046050f6e623e4
>> - ("extcon: Update the prototype of extcon_register_notifier() with enum 
>> extcon
>> [3] Commit 11eecf910bd81d
>> - ("extcon: Modify the id and name of external connector")
>>
>> Signed-off-by: Chanwoo Choi 
>> ---
>>  .../bindings/power_supply/charger-manager.txt  |  4 +--
>>  drivers/power/charger-manager.c| 31 
>> ++
>>  include/linux/power/charger-manager.h  |  4 +--
>>  3 files changed, 24 insertions(+), 15 deletions(-)
>>
>> diff --git 
>> a/Documentation/devicetree/bindings/power_supply/charger-manager.txt 
>> b/Documentation/devicetree/bindings/power_supply/charger-manager.txt
>> index ec4fe9de3137..73193e380dc2 100644
>> --- a/Documentation/devicetree/bindings/power_supply/charger-manager.txt
>> +++ b/Documentation/devicetree/bindings/power_supply/charger-manager.txt
>> @@ -65,13 +65,13 @@ Example :
>>  regulator@0 {
>>  cm-regulator-name = "chg-reg";
>>  cable@0 {
>> -cm-cable-name = "USB";
>> +cm-cable-id = 5; /* EXTCON_CHG_USB_SDP */
>>  cm-cable-extcon = "extcon-dev.0";
>>  cm-cable-min = <475000>;
>>  cm-cable-max = <500000>;
>>  };
>>  cable@1 {
>> -cm-cable-name = "TA";
>> +cm-cable-id = 6; /* EXTCON_CHG_USB_DCP */
>>  cm-cable-extcon = "extcon-dev.0";
>>  cm-cable-min = <650000>;
>>  cm-cable-max = <675000>;
> 
> This breaks DT ABI. Looks like charger-manager is not used in
> mainline, but I guess there should be an explicit Acked-By from
> a DT binding maintainer.

As I mentioned in my other mail to Rob, the purpose of this patch
is to remove the deprecated EXTCON APIs. That is why I touched the DT binding.

About DT ABI about charger-manager, we should handle it on separate patches.

> Also I think the defines should be in
> some header includable from DTS, so that something like this
> can be done:
> 
> cm-cable-id = <EXTCON_CHG_USB_SDP>;


I agree to use some definition for the kind of charger cable.
But, Not yet. I'm preparing the EXTCON update to use the definitions
on Device Tree file. I'll send some separate patches in the near future.
> 
> Apart from that:
> 
> Acked-By: Sebastian Reichel 

Thanks for review.

Best Regards,
Chanwoo Choi



Re: [PATCH 3/7] [Media] vcodec: mediatek: Add Mediatek V4L2 Video Decoder Driver

2016-04-17 Thread tiffany lin

snipped.

> > +
> > +void mtk_vcodec_dec_set_default_params(struct mtk_vcodec_ctx *ctx)
> > +{
> > +   struct mtk_q_data *q_data;
> > +
> > +   ctx->m2m_ctx->q_lock = &ctx->dev->dev_mutex;
> > +   ctx->fh.m2m_ctx = ctx->m2m_ctx;
> > +   ctx->fh.ctrl_handler = &ctx->ctrl_hdl;
> > +   INIT_WORK(&ctx->decode_work, mtk_vdec_worker);
> > +
> > +   q_data = &ctx->q_data[MTK_Q_DATA_SRC];
> > +   memset(q_data, 0, sizeof(struct mtk_q_data));
> > +   q_data->visible_width = DFT_CFG_WIDTH;
> > +   q_data->visible_height = DFT_CFG_HEIGHT;
> > +   q_data->fmt = &mtk_video_formats[OUT_FMT_IDX];
> > +   q_data->colorspace = V4L2_COLORSPACE_REC709;
> > +   q_data->field = V4L2_FIELD_NONE;
> > +   ctx->q_data[MTK_Q_DATA_DST].sizeimage[0] =
> > +   DFT_CFG_WIDTH * DFT_CFG_HEIGHT;
> > +   ctx->q_data[MTK_Q_DATA_DST].bytesperline[0] = 0;
> > +
> > +
> > +   q_data = &ctx->q_data[MTK_Q_DATA_DST];
> > +   memset(q_data, 0, sizeof(struct mtk_q_data));
> > +   q_data->visible_width = DFT_CFG_WIDTH;
> > +   q_data->visible_height = DFT_CFG_HEIGHT;
> > +   q_data->coded_width = DFT_CFG_WIDTH;
> > +   q_data->coded_height = DFT_CFG_HEIGHT;
> > +   q_data->colorspace = V4L2_COLORSPACE_REC709;
> > +   q_data->field = V4L2_FIELD_NONE;
> > +
> > +   q_data->fmt = &mtk_video_formats[CAP_FMT_IDX];
> > +
> > +   v4l_bound_align_image(&q_data->coded_width,
> > +   MTK_VDEC_MIN_W,
> > +   MTK_VDEC_MAX_W, 4,
> > +   &q_data->coded_height,
> > +   MTK_VDEC_MIN_H,
> > +   MTK_VDEC_MAX_H, 5, 6);
> > +
> > +   q_data->sizeimage[0] = q_data->coded_width * q_data->coded_height;
> > +   q_data->bytesperline[0] = q_data->coded_width;
> > +   q_data->sizeimage[1] = q_data->sizeimage[0] / 2;
> > +   q_data->bytesperline[1] = q_data->coded_width;
> > +
> > +}
> > +
> > +static int vidioc_vdec_streamon(struct file *file, void *priv,
> > +   enum v4l2_buf_type type)
> > +{
> > +   struct mtk_vcodec_ctx *ctx = fh_to_ctx(priv);
> > +
> > +   mtk_v4l2_debug(3, "[%d] (%d)", ctx->idx, type);
> > +
> > +   return v4l2_m2m_streamon(file, ctx->m2m_ctx, type);
> > +}
> > +
> > +static int vidioc_vdec_streamoff(struct file *file, void *priv,
> > +enum v4l2_buf_type type)
> > +{
> > +   struct mtk_vcodec_ctx *ctx = fh_to_ctx(priv);
> > +
> > +   mtk_v4l2_debug(3, "[%d] (%d)", ctx->idx, type);
> > +   return v4l2_m2m_streamoff(file, ctx->m2m_ctx, type);
> > +}
> > +
> > +static int vidioc_vdec_reqbufs(struct file *file, void *priv,
> > +  struct v4l2_requestbuffers *reqbufs)
> > +{
> > +   struct mtk_vcodec_ctx *ctx = fh_to_ctx(priv);
> > +   int ret;
> > +
> > +   mtk_v4l2_debug(3, "[%d] (%d) count=%d", ctx->idx,
> > +reqbufs->type, reqbufs->count);
> > +   ret = v4l2_m2m_reqbufs(file, ctx->m2m_ctx, reqbufs);
> > +
> > +   return ret;
> > +}
> 
> Please use the v4l2_m2m_ioctl_* helper functions where applicable.
> 



snipped.
> > +static unsigned int fops_vcodec_poll(struct file *file,
> > +struct poll_table_struct *wait)
> > +{
> > +   struct mtk_vcodec_ctx *ctx = fh_to_ctx(file->private_data);
> > +   struct mtk_vcodec_dev *dev = ctx->dev;
> > +   int ret;
> > +
> > +   mutex_lock(&dev->dev_mutex);
> > +   ret = v4l2_m2m_poll(file, ctx->m2m_ctx, wait);
> > +   mutex_unlock(&dev->dev_mutex);
> > +
> > +   return ret;
> > +}
> > +
> > +static int fops_vcodec_mmap(struct file *file, struct vm_area_struct *vma)
> > +{
> > +   struct mtk_vcodec_ctx *ctx = fh_to_ctx(file->private_data);
> > +
> > +   return v4l2_m2m_mmap(file, ctx->m2m_ctx, vma);
> > +}
> > +
> > +static const struct v4l2_file_operations mtk_vcodec_fops = {
> > +   .owner  = THIS_MODULE,
> > +   .open   = fops_vcodec_open,
> > +   .release= fops_vcodec_release,
> > +   .poll   = fops_vcodec_poll,
> > +   .unlocked_ioctl = video_ioctl2,
> > +   .mmap   = fops_vcodec_mmap,
> 
> You should be able to use the v4l2_m2m_fop helper functions for poll and mmap.
> 

Hi Hans,

We are planning to remove the m2m framework in the future, although we think
it is easy to use and saves us from duplicating a lot of code that the m2m
framework already implements, which reduces code size.
The main reason is that v4l2_m2m_try_schedule requires at least one output
buffer and one capture buffer to be queued before it runs device_run.
We want to start device_run without a capture buffer queued.
Is there any way we could keep using the m2m framework but trigger
device_run with only an output buffer?
Or do we need to remove m2m and write our own implementation?



snipped.


best regards,
Tiffany



Re: [PATCH 3/7] [Media] vcodec: mediatek: Add Mediatek V4L2 Video Decoder Driver

2016-04-17 Thread tiffany lin

snipped.

> > +
> > +void mtk_vcodec_dec_set_default_params(struct mtk_vcodec_ctx *ctx)
> > +{
> > +   struct mtk_q_data *q_data;
> > +
> > +   ctx->m2m_ctx->q_lock = &ctx->dev->dev_mutex;
> > +   ctx->fh.m2m_ctx = ctx->m2m_ctx;
> > +   ctx->fh.ctrl_handler = &ctx->ctrl_hdl;
> > +   INIT_WORK(&ctx->decode_work, mtk_vdec_worker);
> > +
> > +   q_data = &ctx->q_data[MTK_Q_DATA_SRC];
> > +   memset(q_data, 0, sizeof(struct mtk_q_data));
> > +   q_data->visible_width = DFT_CFG_WIDTH;
> > +   q_data->visible_height = DFT_CFG_HEIGHT;
> > +   q_data->fmt = &mtk_video_formats[OUT_FMT_IDX];
> > +   q_data->colorspace = V4L2_COLORSPACE_REC709;
> > +   q_data->field = V4L2_FIELD_NONE;
> > +   ctx->q_data[MTK_Q_DATA_DST].sizeimage[0] =
> > +   DFT_CFG_WIDTH * DFT_CFG_HEIGHT;
> > +   ctx->q_data[MTK_Q_DATA_DST].bytesperline[0] = 0;
> > +
> > +
> > +   q_data = &ctx->q_data[MTK_Q_DATA_DST];
> > +   memset(q_data, 0, sizeof(struct mtk_q_data));
> > +   q_data->visible_width = DFT_CFG_WIDTH;
> > +   q_data->visible_height = DFT_CFG_HEIGHT;
> > +   q_data->coded_width = DFT_CFG_WIDTH;
> > +   q_data->coded_height = DFT_CFG_HEIGHT;
> > +   q_data->colorspace = V4L2_COLORSPACE_REC709;
> > +   q_data->field = V4L2_FIELD_NONE;
> > +
> > +   q_data->fmt = &mtk_video_formats[CAP_FMT_IDX];
> > +
> > +   v4l_bound_align_image(&q_data->coded_width,
> > +   MTK_VDEC_MIN_W,
> > +   MTK_VDEC_MAX_W, 4,
> > +   &q_data->coded_height,
> > +   MTK_VDEC_MIN_H,
> > +   MTK_VDEC_MAX_H, 5, 6);
> > +
> > +   q_data->sizeimage[0] = q_data->coded_width * q_data->coded_height;
> > +   q_data->bytesperline[0] = q_data->coded_width;
> > +   q_data->sizeimage[1] = q_data->sizeimage[0] / 2;
> > +   q_data->bytesperline[1] = q_data->coded_width;
> > +
> > +}
> > +
> > +static int vidioc_vdec_streamon(struct file *file, void *priv,
> > +   enum v4l2_buf_type type)
> > +{
> > +   struct mtk_vcodec_ctx *ctx = fh_to_ctx(priv);
> > +
> > +   mtk_v4l2_debug(3, "[%d] (%d)", ctx->idx, type);
> > +
> > +   return v4l2_m2m_streamon(file, ctx->m2m_ctx, type);
> > +}
> > +
> > +static int vidioc_vdec_streamoff(struct file *file, void *priv,
> > +enum v4l2_buf_type type)
> > +{
> > +   struct mtk_vcodec_ctx *ctx = fh_to_ctx(priv);
> > +
> > +   mtk_v4l2_debug(3, "[%d] (%d)", ctx->idx, type);
> > +   return v4l2_m2m_streamoff(file, ctx->m2m_ctx, type);
> > +}
> > +
> > +static int vidioc_vdec_reqbufs(struct file *file, void *priv,
> > +  struct v4l2_requestbuffers *reqbufs)
> > +{
> > +   struct mtk_vcodec_ctx *ctx = fh_to_ctx(priv);
> > +   int ret;
> > +
> > +   mtk_v4l2_debug(3, "[%d] (%d) count=%d", ctx->idx,
> > +reqbufs->type, reqbufs->count);
> > +   ret = v4l2_m2m_reqbufs(file, ctx->m2m_ctx, reqbufs);
> > +
> > +   return ret;
> > +}
> 
> Please use the v4l2_m2m_ioctl_* helper functions where applicable.
> 



snipped.
> > +static unsigned int fops_vcodec_poll(struct file *file,
> > +struct poll_table_struct *wait)
> > +{
> > +   struct mtk_vcodec_ctx *ctx = fh_to_ctx(file->private_data);
> > +   struct mtk_vcodec_dev *dev = ctx->dev;
> > +   int ret;
> > +
> > +   mutex_lock(&dev->dev_mutex);
> > +   ret = v4l2_m2m_poll(file, ctx->m2m_ctx, wait);
> > +   mutex_unlock(&dev->dev_mutex);
> > +
> > +   return ret;
> > +}
> > +
> > +static int fops_vcodec_mmap(struct file *file, struct vm_area_struct *vma)
> > +{
> > +   struct mtk_vcodec_ctx *ctx = fh_to_ctx(file->private_data);
> > +
> > +   return v4l2_m2m_mmap(file, ctx->m2m_ctx, vma);
> > +}
> > +
> > +static const struct v4l2_file_operations mtk_vcodec_fops = {
> > +   .owner  = THIS_MODULE,
> > +   .open   = fops_vcodec_open,
> > +   .release= fops_vcodec_release,
> > +   .poll   = fops_vcodec_poll,
> > +   .unlocked_ioctl = video_ioctl2,
> > +   .mmap   = fops_vcodec_mmap,
> 
> You should be able to use the v4l2_m2m_fop helper functions for poll and mmap.
> 

Hi Hans,

We are planning to remove the m2m framework in the future, although we think
it is easy to use and saves us from duplicating a lot of code that the m2m
framework already implements, which reduces code size.
The main reason is that v4l2_m2m_try_schedule requires at least one output
buffer and one capture buffer to be queued before it runs device_run.
We want to start device_run without a capture buffer queued.
Is there any way we could keep using the m2m framework but trigger
device_run with only an output buffer?
Or do we need to remove m2m and write our own implementation?



snipped.


best regards,
Tiffany



Re: [PATCH] x86/entry/x32: Check top 32 bits of syscall number on the fast path

2016-04-17 Thread Andy Lutomirski
On Sun, Apr 17, 2016 at 10:21 PM, H. Peter Anvin  wrote:
> On 04/17/16 22:18, Andy Lutomirski wrote:
>> On Sun, Apr 17, 2016 at 9:50 PM, H. Peter Anvin  wrote:
>>> On 04/17/16 17:47, Ben Hutchings wrote:
>>>> We've always masked off the top 32 bits when x32 is enabled, but
>>>> hopefully no-one relies on that.  Now that the slow path is in C, we
>>>> check all the bits there, regardless of whether x32 is enabled.  Let's
>>>> make the fast path consistent with it.
>>>
>>> We have always masked off the top 32 bits *period*.
>>>
>>> We have had some bugs where we haven't, because someone has tried to
>>> "optimize" the code and they have been quite serious.  The system call
>>> number is an int, which means the upper 32 bits are undefined on call
>>> entry: we HAVE to mask them.
>>
>> I'm reasonably confident that normal kernels (non-x32) have not masked
>> those bits since before I started hacking on the entry code.
>>
>
> I'm reasonably confident they have, because we have had security bugs
> TWICE when someone has tried to "optimize" the code.  The masking was
> generally done with a movl instruction, which confused people.
>
>> So the type of the syscall nr is a bit confused.  If there was an
>> installed base of programs that left garbage in the high bits, we
>> would have noticed *years* ago.  On the other hand, the 32-bit ptrace
>> ABI and the seccomp ABI both think it's 32-bits.
>
> Incorrect.  We have seen these failures in real life.

What kind of failure?  Programs that accidentally set rax to
0xbaadf00d00000003 get -ENOSYS in most cases, not close().  If we'd
broken programs like this, I assume we would have had to fix it a long
time ago.

>
>> If we were designing the x86_64 ABI and everything around it from
>> scratch, I'd suggest that either the high bits must be zero or
>> that the number actually be 64 bits (which are more or less the same
>> thing).  That would let us use the high bits for something interesting
>> in the future.
>
> Not really all that useful.  What we have is a C ABI.

And we've already stolen a bit once for x32.  Maybe we'll want more.
For example, if we added a cancellable bit, if x86_32 didn't want it,
we could steal a high bit for it.

>
>> In practice, we can probably still declare that the thing is a 64-bit
>> number, given that most kernels in the wild currently fail syscalls
>> that have the high bits set.
>
> They don't, and we can prove it...

I'm confused.

  asm volatile ("syscall" :
"=a" (ret) :
"a" (SYS_getpid | 0xbaadf00d00000000ULL) :
"memory", "cc", "rcx", "r11");

gets -ENOSYS on the kernel I'm running on my laptop and on Fedora 23's
stock kernel.

I'm not terribly worried about nasty security issues in here because
all the nasty stuff is in C now.

What kernel had the other behavior?  In 2.6.11, I see:

ENTRY(system_call)
CFI_STARTPROC
swapgs
movq%rsp,%gs:pda_oldrsp
movq%gs:pda_kernelstack,%rsp
sti
SAVE_ARGS 8,1
movq  %rax,ORIG_RAX-ARGOFFSET(%rsp)
movq  %rcx,RIP-ARGOFFSET(%rsp)
GET_THREAD_INFO(%rcx)
testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT),threadinfo_flags(%rcx)
jnz tracesys
cmpq $__NR_syscall_max,%rax

--Andy


Re: [PATCH] x86/entry/x32: Check top 32 bits of syscall number on the fast path

2016-04-17 Thread Andy Lutomirski
On Sun, Apr 17, 2016 at 10:21 PM, H. Peter Anvin  wrote:
> On 04/17/16 22:18, Andy Lutomirski wrote:
>> On Sun, Apr 17, 2016 at 9:50 PM, H. Peter Anvin  wrote:
>>> On 04/17/16 17:47, Ben Hutchings wrote:
>>>> We've always masked off the top 32 bits when x32 is enabled, but
>>>> hopefully no-one relies on that.  Now that the slow path is in C, we
>>>> check all the bits there, regardless of whether x32 is enabled.  Let's
>>>> make the fast path consistent with it.
>>>
>>> We have always masked off the top 32 bits *period*.
>>>
>>> We have had some bugs where we haven't, because someone has tried to
>>> "optimize" the code and they have been quite serious.  The system call
>>> number is an int, which means the upper 32 bits are undefined on call
>>> entry: we HAVE to mask them.
>>
>> I'm reasonably confident that normal kernels (non-x32) have not masked
>> those bits since before I started hacking on the entry code.
>>
>
> I'm reasonably confident they have, because we have had security bugs
> TWICE when someone has tried to "optimize" the code.  The masking was
> generally done with a movl instruction, which confused people.
>
>> So the type of the syscall nr is a bit confused.  If there was an
>> installed base of programs that left garbage in the high bits, we
>> would have noticed *years* ago.  On the other hand, the 32-bit ptrace
>> ABI and the seccomp ABI both think it's 32-bits.
>
> Incorrect.  We have seen these failures in real life.

What kind of failure?  Programs that accidentally set rax to
0xbaadf00d00000003 get -ENOSYS in most cases, not close().  If we'd
broken programs like this, I assume we would have had to fix it a long
time ago.

>
>> If we were designing the x86_64 ABI and everything around it from
>> scratch, I'd suggest that either the high bits must be zero or
>> that the number actually be 64 bits (which are more or less the same
>> thing).  That would let us use the high bits for something interesting
>> in the future.
>
> Not really all that useful.  What we have is a C ABI.

And we've already stolen a bit once for x32.  Maybe we'll want more.
For example, if we added a cancellable bit, if x86_32 didn't want it,
we could steal a high bit for it.

>
>> In practice, we can probably still declare that the thing is a 64-bit
>> number, given that most kernels in the wild currently fail syscalls
>> that have the high bits set.
>
> They don't, and we can prove it...

I'm confused.

  asm volatile ("syscall" :
"=a" (ret) :
"a" (SYS_getpid | 0xbaadf00d00000000ULL) :
"memory", "cc", "rcx", "r11");

gets -ENOSYS on the kernel I'm running on my laptop and on Fedora 23's
stock kernel.

I'm not terribly worried about nasty security issues in here because
all the nasty stuff is in C now.

What kernel had the other behavior?  In 2.6.11, I see:

ENTRY(system_call)
CFI_STARTPROC
swapgs
movq%rsp,%gs:pda_oldrsp
movq%gs:pda_kernelstack,%rsp
sti
SAVE_ARGS 8,1
movq  %rax,ORIG_RAX-ARGOFFSET(%rsp)
movq  %rcx,RIP-ARGOFFSET(%rsp)
GET_THREAD_INFO(%rcx)
testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT),threadinfo_flags(%rcx)
jnz tracesys
cmpq $__NR_syscall_max,%rax

--Andy


linux-next: Tree for Apr 18

2016-04-17 Thread Stephen Rothwell
Hi all,

Changes since 20160415:

The net-next tree gained conflicts against the net tree.

The tip tree still had its build failure for which I reverted a commit
and gained another for which I applied a build fix.

The gpio tree gained a build failure so I used the version from
next-20160415.

The livepatching tree gained conflicts against Linus' and the powerpc
trees.

The akpm-current tree still had its build failure for which I applied
a patch.

Non-merge commits (relative to Linus' tree): 4344
 3992 files changed, 163277 insertions(+), 92002 deletions(-)



I have created today's linux-next tree at
git://git.kernel.org/pub/scm/linux/kernel/git/next/linux-next.git
(patches at http://www.kernel.org/pub/linux/kernel/next/ ).  If you
are tracking the linux-next tree using git, you should not use "git pull"
to do so as that will try to merge the new linux-next release with the
old one.  You should use "git fetch" and checkout or reset to the new
master.

You can see which trees have been included by looking in the Next/Trees
file in the source.  There are also quilt-import.log and merge.log
files in the Next directory.  Between each merge, the tree was built
with a ppc64_defconfig for powerpc and an allmodconfig (with
CONFIG_BUILD_DOCSRC=n) for x86_64, a multi_v7_defconfig for arm and a
native build of tools/perf. After the final fixups (if any), I do an
x86_64 modules_install followed by builds for x86_64 allnoconfig,
powerpc allnoconfig (32 and 64 bit), ppc44x_defconfig, allyesconfig
(this fails its final link) and pseries_le_defconfig and i386, sparc
and sparc64 defconfig.

Below is a summary of the state of the merge.

I am currently merging 232 trees (counting Linus' and 35 trees of patches
pending for Linus' tree).

Stats about the size of the tree over time can be seen at
http://neuling.org/linux-next-size.html .

Status of my local build tests will be at
http://kisskb.ellerman.id.au/linux-next .  If maintainers want to give
advice about cross compilers/configs that work, we are always open to add
more builds.

Thanks to Randy Dunlap for doing many randconfig builds.  And to Paul
Gortmaker for triage and bug fixes.

-- 
Cheers,
Stephen Rothwell

$ git checkout master
$ git reset --hard stable
Merging origin/master (9d090d01e3ef Merge tag 'dm-4.6-fix-2' of 
git://git.kernel.org/pub/scm/linux/kernel/git/device-mapper/linux-dm)
Merging fixes/master (9735a22799b9 Linux 4.6-rc2)
Merging kbuild-current/rc-fixes (3d1450d54a4f Makefile: Force gzip and xz on 
module install)
Merging arc-current/for-curr (d01ebf06e305 ARCv2: Enable LOCKDEP)
Merging arm-current/fixes (9c18fcf7ae0e ARM: 8551/2: DMA: Fix kzalloc flags in 
__dma_alloc)
Merging m68k-current/for-linus (7b8ba82ad4ad m68k/defconfig: Update defconfigs 
for v4.6-rc2)
Merging metag-fixes/fixes (0164a711c97b metag: Fix ioremap_wc/ioremap_cached 
build errors)
Merging powerpc-fixes/fixes (71528d8bd7a8 powerpc: Correct used_vsr comment)
Merging powerpc-merge-mpe/fixes (bc0195aad0da Linux 4.2-rc2)
Merging sparc/master (5ec712934ce1 sparc: Write up preadv2/pwritev2 syscalls.)
Merging net/master (ab2ed0171a50 macsec: fix crypto Kconfig dependency)
Merging ipsec/master (d6af1a31cc72 vti: Add pmtu handling to vti_xmit.)
Merging ipvs/master (bcf493428840 netfilter: ebtables: Fix extension lookup 
with identical name)
Merging wireless-drivers/master (de478a61389c ath9k: 
ar5008_hw_cmn_spur_mitigate: add missing mask_m & mask_p initialisation)
Merging mac80211/master (8f815cdde3e5 nl80211: check netlink protocol in socket 
release notification)
Merging sound-current/for-linus (c44da62b55bb ALSA: hda - Fix inconsistent 
monitor_present state until repoll)
Merging pci-current/for-linus (67e658794ca1 cxgb4: Set VPD size so we can read 
both VPD structures)
Merging driver-core.current/driver-core-linus (dea5c24a1404 lib: lz4: cleanup 
unaligned access efficiency detection)
Merging tty.current/tty-linus (bf1620068911 Linux 4.6-rc3)
Merging usb.current/usb-linus (e86103a75705 usb: hcd: out of bounds access in 
for_each_companion)
Merging usb-gadget-fixes/fixes (bf1620068911 Linux 4.6-rc3)
Merging usb-serial-fixes/usb-linus (bf1620068911 Linux 4.6-rc3)
Merging usb-chipidea-fixes/ci-for-usb-stable (d144dfea8af7 usb: chipidea: otg: 
change workqueue ci_otg as freezable)
Merging staging.current/staging-linus (bf1620068911 Linux 4.6-rc3)
Merging char-misc.current/char-misc-linus (053f78d35995 Merge tag 
'lkdtm-4.6-rc3' of git://git.kernel.org/pub/scm/linux/kernel/git/kees/linux 
into char-misc-linus)
Merging input-current/for-linus (eda5ecc0a6b8 Input: pmic8xxx-pwrkey - fix 
algorithm for converting trigger delay)
Merging crypto-current/master (f709b45ec461 crypto: ccp - Prevent information 
leakage on export)
Merging ide/master (1993b176a822 Merge 
git://git.kernel.org/pub/scm/linux/kernel/git/davem/ide)
Merging devicetree-current/devicetree/merge (f76502aa9140 of/dynamic: Fix test 
for 

linux-next: Tree for Apr 18

2016-04-17 Thread Stephen Rothwell
Hi all,

Changes since 20160415:

The net-next tree gained conflicts against the net tree.

The tip tree still had its build failure for which I reverted a commit
and gained another for which I applied a build fix.

The gpio tree gained a build failure so I used the version from
next-20160415.

The livepatching tree gained conflicts against Linus' and the powerpc
trees.

The akpm-current tree still had its build failure for which I applied
a patch.

Non-merge commits (relative to Linus' tree): 4344
 3992 files changed, 163277 insertions(+), 92002 deletions(-)



I have created today's linux-next tree at
git://git.kernel.org/pub/scm/linux/kernel/git/next/linux-next.git
(patches at http://www.kernel.org/pub/linux/kernel/next/ ).  If you
are tracking the linux-next tree using git, you should not use "git pull"
to do so as that will try to merge the new linux-next release with the
old one.  You should use "git fetch" and checkout or reset to the new
master.

You can see which trees have been included by looking in the Next/Trees
file in the source.  There are also quilt-import.log and merge.log
files in the Next directory.  Between each merge, the tree was built
with a ppc64_defconfig for powerpc and an allmodconfig (with
CONFIG_BUILD_DOCSRC=n) for x86_64, a multi_v7_defconfig for arm and a
native build of tools/perf. After the final fixups (if any), I do an
x86_64 modules_install followed by builds for x86_64 allnoconfig,
powerpc allnoconfig (32 and 64 bit), ppc44x_defconfig, allyesconfig
(this fails its final link) and pseries_le_defconfig and i386, sparc
and sparc64 defconfig.

Below is a summary of the state of the merge.

I am currently merging 232 trees (counting Linus' and 35 trees of patches
pending for Linus' tree).

Stats about the size of the tree over time can be seen at
http://neuling.org/linux-next-size.html .

Status of my local build tests will be at
http://kisskb.ellerman.id.au/linux-next .  If maintainers want to give
advice about cross compilers/configs that work, we are always open to add
more builds.

Thanks to Randy Dunlap for doing many randconfig builds.  And to Paul
Gortmaker for triage and bug fixes.

-- 
Cheers,
Stephen Rothwell

$ git checkout master
$ git reset --hard stable
Merging origin/master (9d090d01e3ef Merge tag 'dm-4.6-fix-2' of 
git://git.kernel.org/pub/scm/linux/kernel/git/device-mapper/linux-dm)
Merging fixes/master (9735a22799b9 Linux 4.6-rc2)
Merging kbuild-current/rc-fixes (3d1450d54a4f Makefile: Force gzip and xz on 
module install)
Merging arc-current/for-curr (d01ebf06e305 ARCv2: Enable LOCKDEP)
Merging arm-current/fixes (9c18fcf7ae0e ARM: 8551/2: DMA: Fix kzalloc flags in 
__dma_alloc)
Merging m68k-current/for-linus (7b8ba82ad4ad m68k/defconfig: Update defconfigs 
for v4.6-rc2)
Merging metag-fixes/fixes (0164a711c97b metag: Fix ioremap_wc/ioremap_cached 
build errors)
Merging powerpc-fixes/fixes (71528d8bd7a8 powerpc: Correct used_vsr comment)
Merging powerpc-merge-mpe/fixes (bc0195aad0da Linux 4.2-rc2)
Merging sparc/master (5ec712934ce1 sparc: Write up preadv2/pwritev2 syscalls.)
Merging net/master (ab2ed0171a50 macsec: fix crypto Kconfig dependency)
Merging ipsec/master (d6af1a31cc72 vti: Add pmtu handling to vti_xmit.)
Merging ipvs/master (bcf493428840 netfilter: ebtables: Fix extension lookup 
with identical name)
Merging wireless-drivers/master (de478a61389c ath9k: 
ar5008_hw_cmn_spur_mitigate: add missing mask_m & mask_p initialisation)
Merging mac80211/master (8f815cdde3e5 nl80211: check netlink protocol in socket 
release notification)
Merging sound-current/for-linus (c44da62b55bb ALSA: hda - Fix inconsistent 
monitor_present state until repoll)
Merging pci-current/for-linus (67e658794ca1 cxgb4: Set VPD size so we can read 
both VPD structures)
Merging driver-core.current/driver-core-linus (dea5c24a1404 lib: lz4: cleanup 
unaligned access efficiency detection)
Merging tty.current/tty-linus (bf1620068911 Linux 4.6-rc3)
Merging usb.current/usb-linus (e86103a75705 usb: hcd: out of bounds access in 
for_each_companion)
Merging usb-gadget-fixes/fixes (bf1620068911 Linux 4.6-rc3)
Merging usb-serial-fixes/usb-linus (bf1620068911 Linux 4.6-rc3)
Merging usb-chipidea-fixes/ci-for-usb-stable (d144dfea8af7 usb: chipidea: otg: 
change workqueue ci_otg as freezable)
Merging staging.current/staging-linus (bf1620068911 Linux 4.6-rc3)
Merging char-misc.current/char-misc-linus (053f78d35995 Merge tag 
'lkdtm-4.6-rc3' of git://git.kernel.org/pub/scm/linux/kernel/git/kees/linux 
into char-misc-linus)
Merging input-current/for-linus (eda5ecc0a6b8 Input: pmic8xxx-pwrkey - fix 
algorithm for converting trigger delay)
Merging crypto-current/master (f709b45ec461 crypto: ccp - Prevent information 
leakage on export)
Merging ide/master (1993b176a822 Merge 
git://git.kernel.org/pub/scm/linux/kernel/git/davem/ide)
Merging devicetree-current/devicetree/merge (f76502aa9140 of/dynamic: Fix test 
for 

Re: [RESEND PATCH 2/3] power: axp288_charger: Replace deprecatd API of extcon

2016-04-17 Thread Chanwoo Choi
Hi Sebastian,

On 2016년 04월 15일 23:20, Sebastian Reichel wrote:
> Hi,
> 
> On Fri, Apr 15, 2016 at 09:43:35AM +0900, Chanwoo Choi wrote:
>> This patch removes the deprecated notifier API of the extcon framework and
>> uses the new extcon API[2] with the unique id[1] to indicate each external
>> connector. The deprecated API is altered as follows:
>> - extcon_register_interest() -> extcon_register_notifier()
>> - extcon_unregister_interest() -> extcon_unregister_notifier()
>> - extcon_get_cable_state() -> extcon_get_cable_state_()
>>
>> Also, extcon changes the names of the USB charger connectors in patch[3] as
>> follows:
>> - EXTCON_CHG_USB_SDP /* Standard Downstream Port */
>> - EXTCON_CHG_USB_DCP /* Dedicated Charging Port */
>> - EXTCON_CHG_USB_CDP /* Charging Downstream Port */
>> - EXTCON_CHG_USB_ACA /* Accessory Charger Adapter */
>>
>> [1] Commit 2a9de9c0f08d61
>> - ("extcon: Use the unique id for external connector instead of string)
>> [2] Commit 046050f6e623e4
>> - ("extcon: Update the prototype of extcon_register_notifier() with enum 
>> extcon
>> [3] Commit 11eecf910bd81d
>> - ("extcon: Modify the id and name of external connector")
>>
>> Signed-off-by: Chanwoo Choi 
>> ---
>>  drivers/power/axp288_charger.c | 77 
>> +-
>>  1 file changed, 53 insertions(+), 24 deletions(-)
>>
>> diff --git a/drivers/power/axp288_charger.c b/drivers/power/axp288_charger.c
>> index e4d569f57acc..e5c2569befa6 100644
>> --- a/drivers/power/axp288_charger.c
>> +++ b/drivers/power/axp288_charger.c
>> @@ -129,10 +129,6 @@
>>  

[snip]

>>  /* Register charger interrupts */
>>  for (i = 0; i < CHRG_INTR_END; i++) {
>> @@ -905,11 +922,17 @@ static int axp288_charger_probe(struct platform_device 
>> *pdev)
>>  return 0;
>>  
>>  intr_reg_failed:
>> -if (info->otg.cable.edev)
>> -extcon_unregister_interest(>otg.cable);
>> +if (info->otg.cable)
>> +extcon_unregister_notifier(info->otg.cable, EXTCON_USB_HOST,
>> +>otg.id_nb);
>>  power_supply_unregister(info->psy_usb);
>>  psy_reg_failed:
>> -extcon_unregister_notifier(info->cable.edev, EXTCON_NONE, 
>> >cable.nb);
>> +extcon_unregister_notifier(info->cable.edev, EXTCON_CHG_USB_SDP,
>> +>cable.nb);
>> +extcon_unregister_notifier(info->cable.edev, EXTCON_CHG_USB_SDP,
>> +>cable.nb);
> 
> EXTCON_CHG_USB_SDP -> EXTCON_CHG_USB_CDP?

I was mistaken. I'll fix it.

> 
>> +extcon_unregister_notifier(info->cable.edev, EXTCON_CHG_USB_DCP,
>> +>cable.nb);
>>  return ret;
>>  }
>>  
>> @@ -917,10 +940,16 @@ static int axp288_charger_remove(struct 
>> platform_device *pdev)
>>  {
>>  struct axp288_chrg_info *info =  dev_get_drvdata(>dev);
>>  
>> -if (info->otg.cable.edev)
>> -extcon_unregister_interest(>otg.cable);
>> +if (info->otg.cable)
>> +extcon_unregister_notifier(info->otg.cable, EXTCON_USB_HOST,
>> +>otg.id_nb);
>>  
>> -extcon_unregister_notifier(info->cable.edev, EXTCON_NONE, 
>> >cable.nb);
>> +extcon_unregister_notifier(info->cable.edev, EXTCON_CHG_USB_SDP,
>> +>cable.nb);
>> +extcon_unregister_notifier(info->cable.edev, EXTCON_CHG_USB_SDP,
>> +>cable.nb);
> 
> EXTCON_CHG_USB_SDP -> EXTCON_CHG_USB_CDP?

ditto.

> 
>> +extcon_unregister_notifier(info->cable.edev, EXTCON_CHG_USB_DCP,
>> +>cable.nb);
>>  power_supply_unregister(info->psy_usb);
>>  
>>  return 0;
> 
> Does this have dependencies on your tree, or are all dependencies already
> in Torvalds' tree?

This patch set depends on Linus Torvalds' tree[1].
[1] https://git.kernel.org/cgit/linux/kernel/git/torvalds/linux.git/

So, when picking up this patch set, someone should make an immutable branch
for it to prevent merge conflicts between the power_supply and extcon trees.

Thanks,
Chanwoo Choi



Re: [RESEND PATCH 2/3] power: axp288_charger: Replace deprecatd API of extcon

2016-04-17 Thread Chanwoo Choi
Hi Sebastian,

On 2016년 04월 15일 23:20, Sebastian Reichel wrote:
> Hi,
> 
> On Fri, Apr 15, 2016 at 09:43:35AM +0900, Chanwoo Choi wrote:
>> This patch removes the deprecated notifier API of the extcon framework and
>> uses the new extcon API[2] with the unique id[1] to indicate each external
>> connector. The deprecated API is altered as follows:
>> - extcon_register_interest() -> extcon_register_notifier()
>> - extcon_unregister_interest() -> extcon_unregister_notifier()
>> - extcon_get_cable_state() -> extcon_get_cable_state_()
>>
>> Also, extcon changes the names of the USB charger connectors in patch[3] as
>> follows:
>> - EXTCON_CHG_USB_SDP /* Standard Downstream Port */
>> - EXTCON_CHG_USB_DCP /* Dedicated Charging Port */
>> - EXTCON_CHG_USB_CDP /* Charging Downstream Port */
>> - EXTCON_CHG_USB_ACA /* Accessory Charger Adapter */
>>
>> [1] Commit 2a9de9c0f08d61
>> - ("extcon: Use the unique id for external connector instead of string)
>> [2] Commit 046050f6e623e4
>> - ("extcon: Update the prototype of extcon_register_notifier() with enum 
>> extcon
>> [3] Commit 11eecf910bd81d
>> - ("extcon: Modify the id and name of external connector")
>>
>> Signed-off-by: Chanwoo Choi 
>> ---
>>  drivers/power/axp288_charger.c | 77 
>> +-
>>  1 file changed, 53 insertions(+), 24 deletions(-)
>>
>> diff --git a/drivers/power/axp288_charger.c b/drivers/power/axp288_charger.c
>> index e4d569f57acc..e5c2569befa6 100644
>> --- a/drivers/power/axp288_charger.c
>> +++ b/drivers/power/axp288_charger.c
>> @@ -129,10 +129,6 @@
>>  

[snip]

>>  /* Register charger interrupts */
>>  for (i = 0; i < CHRG_INTR_END; i++) {
>> @@ -905,11 +922,17 @@ static int axp288_charger_probe(struct platform_device 
>> *pdev)
>>  return 0;
>>  
>>  intr_reg_failed:
>> -if (info->otg.cable.edev)
>> -extcon_unregister_interest(>otg.cable);
>> +if (info->otg.cable)
>> +extcon_unregister_notifier(info->otg.cable, EXTCON_USB_HOST,
>> +>otg.id_nb);
>>  power_supply_unregister(info->psy_usb);
>>  psy_reg_failed:
>> -extcon_unregister_notifier(info->cable.edev, EXTCON_NONE, 
>> >cable.nb);
>> +extcon_unregister_notifier(info->cable.edev, EXTCON_CHG_USB_SDP,
>> +>cable.nb);
>> +extcon_unregister_notifier(info->cable.edev, EXTCON_CHG_USB_SDP,
>> +>cable.nb);
> 
> EXTCON_CHG_USB_SDP -> EXTCON_CHG_USB_CDP?

I was mistaken. I'll fix it.

> 
>> +extcon_unregister_notifier(info->cable.edev, EXTCON_CHG_USB_DCP,
>> +>cable.nb);
>>  return ret;
>>  }
>>  
>> @@ -917,10 +940,16 @@ static int axp288_charger_remove(struct 
>> platform_device *pdev)
>>  {
>>  struct axp288_chrg_info *info =  dev_get_drvdata(>dev);
>>  
>> -if (info->otg.cable.edev)
>> -extcon_unregister_interest(>otg.cable);
>> +if (info->otg.cable)
>> +extcon_unregister_notifier(info->otg.cable, EXTCON_USB_HOST,
>> +>otg.id_nb);
>>  
>> -extcon_unregister_notifier(info->cable.edev, EXTCON_NONE, 
>> >cable.nb);
>> +extcon_unregister_notifier(info->cable.edev, EXTCON_CHG_USB_SDP,
>> +>cable.nb);
>> +extcon_unregister_notifier(info->cable.edev, EXTCON_CHG_USB_SDP,
>> +>cable.nb);
> 
> EXTCON_CHG_USB_SDP -> EXTCON_CHG_USB_CDP?

ditto.

> 
>> +extcon_unregister_notifier(info->cable.edev, EXTCON_CHG_USB_DCP,
>> +>cable.nb);
>>  power_supply_unregister(info->psy_usb);
>>  
>>  return 0;
> 
> Does this have dependencies on your tree, or are all dependencies already
> in Torvalds' tree?

This patch set depends on Linus Torvalds' tree[1].
[1] https://git.kernel.org/cgit/linux/kernel/git/torvalds/linux.git/

So, when picking up this patch set, someone should make an immutable branch
for it to prevent merge conflicts between the power_supply and extcon trees.

Thanks,
Chanwoo Choi



Re: [PATCH v2 0/4] Introduce bulk mode for crypto engine framework

2016-04-17 Thread Baolin Wang
Hi Herbert,

On 15 April 2016 at 21:48, Herbert Xu  wrote:
> On Tue, Mar 15, 2016 at 03:47:58PM +0800, Baolin Wang wrote:
>> Now some cipher hardware engines prefer to handle bulk block by merging 
>> requests
>> to increase the block size and thus increase the hardware engine processing 
>> speed.
>>
>> This patchset introduces request bulk mode to help the crypto hardware 
>> drivers
>> improve in efficiency.
>
> Could you please explain why this merging can't be done in dm-crypt
> instead?

We've tried to do this in dm-crypt, but it failed.
The dm-crypt maintainer explained to me that I should optimize the
driver, not add strange hw-dependent crypto modes to dm-crypt, this is
not the first crypto accelerator that is just not suited for this kind
of use.
He thought if it can process batch of chunks of data each with own IV,
then it can work with dm-crypt, but he thought such optimized code
should be inside crypto API, not in dmcrypt.

I think his suggestion is reasonable, so we introduce the crypto
engine framework to factor out the common patterns for driving the
queue of operations. Then it will be more reasonable to do the bulk
mode optimization in crypto engine framework. Thanks.

>
> Thanks,
> --
> Email: Herbert Xu 
> Home Page: http://gondor.apana.org.au/~herbert/
> PGP Key: http://gondor.apana.org.au/~herbert/pubkey.txt



-- 
Baolin.wang
Best Regards


Re: [PATCH v2 0/4] Introduce bulk mode for crypto engine framework

2016-04-17 Thread Baolin Wang
Hi Herbert,

On 15 April 2016 at 21:48, Herbert Xu  wrote:
> On Tue, Mar 15, 2016 at 03:47:58PM +0800, Baolin Wang wrote:
>> Now some cipher hardware engines prefer to handle bulk block by merging 
>> requests
>> to increase the block size and thus increase the hardware engine processing 
>> speed.
>>
>> This patchset introduces request bulk mode to help the crypto hardware 
>> drivers
>> improve in efficiency.
>
> Could you please explain why this merging can't be done in dm-crypt
> instead?

We've tried to do this in dm-crypt, but it failed.
The dm-crypt maintainer explained to me that I should optimize the
driver, not add strange hw-dependent crypto modes to dm-crypt, this is
not the first crypto accelerator that is just not suited for this kind
of use.
He thought if it can process batch of chunks of data each with own IV,
then it can work with dm-crypt, but he thought such optimized code
should be inside crypto API, not in dmcrypt.

I think his suggestion is reasonable, so we introduce the crypto
engine framework to factor out the common patterns for driving the
queue of operations. Then it will be more reasonable to do the bulk
mode optimization in crypto engine framework. Thanks.

>
> Thanks,
> --
> Email: Herbert Xu 
> Home Page: http://gondor.apana.org.au/~herbert/
> PGP Key: http://gondor.apana.org.au/~herbert/pubkey.txt



-- 
Baolin.wang
Best Regards


[PATCH 3/2] cgroup_show_path: use a new helper to get current cgns css_set

2016-04-17 Thread Serge E. Hallyn
Since we're getting current's cgroup namespace info, and are not
modifying it, we can use rcu_read_lock() instead of cgroup_mutex.

Signed-off-by: Serge Hallyn 
---
 kernel/cgroup.c | 40 
 1 file changed, 36 insertions(+), 4 deletions(-)

diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index 9a0d7b3..cd8269e 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -1215,6 +1215,41 @@ static void cgroup_destroy_root(struct cgroup_root *root)
cgroup_free_root(root);
 }
 
+/*
+ * look up cgroup associated with current task's cgroup namespace on the
+ * specified hierarchy
+ */
+static struct cgroup *
+current_cgns_cgroup_from_root(struct cgroup_root *root)
+{
+   struct cgroup *res = NULL;
+   struct css_set *css;
+
+   lockdep_assert_held(&css_set_lock);
+
+   rcu_read_lock();
+
+   css = current->nsproxy->cgroup_ns->root_cset;
+   if (cset == &init_css_set) {
+   res = &root->cgrp;
+   } else {
+   struct cgrp_cset_link *link;
+
+   list_for_each_entry(link, &cset->cgrp_links, cgrp_link) {
+   struct cgroup *c = link->cgrp;
+
+   if (c->root == root) {
+   res = c;
+   break;
+   }
+   }
+   }
+   rcu_read_unlock();
+
+   BUG_ON(!res);
+   return res;
+}
+
 /* look up cgroup associated with given css_set on the specified hierarchy */
 static struct cgroup *cset_cgroup_from_root(struct css_set *cset,
struct cgroup_root *root)
@@ -1598,13 +1633,11 @@ static int cgroup_show_path(struct seq_file *sf, struct 
kernfs_node *kf_node,
 {
int len = 0, ret = 0;
char *buf = NULL;
-   struct cgroup_namespace *ns = current->nsproxy->cgroup_ns;
struct cgroup_root *kf_cgroot = cgroup_root_from_kf(kf_root);
struct cgroup *ns_cgroup;
 
-   mutex_lock(&cgroup_mutex);
spin_lock_bh(&css_set_lock);
-   ns_cgroup = cset_cgroup_from_root(ns->root_cset, kf_cgroot);
+   ns_cgroup = current_cgns_cgroup_from_root(kf_cgroot);
len = kernfs_path_from_node(kf_node, ns_cgroup->kn, NULL, 0);
if (len > 0)
buf = kmalloc(len + 1, GFP_ATOMIC);
@@ -1612,7 +1645,6 @@ static int cgroup_show_path(struct seq_file *sf, struct 
kernfs_node *kf_node,
ret = kernfs_path_from_node(kf_node, ns_cgroup->kn, buf, len + 
1);
 
spin_unlock_bh(&css_set_lock);
-   mutex_unlock(&cgroup_mutex);
 
if (len <= 0)
return len;
-- 
2.7.4



[PATCH 3/2] cgroup_show_path: use a new helper to get current cgns css_set

2016-04-17 Thread Serge E. Hallyn
Since we're getting current's cgroup namespace info, and are not
modifying it, we can use rcu_read_lock() instead of cgroup_mutex.

Signed-off-by: Serge Hallyn 
---
 kernel/cgroup.c | 40 ++++++++++++++++++++++++++++++++++++----
 1 file changed, 36 insertions(+), 4 deletions(-)

diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index 9a0d7b3..cd8269e 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -1215,6 +1215,41 @@ static void cgroup_destroy_root(struct cgroup_root *root)
cgroup_free_root(root);
 }
 
+/*
+ * look up cgroup associated with current task's cgroup namespace on the
+ * specified hierarchy
+ */
+static struct cgroup *
+current_cgns_cgroup_from_root(struct cgroup_root *root)
+{
+   struct cgroup *res = NULL;
+   struct css_set *css;
+
+   lockdep_assert_held(&css_set_lock);
+
+   rcu_read_lock();
+
+   css = current->nsproxy->cgroup_ns->root_cset;
+   if (cset == &init_css_set) {
+   res = &root->cgrp;
+   } else {
+   struct cgrp_cset_link *link;
+
+   list_for_each_entry(link, &cset->cgrp_links, cgrp_link) {
+   struct cgroup *c = link->cgrp;
+
+   if (c->root == root) {
+   res = c;
+   break;
+   }
+   }
+   }
+   rcu_read_unlock();
+
+   BUG_ON(!res);
+   return res;
+}
+
 /* look up cgroup associated with given css_set on the specified hierarchy */
 static struct cgroup *cset_cgroup_from_root(struct css_set *cset,
struct cgroup_root *root)
@@ -1598,13 +1633,11 @@ static int cgroup_show_path(struct seq_file *sf, struct 
kernfs_node *kf_node,
 {
int len = 0, ret = 0;
char *buf = NULL;
-   struct cgroup_namespace *ns = current->nsproxy->cgroup_ns;
struct cgroup_root *kf_cgroot = cgroup_root_from_kf(kf_root);
struct cgroup *ns_cgroup;
 
-   mutex_lock(&cgroup_mutex);
spin_lock_bh(&css_set_lock);
-   ns_cgroup = cset_cgroup_from_root(ns->root_cset, kf_cgroot);
+   ns_cgroup = current_cgns_cgroup_from_root(kf_cgroot);
len = kernfs_path_from_node(kf_node, ns_cgroup->kn, NULL, 0);
if (len > 0)
buf = kmalloc(len + 1, GFP_ATOMIC);
@@ -1612,7 +1645,6 @@ static int cgroup_show_path(struct seq_file *sf, struct 
kernfs_node *kf_node,
ret = kernfs_path_from_node(kf_node, ns_cgroup->kn, buf, len + 
1);
 
spin_unlock_bh(&css_set_lock);
-   mutex_unlock(&cgroup_mutex);
 
if (len <= 0)
return len;
-- 
2.7.4



Re: [PATCH] x86/entry/x32: Check top 32 bits of syscall number on the fast path

2016-04-17 Thread H. Peter Anvin
On 04/17/16 22:18, Andy Lutomirski wrote:
> On Sun, Apr 17, 2016 at 9:50 PM, H. Peter Anvin  wrote:
>> On 04/17/16 17:47, Ben Hutchings wrote:
>>> We've always masked off the top 32 bits when x32 is enabled, but
>>> hopefully no-one relies on that.  Now that the slow path is in C, we
>>> check all the bits there, regardless of whether x32 is enabled.  Let's
>>> make the fast path consistent with it.
>>
>> We have always masked off the top 32 bits *period*.
>>
>> We have had some bugs where we haven't, because someone has tried to
>> "optimize" the code and they have been quite serious.  The system call
>> number is an int, which means the upper 32 bits are undefined on call
>> entry: we HAVE to mask them.
> 
> I'm reasonably confident that normal kernels (non-x32) have not masked
> those bits since before I started hacking on the entry code.
> 
> So the type of the syscall nr is a bit confused.  If there was an
> installed base of programs that left garbage in the high bits, we
> would have noticed *years* ago.  On the other hand, the 32-bit ptrace
> ABI and the seccomp ABI both think it's 32-bits.
> 
> If we were designing the x86_64 ABI and everything around it from
> scratch, I'd suggest that either the high bits must be zero or
> that the number actually be 64 bits (which are more or less the same
> thing).  That would let us use the high bits for something interesting
> in the future.
> 
> In practice, we can probably still declare that the thing is a 64-bit
> number, given that most kernels in the wild currently fail syscalls
> that have the high bits set.
> 

For the record, I changed the range comparison from cmpl to cmpq so if
someone re-introduced this bug *again* it would be a functionality
problem as opposed to a security hole a mile wide.

-hpa




Re: [PATCH] x86/entry/x32: Check top 32 bits of syscall number on the fast path

2016-04-17 Thread H. Peter Anvin
On 04/17/16 22:18, Andy Lutomirski wrote:
> On Sun, Apr 17, 2016 at 9:50 PM, H. Peter Anvin  wrote:
>> On 04/17/16 17:47, Ben Hutchings wrote:
>>> We've always masked off the top 32 bits when x32 is enabled, but
>>> hopefully no-one relies on that.  Now that the slow path is in C, we
>>> check all the bits there, regardless of whether x32 is enabled.  Let's
>>> make the fast path consistent with it.
>>
>> We have always masked off the top 32 bits *period*.
>>
>> We have had some bugs where we haven't, because someone has tried to
>> "optimize" the code and they have been quite serious.  The system call
>> number is an int, which means the upper 32 bits are undefined on call
>> entry: we HAVE to mask them.
> 
> I'm reasonably confident that normal kernels (non-x32) have not masked
> those bits since before I started hacking on the entry code.
> 
> So the type of the syscall nr is a bit confused.  If there was an
> installed base of programs that left garbage in the high bits, we
> would have noticed *years* ago.  On the other hand, the 32-bit ptrace
> ABI and the seccomp ABI both think it's 32-bits.
> 
> If we were designing the x86_64 ABI and everything around it from
> scratch, I'd suggest that either the high bits must be zero or
> that the number actually be 64 bits (which are more or less the same
> thing).  That would let us use the high bits for something interesting
> in the future.
> 
> In practice, we can probably still declare that the thing is a 64-bit
> number, given that most kernels in the wild currently fail syscalls
> that have the high bits set.
> 

For the record, I changed the range comparison from cmpl to cmpq so if
someone re-introduced this bug *again* it would be a functionality
problem as opposed to a security hole a mile wide.

-hpa




Re: [PATCH] x86/entry/x32: Check top 32 bits of syscall number on the fast path

2016-04-17 Thread H. Peter Anvin
On 04/17/16 22:18, Andy Lutomirski wrote:
> On Sun, Apr 17, 2016 at 9:50 PM, H. Peter Anvin  wrote:
>> On 04/17/16 17:47, Ben Hutchings wrote:
>>> We've always masked off the top 32 bits when x32 is enabled, but
>>> hopefully no-one relies on that.  Now that the slow path is in C, we
>>> check all the bits there, regardless of whether x32 is enabled.  Let's
>>> make the fast path consistent with it.
>>
>> We have always masked off the top 32 bits *period*.
>>
>> We have had some bugs where we haven't, because someone has tried to
>> "optimize" the code and they have been quite serious.  The system call
>> number is an int, which means the upper 32 bits are undefined on call
>> entry: we HAVE to mask them.
> 
> I'm reasonably confident that normal kernels (non-x32) have not masked
> those bits since before I started hacking on the entry code.
> 

I'm reasonably confident they have, because we have had security bugs
TWICE when someone has tried to "optimize" the code.  The masking was
generally done with a movl instruction, which confused people.

> So the type of the syscall nr is a bit confused.  If there was an
> installed base of programs that left garbage in the high bits, we
> would have noticed *years* ago.  On the other hand, the 32-bit ptrace
> ABI and the seccomp ABI both think it's 32-bits.

Incorrect.  We have seen these failures in real life.

> If we were designing the x86_64 ABI and everything around it from
> scratch, I'd suggest that either the high bits must be zero or
> that the number actually be 64 bits (which are more or less the same
> thing).  That would let us use the high bits for something interesting
> in the future.

Not really all that useful.  What we have is a C ABI.

> In practice, we can probably still declare that the thing is a 64-bit
> number, given that most kernels in the wild currently fail syscalls
> that have the high bits set.

They don't, and we can prove it...

-hpa




Re: [PATCH] x86/entry/x32: Check top 32 bits of syscall number on the fast path

2016-04-17 Thread H. Peter Anvin
On 04/17/16 22:18, Andy Lutomirski wrote:
> On Sun, Apr 17, 2016 at 9:50 PM, H. Peter Anvin  wrote:
>> On 04/17/16 17:47, Ben Hutchings wrote:
>>> We've always masked off the top 32 bits when x32 is enabled, but
>>> hopefully no-one relies on that.  Now that the slow path is in C, we
>>> check all the bits there, regardless of whether x32 is enabled.  Let's
>>> make the fast path consistent with it.
>>
>> We have always masked off the top 32 bits *period*.
>>
>> We have had some bugs where we haven't, because someone has tried to
>> "optimize" the code and they have been quite serious.  The system call
>> number is an int, which means the upper 32 bits are undefined on call
>> entry: we HAVE to mask them.
> 
> I'm reasonably confident that normal kernels (non-x32) have not masked
> those bits since before I started hacking on the entry code.
> 

I'm reasonably confident they have, because we have had security bugs
TWICE when someone has tried to "optimize" the code.  The masking was
generally done with a movl instruction, which confused people.

> So the type of the syscall nr is a bit confused.  If there was an
> installed base of programs that left garbage in the high bits, we
> would have noticed *years* ago.  On the other hand, the 32-bit ptrace
> ABI and the seccomp ABI both think it's 32-bits.

Incorrect.  We have seen these failures in real life.

> If we were designing the x86_64 ABI and everything around it from
> scratch, I'd suggest that either the high bits must be zero or
> that the number actually be 64 bits (which are more or less the same
> thing).  That would let us use the high bits for something interesting
> in the future.

Not really all that useful.  What we have is a C ABI.

> In practice, we can probably still declare that the thing is a 64-bit
> number, given that most kernels in the wild currently fail syscalls
> that have the high bits set.

They don't, and we can prove it...

-hpa




Re: [PATCH] x86/entry/x32: Check top 32 bits of syscall number on the fast path

2016-04-17 Thread Andy Lutomirski
On Sun, Apr 17, 2016 at 9:50 PM, H. Peter Anvin  wrote:
> On 04/17/16 17:47, Ben Hutchings wrote:
>> We've always masked off the top 32 bits when x32 is enabled, but
>> hopefully no-one relies on that.  Now that the slow path is in C, we
>> check all the bits there, regardless of whether x32 is enabled.  Let's
>> make the fast path consistent with it.
>
> We have always masked off the top 32 bits *period*.
>
> We have had some bugs where we haven't, because someone has tried to
> "optimize" the code and they have been quite serious.  The system call
> number is an int, which means the upper 32 bits are undefined on call
> entry: we HAVE to mask them.

I'm reasonably confident that normal kernels (non-x32) have not masked
those bits since before I started hacking on the entry code.

So the type of the syscall nr is a bit confused.  If there was an
installed base of programs that left garbage in the high bits, we
would have noticed *years* ago.  On the other hand, the 32-bit ptrace
ABI and the seccomp ABI both think it's 32-bits.

If we were designing the x86_64 ABI and everything around it from
scratch, I'd suggest that either the high bits must be zero or
that the number actually be 64 bits (which are more or less the same
thing).  That would let us use the high bits for something interesting
in the future.

In practice, we can probably still declare that the thing is a 64-bit
number, given that most kernels in the wild currently fail syscalls
that have the high bits set.

--Andy


Re: [PATCH] x86/entry/x32: Check top 32 bits of syscall number on the fast path

2016-04-17 Thread Andy Lutomirski
On Sun, Apr 17, 2016 at 9:50 PM, H. Peter Anvin  wrote:
> On 04/17/16 17:47, Ben Hutchings wrote:
>> We've always masked off the top 32 bits when x32 is enabled, but
>> hopefully no-one relies on that.  Now that the slow path is in C, we
>> check all the bits there, regardless of whether x32 is enabled.  Let's
>> make the fast path consistent with it.
>
> We have always masked off the top 32 bits *period*.
>
> We have had some bugs where we haven't, because someone has tried to
> "optimize" the code and they have been quite serious.  The system call
> number is an int, which means the upper 32 bits are undefined on call
> entry: we HAVE to mask them.

I'm reasonably confident that normal kernels (non-x32) have not masked
those bits since before I started hacking on the entry code.

So the type of the syscall nr is a bit confused.  If there was an
installed base of programs that left garbage in the high bits, we
would have noticed *years* ago.  On the other hand, the 32-bit ptrace
ABI and the seccomp ABI both think it's 32-bits.

If we were designing the x86_64 ABI and everything around it from
scratch, I'd suggest that either the high bits must be zero or
that the number actually be 64 bits (which are more or less the same
thing).  That would let us use the high bits for something interesting
in the future.

In practice, we can probably still declare that the thing is a 64-bit
number, given that most kernels in the wild currently fail syscalls
that have the high bits set.

--Andy


arch/ia64/kernel/entry.S:621: Error: Operand 2 of `adds' should be a 14-bit integer (-8192-8191)

2016-04-17 Thread kbuild test robot
Hi Will,

FYI, the error/warning still remains.

tree:   https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git 
master
head:   c3b46c73264b03000d1e18b22f5caf63332547c9
commit: da48d094ce5d7c7dcdad9011648a81c42fd1c2ef Kconfig: remove 
HAVE_LATENCYTOP_SUPPORT
date:   3 months ago
config: ia64-allmodconfig (attached as .config)
reproduce:
wget 
https://git.kernel.org/cgit/linux/kernel/git/wfg/lkp-tests.git/plain/sbin/make.cross
 -O ~/bin/make.cross
chmod +x ~/bin/make.cross
git checkout da48d094ce5d7c7dcdad9011648a81c42fd1c2ef
# save the attached .config to linux build tree
make.cross ARCH=ia64 

All errors (new ones prefixed by >>):

   arch/ia64/kernel/entry.S: Assembler messages:
>> arch/ia64/kernel/entry.S:621: Error: Operand 2 of `adds' should be a 14-bit 
>> integer (-8192-8191)
   arch/ia64/kernel/entry.S:728: Error: Operand 2 of `adds' should be a 14-bit 
integer (-8192-8191)
   arch/ia64/kernel/entry.S:859: Error: Operand 2 of `adds' should be a 14-bit 
integer (-8192-8191)
--
   arch/ia64/kernel/fsys.S: Assembler messages:
>> arch/ia64/kernel/fsys.S:67: Error: Operand 3 of `add' should be a general 
>> register r0-r3
   arch/ia64/kernel/fsys.S:97: Error: Operand 3 of `add' should be a general 
register r0-r3
   arch/ia64/kernel/fsys.S:193: Error: Operand 3 of `add' should be a general 
register r0-r3
   arch/ia64/kernel/fsys.S:336: Error: Operand 3 of `add' should be a general 
register r0-r3
   arch/ia64/kernel/fsys.S:338: Error: Operand 3 of `add' should be a general 
register r0-r3
--
   arch/ia64/kernel/ivt.S: Assembler messages:
>> arch/ia64/kernel/ivt.S:759: Error: Operand 3 of `add' should be a general 
>> register r0-r3

vim +621 arch/ia64/kernel/entry.S

^1da177e Linus Torvalds 2005-04-16  605 PT_REGS_UNWIND_INFO(0)
^1da177e Linus Torvalds 2005-04-16  606  {  /*
^1da177e Linus Torvalds 2005-04-16  607  * Some versions of gas 
generate bad unwind info if the first instruction of a
^1da177e Linus Torvalds 2005-04-16  608  * procedure doesn't go into 
the first slot of a bundle.  This is a workaround.
^1da177e Linus Torvalds 2005-04-16  609  */
^1da177e Linus Torvalds 2005-04-16  610 nop.m 0
^1da177e Linus Torvalds 2005-04-16  611 nop.i 0
^1da177e Linus Torvalds 2005-04-16  612 /*
^1da177e Linus Torvalds 2005-04-16  613  * We need to call 
schedule_tail() to complete the scheduling process.
^1da177e Linus Torvalds 2005-04-16  614  * Called by ia64_switch_to() 
after do_fork()->copy_thread().  r8 contains the
^1da177e Linus Torvalds 2005-04-16  615  * address of the previously 
executing task.
^1da177e Linus Torvalds 2005-04-16  616  */
^1da177e Linus Torvalds 2005-04-16  617 br.call.sptk.many 
rp=ia64_invoke_schedule_tail
^1da177e Linus Torvalds 2005-04-16  618  }
^1da177e Linus Torvalds 2005-04-16  619  .ret8:
54d496c3 Al Viro2012-10-14  620  (pKStk)br.call.sptk.many 
rp=call_payload
^1da177e Linus Torvalds 2005-04-16 @621 adds 
r2=TI_FLAGS+IA64_TASK_SIZE,r13
^1da177e Linus Torvalds 2005-04-16  622 ;;
^1da177e Linus Torvalds 2005-04-16  623 ld4 r2=[r2]
^1da177e Linus Torvalds 2005-04-16  624 ;;
^1da177e Linus Torvalds 2005-04-16  625 mov r8=0
^1da177e Linus Torvalds 2005-04-16  626 and 
r2=_TIF_SYSCALL_TRACEAUDIT,r2
^1da177e Linus Torvalds 2005-04-16  627 ;;
^1da177e Linus Torvalds 2005-04-16  628 cmp.ne p6,p0=r2,r0
^1da177e Linus Torvalds 2005-04-16  629  (p6)   br.cond.spnt 
.strace_check_retval

:: The code at line 621 was first introduced by commit
:: 1da177e4c3f41524e886b7f1b8a0c1fc7321cac2 Linux-2.6.12-rc2

:: TO: Linus Torvalds 
:: CC: Linus Torvalds 

---
0-DAY kernel test infrastructureOpen Source Technology Center
https://lists.01.org/pipermail/kbuild-all   Intel Corporation


.config.gz
Description: Binary data


arch/ia64/kernel/entry.S:621: Error: Operand 2 of `adds' should be a 14-bit integer (-8192-8191)

2016-04-17 Thread kbuild test robot
Hi Will,

FYI, the error/warning still remains.

tree:   https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git 
master
head:   c3b46c73264b03000d1e18b22f5caf63332547c9
commit: da48d094ce5d7c7dcdad9011648a81c42fd1c2ef Kconfig: remove 
HAVE_LATENCYTOP_SUPPORT
date:   3 months ago
config: ia64-allmodconfig (attached as .config)
reproduce:
wget 
https://git.kernel.org/cgit/linux/kernel/git/wfg/lkp-tests.git/plain/sbin/make.cross
 -O ~/bin/make.cross
chmod +x ~/bin/make.cross
git checkout da48d094ce5d7c7dcdad9011648a81c42fd1c2ef
# save the attached .config to linux build tree
make.cross ARCH=ia64 

All errors (new ones prefixed by >>):

   arch/ia64/kernel/entry.S: Assembler messages:
>> arch/ia64/kernel/entry.S:621: Error: Operand 2 of `adds' should be a 14-bit 
>> integer (-8192-8191)
   arch/ia64/kernel/entry.S:728: Error: Operand 2 of `adds' should be a 14-bit 
integer (-8192-8191)
   arch/ia64/kernel/entry.S:859: Error: Operand 2 of `adds' should be a 14-bit 
integer (-8192-8191)
--
   arch/ia64/kernel/fsys.S: Assembler messages:
>> arch/ia64/kernel/fsys.S:67: Error: Operand 3 of `add' should be a general 
>> register r0-r3
   arch/ia64/kernel/fsys.S:97: Error: Operand 3 of `add' should be a general 
register r0-r3
   arch/ia64/kernel/fsys.S:193: Error: Operand 3 of `add' should be a general 
register r0-r3
   arch/ia64/kernel/fsys.S:336: Error: Operand 3 of `add' should be a general 
register r0-r3
   arch/ia64/kernel/fsys.S:338: Error: Operand 3 of `add' should be a general 
register r0-r3
--
   arch/ia64/kernel/ivt.S: Assembler messages:
>> arch/ia64/kernel/ivt.S:759: Error: Operand 3 of `add' should be a general 
>> register r0-r3

vim +621 arch/ia64/kernel/entry.S

^1da177e Linus Torvalds 2005-04-16  605 PT_REGS_UNWIND_INFO(0)
^1da177e Linus Torvalds 2005-04-16  606  {  /*
^1da177e Linus Torvalds 2005-04-16  607  * Some versions of gas 
generate bad unwind info if the first instruction of a
^1da177e Linus Torvalds 2005-04-16  608  * procedure doesn't go into 
the first slot of a bundle.  This is a workaround.
^1da177e Linus Torvalds 2005-04-16  609  */
^1da177e Linus Torvalds 2005-04-16  610 nop.m 0
^1da177e Linus Torvalds 2005-04-16  611 nop.i 0
^1da177e Linus Torvalds 2005-04-16  612 /*
^1da177e Linus Torvalds 2005-04-16  613  * We need to call 
schedule_tail() to complete the scheduling process.
^1da177e Linus Torvalds 2005-04-16  614  * Called by ia64_switch_to() 
after do_fork()->copy_thread().  r8 contains the
^1da177e Linus Torvalds 2005-04-16  615  * address of the previously 
executing task.
^1da177e Linus Torvalds 2005-04-16  616  */
^1da177e Linus Torvalds 2005-04-16  617 br.call.sptk.many 
rp=ia64_invoke_schedule_tail
^1da177e Linus Torvalds 2005-04-16  618  }
^1da177e Linus Torvalds 2005-04-16  619  .ret8:
54d496c3 Al Viro2012-10-14  620  (pKStk)br.call.sptk.many 
rp=call_payload
^1da177e Linus Torvalds 2005-04-16 @621 adds 
r2=TI_FLAGS+IA64_TASK_SIZE,r13
^1da177e Linus Torvalds 2005-04-16  622 ;;
^1da177e Linus Torvalds 2005-04-16  623 ld4 r2=[r2]
^1da177e Linus Torvalds 2005-04-16  624 ;;
^1da177e Linus Torvalds 2005-04-16  625 mov r8=0
^1da177e Linus Torvalds 2005-04-16  626 and 
r2=_TIF_SYSCALL_TRACEAUDIT,r2
^1da177e Linus Torvalds 2005-04-16  627 ;;
^1da177e Linus Torvalds 2005-04-16  628 cmp.ne p6,p0=r2,r0
^1da177e Linus Torvalds 2005-04-16  629  (p6)   br.cond.spnt 
.strace_check_retval

:: The code at line 621 was first introduced by commit
:: 1da177e4c3f41524e886b7f1b8a0c1fc7321cac2 Linux-2.6.12-rc2

:: TO: Linus Torvalds 
:: CC: Linus Torvalds 

---
0-DAY kernel test infrastructureOpen Source Technology Center
https://lists.01.org/pipermail/kbuild-all   Intel Corporation


.config.gz
Description: Binary data


Re: [PATCH v1] ARM: dts: omap3-n900: Specify peripherals LDO regulators initial mode

2016-04-17 Thread Sebastian Reichel
Hi,

On Sun, Apr 17, 2016 at 05:29:23PM +0300, Ivaylo Dimitrov wrote:
> Without that, regulators are left in the mode last set by the bootloader or
> by the kernel the device was rebooted from. This leads to various problems,
> like non-working peripherals.
> 
> Signed-off-by: Ivaylo Dimitrov 

Reviewed-By: Sebastian Reichel 

-- Sebastian


signature.asc
Description: PGP signature


Re: [PATCH v1] ARM: dts: omap3-n900: Specify peripherals LDO regulators initial mode

2016-04-17 Thread Sebastian Reichel
Hi,

On Sun, Apr 17, 2016 at 05:29:23PM +0300, Ivaylo Dimitrov wrote:
> Without that, regulators are left in the mode last set by the bootloader or
> by the kernel the device was rebooted from. This leads to various problems,
> like non-working peripherals.
> 
> Signed-off-by: Ivaylo Dimitrov 

Reviewed-By: Sebastian Reichel 

-- Sebastian


signature.asc
Description: PGP signature


Re: [PATCH V2] net: ethernet: mellanox: correct page conversion

2016-04-17 Thread okaya

On 2016-04-18 00:00, David Miller wrote:

From: Sinan Kaya 
Date: Sat, 16 Apr 2016 18:23:32 -0400

Current code is assuming that the address returned by 
dma_alloc_coherent
is a logical address. This is not true on ARM/ARM64 systems. This 
patch
replaces dma_alloc_coherent with dma_map_page API. The address 
returned

can later be virtually mapped from the CPU side with vmap API.

Signed-off-by: Sinan Kaya 


You can't do this.

The DMA map page API gives non-coherent mappings, and thus requires
proper flushing.

So a straight conversion like this is never legitimate.


I would agree on proper DMA API usage. However, the code is already
assuming a coherent architecture by mapping the CPU pages as PAGE_KERNEL.


dma_map_page() returns cached buffers, and you don't need cache flushes on
a coherent architecture to make the data visible.




Re: [PATCH V2] net: ethernet: mellanox: correct page conversion

2016-04-17 Thread okaya

On 2016-04-18 00:00, David Miller wrote:

From: Sinan Kaya 
Date: Sat, 16 Apr 2016 18:23:32 -0400

Current code is assuming that the address returned by 
dma_alloc_coherent
is a logical address. This is not true on ARM/ARM64 systems. This 
patch
replaces dma_alloc_coherent with dma_map_page API. The address 
returned

can later be virtually mapped from the CPU side with vmap API.

Signed-off-by: Sinan Kaya 


You can't do this.

The DMA map page API gives non-coherent mappings, and thus requires
proper flushing.

So a straight conversion like this is never legitimate.


I would agree on proper DMA API usage. However, the code is already
assuming a coherent architecture by mapping the CPU pages as PAGE_KERNEL.


dma_map_page() returns cached buffers, and you don't need cache flushes on
a coherent architecture to make the data visible.




Re: [PATCH 2/2] ARM: davinci: da850: use clk->set_parent for async3

2016-04-17 Thread Sekhar Nori
On Sunday 17 April 2016 01:01 AM, David Lechner wrote:

>> +static int da850_async3_set_parent(struct clk *clk, struct clk *parent)
>> +{
>> +u32 val;
>> +
>> +val = readl(DA8XX_SYSCFG0_VIRT(DA8XX_CFGCHIP3_REG));
>> +
>> +/* Set the USB 1.1 PHY clock mux based on the parent clock. */
> 
> I seem to have regressed here since the last revision, this is supposed
> to read:
> 
> /* Set the async3 clock domain mux based on the parent clock. */
> 
> Although now that I am looking at it again, it doesn't really add
> anything useful and could be omitted altogether.

Agree the comment is redundant. No need to resend just for this though. I
can drop it when applying.

Thanks,
Sekhar


Re: [PATCH 2/2] ARM: davinci: da850: use clk->set_parent for async3

2016-04-17 Thread Sekhar Nori
On Sunday 17 April 2016 01:01 AM, David Lechner wrote:

>> +static int da850_async3_set_parent(struct clk *clk, struct clk *parent)
>> +{
>> +u32 val;
>> +
>> +val = readl(DA8XX_SYSCFG0_VIRT(DA8XX_CFGCHIP3_REG));
>> +
>> +/* Set the USB 1.1 PHY clock mux based on the parent clock. */
> 
> I seem to have regressed here since the last revision, this is supposed
> to read:
> 
> /* Set the async3 clock domain mux based on the parent clock. */
> 
> Although now that I am looking at it again, it doesn't really add
> anything useful and could be omitted altogether.

Agree the comment is redundant. No need to resend just for this though. I
can drop it when applying.

Thanks,
Sekhar


Re: [PATCH v2 4/5] iio: health: afe4404: use regmap to retrieve struct device

2016-04-17 Thread Alison Schofield
On Sun, Apr 17, 2016 at 01:07:52PM -0500, Andrew F. Davis wrote:
> On 04/16/2016 02:22 PM, Jonathan Cameron wrote:
> > On 10/04/16 20:07, Alison Schofield wrote:
> >> Driver includes struct regmap and struct device in its global data.
> >> Remove the struct device and use regmap API to retrieve device info.
> >>
> 
> Why? This adds nothing but more code to get dev through some
> container_of trickery when we could just keep a dev pointer in the data
> structure.
> 
> Andrew

Thanks for the review and response.  The why would be for
simplification and uniformity across IIO.

I think I see your point in general, but not sure I get your
specific concerns with these afe4403/04 drivers.

The drivers only use the device struct in probe and then
again at device remove time.  At probe, the change no
longer stores it in the global data. At remove the
regmap_get_device() func is a simple dereference to retrieve
the device struct. That's the simplification: we don't carry
that ptr in global data waiting for the opportunity to use it
at device remove.  We just find it when we need it at device
remove.  (Perhaps these devices are getting removed frequently?)

Regards,
alisons


Re: [PATCH v2 4/5] iio: health: afe4404: use regmap to retrieve struct device

2016-04-17 Thread Alison Schofield
On Sun, Apr 17, 2016 at 01:07:52PM -0500, Andrew F. Davis wrote:
> On 04/16/2016 02:22 PM, Jonathan Cameron wrote:
> > On 10/04/16 20:07, Alison Schofield wrote:
> >> Driver includes struct regmap and struct device in its global data.
> >> Remove the struct device and use regmap API to retrieve device info.
> >>
> 
> Why? This adds nothing but more code to get dev through some
> container_of trickery when we could just keep a dev pointer in the data
> structure.
> 
> Andrew

Thanks for the review and response.  The why would be for
simplification and uniformity across IIO.

I think I see your point in general, but not sure I get your
specific concerns with these afe4403/04 drivers.

The drivers only use the device struct in probe and then
again at device remove time.  At probe, the change no
longer stores it in the global data. At remove the
regmap_get_device() func is a simple dereference to retrieve
the device struct. That's the simplification: we don't carry
that ptr in global data waiting for the opportunity to use it
at device remove.  We just find it when we need it at device
remove.  (Perhaps these devices are getting removed frequently?)

Regards,
alisons


Re: [PATCH] x86/entry/x32: Check top 32 bits of syscall number on the fast path

2016-04-17 Thread H. Peter Anvin
On 04/17/16 17:47, Ben Hutchings wrote:
> We've always masked off the top 32 bits when x32 is enabled, but
> hopefully no-one relies on that.  Now that the slow path is in C, we
> check all the bits there, regardless of whether x32 is enabled.  Let's
> make the fast path consistent with it.

We have always masked off the top 32 bits *period*.

We have had some bugs where we haven't, because someone has tried to
"optimize" the code and they have been quite serious.  The system call
number is an int, which means the upper 32 bits are undefined on call
entry: we HAVE to mask them.

-hpa




Re: [PATCH] x86/entry/x32: Check top 32 bits of syscall number on the fast path

2016-04-17 Thread H. Peter Anvin
On 04/17/16 17:47, Ben Hutchings wrote:
> We've always masked off the top 32 bits when x32 is enabled, but
> hopefully no-one relies on that.  Now that the slow path is in C, we
> check all the bits there, regardless of whether x32 is enabled.  Let's
> make the fast path consistent with it.

We have always masked off the top 32 bits *period*.

We have had some bugs where we haven't, because someone has tried to
"optimize" the code and they have been quite serious.  The system call
number is an int, which means the upper 32 bits are undefined on call
entry: we HAVE to mask them.

-hpa




RE: [patch] intel_telemetry_pltdrv: silence an unintialized variable warning

2016-04-17 Thread Chakravarty, Souvik K
Looks good...thanks Dan.

> -Original Message-
> From: Dan Carpenter [mailto:dan.carpen...@oracle.com]
> Sent: Friday, April 15, 2016 8:16 PM
> To: Chakravarty, Souvik K 
> Cc: Darren Hart ; platform-driver-
> x...@vger.kernel.org; linux-kernel@vger.kernel.org; kernel-
> janit...@vger.kernel.org
> Subject: [patch] intel_telemetry_pltdrv: silence an unintialized variable
> warning
> 
> Presumably "pss_period" and "ioss_period" can't both be zero, but this
> function is never called so we can't infer that using static analysis alone.
> 
> Let's silence the warning by setting "ret" to zero.
> 
> Signed-off-by: Dan Carpenter 
> 
> diff --git a/drivers/platform/x86/intel_telemetry_pltdrv.c
> b/drivers/platform/x86/intel_telemetry_pltdrv.c
> index 397119f..781bd10 100644
> --- a/drivers/platform/x86/intel_telemetry_pltdrv.c
> +++ b/drivers/platform/x86/intel_telemetry_pltdrv.c
> @@ -659,7 +659,7 @@ static int telemetry_plt_update_events(struct
> telemetry_evtconfig pss_evtconfig,  static int
> telemetry_plt_set_sampling_period(u8 pss_period, u8 ioss_period)  {
>   u32 telem_ctrl = 0;
> - int ret;
> + int ret = 0;
> 
>   mutex_lock(&(telm_conf->telem_lock));
>   if (ioss_period) {


RE: [patch] intel_telemetry_pltdrv: silence an unintialized variable warning

2016-04-17 Thread Chakravarty, Souvik K
Looks good...thanks Dan.

> -Original Message-
> From: Dan Carpenter [mailto:dan.carpen...@oracle.com]
> Sent: Friday, April 15, 2016 8:16 PM
> To: Chakravarty, Souvik K 
> Cc: Darren Hart ; platform-driver-
> x...@vger.kernel.org; linux-kernel@vger.kernel.org; kernel-
> janit...@vger.kernel.org
> Subject: [patch] intel_telemetry_pltdrv: silence an unintialized variable
> warning
> 
> Presumably "pss_period" and "ioss_period" can't both be zero, but this
> function is never called so we can't infer that using static analysis alone.
> 
> Let's silence the warning by setting "ret" to zero.
> 
> Signed-off-by: Dan Carpenter 
> 
> diff --git a/drivers/platform/x86/intel_telemetry_pltdrv.c
> b/drivers/platform/x86/intel_telemetry_pltdrv.c
> index 397119f..781bd10 100644
> --- a/drivers/platform/x86/intel_telemetry_pltdrv.c
> +++ b/drivers/platform/x86/intel_telemetry_pltdrv.c
> @@ -659,7 +659,7 @@ static int telemetry_plt_update_events(struct
> telemetry_evtconfig pss_evtconfig,  static int
> telemetry_plt_set_sampling_period(u8 pss_period, u8 ioss_period)  {
>   u32 telem_ctrl = 0;
> - int ret;
> + int ret = 0;
> 
>   mutex_lock(&(telm_conf->telem_lock));
>   if (ioss_period) {


linux-next: manual merge of the livepatching tree with the powerpc tree

2016-04-17 Thread Stephen Rothwell
Hi Jiri,

Today's linux-next merge of the livepatching tree got a conflict in:

  arch/powerpc/kernel/process.c

between commit:

  7f92bc569455 ("powerpc: sparse: Include headers for __weak symbols")

from the powerpc tree and commit:

  5d31a96e6c01 ("powerpc/livepatch: Add livepatch stack to struct thread_info")

from the livepatching tree.

I fixed it up (see below) and can carry the fix as necessary. This
is now fixed as far as linux-next is concerned, but any non trivial
conflicts should be mentioned to your upstream maintainer when your tree
is submitted for merging.  You may also want to consider cooperating
with the maintainer of the conflicting tree to minimise any particularly
complex conflicts.

-- 
Cheers,
Stephen Rothwell

diff --cc arch/powerpc/kernel/process.c
index 4695088e7dd2,a290ee374aa0..
--- a/arch/powerpc/kernel/process.c
+++ b/arch/powerpc/kernel/process.c
@@@ -56,7 -55,8 +56,9 @@@
  #include 
  #endif
  #include 
 +#include 
+ #include 
+ 
  #include 
  #include 
  


[PATCHSET v4 0/8] Make background writeback not suck

2016-04-17 Thread Jens Axboe
Hi,

Since the dawn of time, our background buffered writeback has sucked.
When we do background buffered writeback, it should have little impact
on foreground activity. That's the definition of background activity...
But for as long as I can remember, heavy buffered writers have not
behaved like that. For instance, if I do something like this:

$ dd if=/dev/zero of=foo bs=1M count=10k

on my laptop, and then try and start chrome, it basically won't start
before the buffered writeback is done. Or, for server oriented
workloads, where installation of a big RPM (or similar) adversely
impacts database reads or sync writes. When that happens, I get people
yelling at me.

I have posted plenty of results previously, I'll keep it shorter
this time. Here's a run on my laptop, using read-to-pipe-async for
reading a 5g file, and rewriting it.

4.6-rc3:

$ t/read-to-pipe-async -f ~/5g > 5g-new

Latency percentiles (usec) (READERS)
50.0000th: 2
75.0000th: 3
90.0000th: 5
95.0000th: 7
99.0000th: 43
99.5000th: 77
99.9000th: 9008
99.9900th: 91008
99.9990th: 286208
99.9999th: 347648
Over=1251, min=0, max=358081
Latency percentiles (usec) (WRITERS)
50.0000th: 4
75.0000th: 8
90.0000th: 13
95.0000th: 15
99.0000th: 32
99.5000th: 43
99.9000th: 81
99.9900th: 2372
99.9990th: 104320
99.9999th: 349696
Over=63, min=1, max=358321
Read rate (KB/sec) : 91859
Write rate (KB/sec): 91859

4.6-rc3 + wb-buf-throttle

Latency percentiles (usec) (READERS)
50.0000th: 2
75.0000th: 3
90.0000th: 5
95.0000th: 8
99.0000th: 48
99.5000th: 79
99.9000th: 5304
99.9900th: 22496
99.9990th: 29408
99.9999th: 33728
Over=860, min=0, max=37599
Latency percentiles (usec) (WRITERS)
50.0000th: 4
75.0000th: 9
90.0000th: 14
95.0000th: 16
99.0000th: 34
99.5000th: 45
99.9000th: 87
99.9900th: 1342
99.9990th: 13648
99.9999th: 21280
Over=29, min=1, max=30457
Read rate (KB/sec) : 95832
Write rate (KB/sec): 95832

Better throughput and tighter latencies, for both reads and writes.
That's hard not to like.

The above was the why. The how is basically throttling background
writeback. We still want to issue big writes from the vm side of things,
so we get nice and big extents on the file system end. But we don't need
to flood the device with THOUSANDS of requests for background writeback.
For most devices, we don't need a whole lot to get decent throughput.

This adds some simple blk-wb code that limits how much buffered
writeback we keep in flight on the device end. It's all about managing
the queues on the hardware side. The big change in this version is that
it should be pretty much auto-tuning - you no longer have to set a
given percentage of writeback bandwidth. I've implemented something
similar to CoDel to manage the writeback queue. See the last patch
for a full description, but the tldr is that we monitor min latencies
over a window of time, and scale up/down the queue based on that. This
needs a minimum of tunables, and it stays out of the way, if your device
is fast enough. There's a single tunable now, wb_last_usec, that simply
sets this latency target. Most people won't have to touch this, it'll
work pretty well just being in the ballpark.

I welcome testing. If you are sick of Linux bogging down when buffered
writes are happening, then this is for you, laptop or server. The
patchset is fully stable, I have not observed problems. It passes full
xfstest runs, and a variety of benchmarks as well. It works equally well
on blk-mq/scsi-mq, and "classic" setups.

You can also find this in a branch in the block git repo:

git://git.kernel.dk/linux-block.git wb-buf-throttle

Note that I rebase this branch when I collapse patches. The
wb-buf-throttle-v4 will remain the same as this version. I've folded
the device write cache changes into my 4.7 branches, so they are not
a part of this posting. Get the full wb-buf-throttle branch, or apply
the patches here on top of my for-next. A full patch against Linus'
current tree can also be downloaded here:

http://brick.kernel.dk/snaps/wb-buf-throttle-v4.patch

Changes since v3

- Re-do the mm/ writeback parts. Add REQ_BG for background writes,
  and don't overload the wbc 'reason' for writeback decisions.
- Add tracking for when apps are sleeping waiting for a page to complete.
- Change wbc_to_write() to wbc_to_write_cmd().
- Use atomic_t for the balance_dirty_pages() sleep count.
- Add a basic scalable block stats tracking framework.
- Rewrite blk-wb core as described above, to dynamically adapt. This is
  a big change, see the last patch for a full description of it.
- Add tracing to blk-wb, instead of using debug printk's.
- Rebased to 4.6-rc3 (ish)

Changes since v2

- 

[PATCH 1/8] block: add WRITE_BG

2016-04-17 Thread Jens Axboe
This adds a new request flag, REQ_BG, that callers can use to tell
the block layer that this is background (non-urgent) IO.

Signed-off-by: Jens Axboe 
---
 include/linux/blk_types.h | 4 +++-
 include/linux/fs.h| 4 
 2 files changed, 7 insertions(+), 1 deletion(-)

diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h
index 86a38ea1823f..223012451c7a 100644
--- a/include/linux/blk_types.h
+++ b/include/linux/blk_types.h
@@ -161,6 +161,7 @@ enum rq_flag_bits {
__REQ_INTEGRITY,/* I/O includes block integrity payload */
__REQ_FUA,  /* forced unit access */
__REQ_FLUSH,/* request for cache flush */
+   __REQ_BG,   /* background activity */
 
/* bio only flags */
__REQ_RAHEAD,   /* read ahead, can fail anytime */
@@ -208,7 +209,7 @@ enum rq_flag_bits {
 #define REQ_COMMON_MASK \
(REQ_WRITE | REQ_FAILFAST_MASK | REQ_SYNC | REQ_META | REQ_PRIO | \
 REQ_DISCARD | REQ_WRITE_SAME | REQ_NOIDLE | REQ_FLUSH | REQ_FUA | \
-REQ_SECURE | REQ_INTEGRITY)
+REQ_SECURE | REQ_INTEGRITY | REQ_BG)
 #define REQ_CLONE_MASK REQ_COMMON_MASK
 
 #define BIO_NO_ADVANCE_ITER_MASK   (REQ_DISCARD|REQ_WRITE_SAME)
@@ -235,6 +236,7 @@ enum rq_flag_bits {
 #define REQ_COPY_USER  (1ULL << __REQ_COPY_USER)
 #define REQ_FLUSH  (1ULL << __REQ_FLUSH)
 #define REQ_FLUSH_SEQ  (1ULL << __REQ_FLUSH_SEQ)
+#define REQ_BG (1ULL << __REQ_BG)
 #define REQ_IO_STAT(1ULL << __REQ_IO_STAT)
 #define REQ_MIXED_MERGE(1ULL << __REQ_MIXED_MERGE)
 #define REQ_SECURE (1ULL << __REQ_SECURE)
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 70e61b58baaf..bb8f951cc619 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -192,6 +192,9 @@ typedef void (dax_iodone_t)(struct buffer_head *bh_map, int 
uptodate);
  * WRITE_FLUSH_FUA Combination of WRITE_FLUSH and FUA. The IO is preceded
  * by a cache flush and data is guaranteed to be on
  * non-volatile media on completion.
+ * WRITE_BGBackground write. This is for background activity like
+ * the periodic flush and background threshold writeback
+ *
  *
  */
 #define RW_MASKREQ_WRITE
@@ -207,6 +210,7 @@ typedef void (dax_iodone_t)(struct buffer_head *bh_map, int 
uptodate);
 #define WRITE_FLUSH(WRITE | REQ_SYNC | REQ_NOIDLE | REQ_FLUSH)
 #define WRITE_FUA  (WRITE | REQ_SYNC | REQ_NOIDLE | REQ_FUA)
 #define WRITE_FLUSH_FUA(WRITE | REQ_SYNC | REQ_NOIDLE | 
REQ_FLUSH | REQ_FUA)
+#define WRITE_BG   (WRITE | REQ_NOIDLE | REQ_BG)
 
 /*
  * Attribute flags.  These should be or-ed together to figure out what
-- 
2.8.0.rc4.6.g7e4ba36



linux-next: manual merge of the livepatching tree with the powerpc tree

2016-04-17 Thread Stephen Rothwell
Hi Jiri,

Today's linux-next merge of the livepatching tree got a conflict in:

  arch/powerpc/kernel/process.c

between commit:

  7f92bc569455 ("powerpc: sparse: Include headers for __weak symbols")

from the powerpc tree and commit:

  5d31a96e6c01 ("powerpc/livepatch: Add livepatch stack to struct thread_info")

from the livepatching tree.

I fixed it up (see below) and can carry the fix as necessary. This
is now fixed as far as linux-next is concerned, but any non trivial
conflicts should be mentioned to your upstream maintainer when your tree
is submitted for merging.  You may also want to consider cooperating
with the maintainer of the conflicting tree to minimise any particularly
complex conflicts.

-- 
Cheers,
Stephen Rothwell

diff --cc arch/powerpc/kernel/process.c
index 4695088e7dd2,a290ee374aa0..
--- a/arch/powerpc/kernel/process.c
+++ b/arch/powerpc/kernel/process.c
@@@ -56,7 -55,8 +56,9 @@@
  #include 
  #endif
  #include 
 +#include 
+ #include 
+ 
  #include 
  #include 
  


[PATCHSET v4 0/8] Make background writeback not suck

2016-04-17 Thread Jens Axboe
Hi,

Since the dawn of time, our background buffered writeback has sucked.
When we do background buffered writeback, it should have little impact
on foreground activity. That's the definition of background activity...
But for as long as I can remember, heavy buffered writers have not
behaved like that. For instance, if I do something like this:

$ dd if=/dev/zero of=foo bs=1M count=10k

on my laptop, and then try and start chrome, it basically won't start
before the buffered writeback is done. Or, for server oriented
workloads, where installation of a big RPM (or similar) adversely
impacts database reads or sync writes. When that happens, I get people
yelling at me.

I have posted plenty of results previously, I'll keep it shorter
this time. Here's a run on my laptop, using read-to-pipe-async for
reading a 5g file, and rewriting it.

4.6-rc3:

$ t/read-to-pipe-async -f ~/5g > 5g-new

Latency percentiles (usec) (READERS)
50.0000th: 2
75.0000th: 3
90.0000th: 5
95.0000th: 7
99.0000th: 43
99.5000th: 77
99.9000th: 9008
99.9900th: 91008
99.9990th: 286208
99.9999th: 347648
Over=1251, min=0, max=358081
Latency percentiles (usec) (WRITERS)
50.0000th: 4
75.0000th: 8
90.0000th: 13
95.0000th: 15
99.0000th: 32
99.5000th: 43
99.9000th: 81
99.9900th: 2372
99.9990th: 104320
99.9999th: 349696
Over=63, min=1, max=358321
Read rate (KB/sec) : 91859
Write rate (KB/sec): 91859

4.6-rc3 + wb-buf-throttle

Latency percentiles (usec) (READERS)
50.0000th: 2
75.0000th: 3
90.0000th: 5
95.0000th: 8
99.0000th: 48
99.5000th: 79
99.9000th: 5304
99.9900th: 22496
99.9990th: 29408
99.9999th: 33728
Over=860, min=0, max=37599
Latency percentiles (usec) (WRITERS)
50.0000th: 4
75.0000th: 9
90.0000th: 14
95.0000th: 16
99.0000th: 34
99.5000th: 45
99.9000th: 87
99.9900th: 1342
99.9990th: 13648
99.9999th: 21280
Over=29, min=1, max=30457
Read rate (KB/sec) : 95832
Write rate (KB/sec): 95832

Better throughput and tighter latencies, for both reads and writes.
That's hard not to like.

The above was the why. The how is basically throttling background
writeback. We still want to issue big writes from the vm side of things,
so we get nice and big extents on the file system end. But we don't need
to flood the device with THOUSANDS of requests for background writeback.
For most devices, we don't need a whole lot to get decent throughput.

This adds some simple blk-wb code that limits how much buffered
writeback we keep in flight on the device end. It's all about managing
the queues on the hardware side. The big change in this version is that
it should be pretty much auto-tuning - you no longer have to set a
given percentage of writeback bandwidth. I've implemented something
similar to CoDel to manage the writeback queue. See the last patch
for a full description, but the tldr is that we monitor min latencies
over a window of time, and scale up/down the queue based on that. This
needs a minimum of tunables, and it stays out of the way, if your device
is fast enough. There's a single tunable now, wb_last_usec, that simply
sets this latency target. Most people won't have to touch this, it'll
work pretty well just being in the ballpark.

I welcome testing. If you are sick of Linux bogging down when buffered
writes are happening, then this is for you, laptop or server. The
patchset is fully stable, I have not observed problems. It passes full
xfstest runs, and a variety of benchmarks as well. It works equally well
on blk-mq/scsi-mq, and "classic" setups.

You can also find this in a branch in the block git repo:

git://git.kernel.dk/linux-block.git wb-buf-throttle

Note that I rebase this branch when I collapse patches. The
wb-buf-throttle-v4 will remain the same as this version. I've folded
the device write cache changes into my 4.7 branches, so they are not
a part of this posting. Get the full wb-buf-throttle branch, or apply
the patches here on top of my for-next. A full patch against Linus'
current tree can also be downloaded here:

http://brick.kernel.dk/snaps/wb-buf-throttle-v4.patch

Changes since v3

- Re-do the mm/ writeback parts. Add REQ_BG for background writes,
  and don't overload the wbc 'reason' for writeback decisions.
- Add tracking for when apps are sleeping waiting for a page to complete.
- Change wbc_to_write() to wbc_to_write_cmd().
- Use atomic_t for the balance_dirty_pages() sleep count.
- Add a basic scalable block stats tracking framework.
- Rewrite blk-wb core as described above, to dynamically adapt. This is
  a big change, see the last patch for a full description of it.
- Add tracing to blk-wb, instead of using debug printk's.
- Rebased to 4.6-rc3 (ish)

Changes since v2

- 

[PATCH 1/8] block: add WRITE_BG

2016-04-17 Thread Jens Axboe
This adds a new request flag, REQ_BG, that callers can use to tell
the block layer that this is background (non-urgent) IO.

Signed-off-by: Jens Axboe 
---
 include/linux/blk_types.h | 4 +++-
 include/linux/fs.h| 4 
 2 files changed, 7 insertions(+), 1 deletion(-)

diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h
index 86a38ea1823f..223012451c7a 100644
--- a/include/linux/blk_types.h
+++ b/include/linux/blk_types.h
@@ -161,6 +161,7 @@ enum rq_flag_bits {
__REQ_INTEGRITY,/* I/O includes block integrity payload */
__REQ_FUA,  /* forced unit access */
__REQ_FLUSH,/* request for cache flush */
+   __REQ_BG,   /* background activity */
 
/* bio only flags */
__REQ_RAHEAD,   /* read ahead, can fail anytime */
@@ -208,7 +209,7 @@ enum rq_flag_bits {
 #define REQ_COMMON_MASK \
(REQ_WRITE | REQ_FAILFAST_MASK | REQ_SYNC | REQ_META | REQ_PRIO | \
 REQ_DISCARD | REQ_WRITE_SAME | REQ_NOIDLE | REQ_FLUSH | REQ_FUA | \
-REQ_SECURE | REQ_INTEGRITY)
+REQ_SECURE | REQ_INTEGRITY | REQ_BG)
 #define REQ_CLONE_MASK REQ_COMMON_MASK
 
 #define BIO_NO_ADVANCE_ITER_MASK   (REQ_DISCARD|REQ_WRITE_SAME)
@@ -235,6 +236,7 @@ enum rq_flag_bits {
 #define REQ_COPY_USER  (1ULL << __REQ_COPY_USER)
 #define REQ_FLUSH  (1ULL << __REQ_FLUSH)
 #define REQ_FLUSH_SEQ  (1ULL << __REQ_FLUSH_SEQ)
+#define REQ_BG (1ULL << __REQ_BG)
 #define REQ_IO_STAT(1ULL << __REQ_IO_STAT)
 #define REQ_MIXED_MERGE(1ULL << __REQ_MIXED_MERGE)
 #define REQ_SECURE (1ULL << __REQ_SECURE)
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 70e61b58baaf..bb8f951cc619 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -192,6 +192,9 @@ typedef void (dax_iodone_t)(struct buffer_head *bh_map, int 
uptodate);
  * WRITE_FLUSH_FUA Combination of WRITE_FLUSH and FUA. The IO is preceded
  * by a cache flush and data is guaranteed to be on
  * non-volatile media on completion.
+ * WRITE_BGBackground write. This is for background activity like
+ * the periodic flush and background threshold writeback
+ *
  *
  */
 #define RW_MASKREQ_WRITE
@@ -207,6 +210,7 @@ typedef void (dax_iodone_t)(struct buffer_head *bh_map, int 
uptodate);
 #define WRITE_FLUSH(WRITE | REQ_SYNC | REQ_NOIDLE | REQ_FLUSH)
 #define WRITE_FUA  (WRITE | REQ_SYNC | REQ_NOIDLE | REQ_FUA)
 #define WRITE_FLUSH_FUA(WRITE | REQ_SYNC | REQ_NOIDLE | 
REQ_FLUSH | REQ_FUA)
+#define WRITE_BG   (WRITE | REQ_NOIDLE | REQ_BG)
 
 /*
  * Attribute flags.  These should be or-ed together to figure out what
-- 
2.8.0.rc4.6.g7e4ba36



[PATCH 6/8] block: add code to track actual device queue depth

2016-04-17 Thread Jens Axboe
For blk-mq, ->nr_requests does track queue depth, at least at init
time. But for the older queue paths, it's simply a soft setting.
On top of that, it's generally larger than the hardware setting
on purpose, to allow backup of requests for merging.

Fill a hole in struct request_queue with a 'queue_depth' member, that
drivers can set via blk_set_queue_depth() to more closely inform the
block layer of the real queue depth.

Signed-off-by: Jens Axboe 
---
 block/blk-settings.c   | 12 
 drivers/scsi/scsi.c|  3 +++
 include/linux/blkdev.h | 11 +++
 3 files changed, 26 insertions(+)

diff --git a/block/blk-settings.c b/block/blk-settings.c
index f679ae122843..f7e122e717e8 100644
--- a/block/blk-settings.c
+++ b/block/blk-settings.c
@@ -832,6 +832,18 @@ void blk_queue_flush_queueable(struct request_queue *q, 
bool queueable)
 EXPORT_SYMBOL_GPL(blk_queue_flush_queueable);
 
 /**
+ * blk_set_queue_depth - tell the block layer about the device queue depth
+ * @q: the request queue for the device
+ * @depth: queue depth
+ *
+ */
+void blk_set_queue_depth(struct request_queue *q, unsigned int depth)
+{
+   q->queue_depth = depth;
+}
+EXPORT_SYMBOL(blk_set_queue_depth);
+
+/**
  * blk_queue_write_cache - configure queue's write cache
  * @q: the request queue for the device
  * @wc:write back cache on or off
diff --git a/drivers/scsi/scsi.c b/drivers/scsi/scsi.c
index 1deb6adc411f..75455d4dab68 100644
--- a/drivers/scsi/scsi.c
+++ b/drivers/scsi/scsi.c
@@ -621,6 +621,9 @@ int scsi_change_queue_depth(struct scsi_device *sdev, int 
depth)
wmb();
}
 
+   if (sdev->request_queue)
+   blk_set_queue_depth(sdev->request_queue, depth);
+
return sdev->queue_depth;
 }
 EXPORT_SYMBOL(scsi_change_queue_depth);
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index fc1894996b12..eee94bd6de52 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -315,6 +315,8 @@ struct request_queue {
struct blk_mq_ctx __percpu  *queue_ctx;
unsigned intnr_queues;
 
+   unsigned intqueue_depth;
+
/* hw dispatch queues */
struct blk_mq_hw_ctx**queue_hw_ctx;
unsigned intnr_hw_queues;
@@ -681,6 +683,14 @@ static inline bool blk_write_same_mergeable(struct bio *a, 
struct bio *b)
return false;
 }
 
+static inline unsigned int blk_queue_depth(struct request_queue *q)
+{
+   if (q->queue_depth)
+   return q->queue_depth;
+
+   return q->nr_requests;
+}
+
 /*
  * q->prep_rq_fn return values
  */
@@ -984,6 +994,7 @@ extern void blk_limits_io_min(struct queue_limits *limits, 
unsigned int min);
 extern void blk_queue_io_min(struct request_queue *q, unsigned int min);
 extern void blk_limits_io_opt(struct queue_limits *limits, unsigned int opt);
 extern void blk_queue_io_opt(struct request_queue *q, unsigned int opt);
+extern void blk_set_queue_depth(struct request_queue *q, unsigned int depth);
 extern void blk_set_default_limits(struct queue_limits *lim);
 extern void blk_set_stacking_limits(struct queue_limits *lim);
 extern int blk_stack_limits(struct queue_limits *t, struct queue_limits *b,
-- 
2.8.0.rc4.6.g7e4ba36



[PATCH 5/8] writeback: increment page wait count when waiting

2016-04-17 Thread Jens Axboe
If we end up waiting on a page that is dirty or marked writeback,
then increment the corresponding bdi_writeback counter.

Signed-off-by: Jens Axboe 
---
 mm/filemap.c | 42 +++---
 1 file changed, 39 insertions(+), 3 deletions(-)

diff --git a/mm/filemap.c b/mm/filemap.c
index f2479af09da9..a8854a083b71 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -764,37 +764,73 @@ wait_queue_head_t *page_waitqueue(struct page *page)
 }
 EXPORT_SYMBOL(page_waitqueue);
 
+static bool inc_dirty_wait(struct page *page)
+{
+   if (!page->mapping || !PageDirty(page) || !PageWriteback(page))
+   return false;
+   else {
+   struct bdi_writeback *wb = inode_to_wb(page->mapping->host);
+
+   atomic_inc(>dirty_sleeping);
+   return true;
+   }
+}
+
+static void dec_dirty_wait(struct page *page)
+{
+   struct bdi_writeback *wb = inode_to_wb(page->mapping->host);
+
+   atomic_dec(>dirty_sleeping);
+}
+
 void wait_on_page_bit(struct page *page, int bit_nr)
 {
DEFINE_WAIT_BIT(wait, >flags, bit_nr);
 
-   if (test_bit(bit_nr, >flags))
+   if (test_bit(bit_nr, >flags)) {
+   bool did_inc = inc_dirty_wait(page);
__wait_on_bit(page_waitqueue(page), , bit_wait_io,
TASK_UNINTERRUPTIBLE);
+   if (did_inc)
+   dec_dirty_wait(page);
+   }
 }
 EXPORT_SYMBOL(wait_on_page_bit);
 
 int wait_on_page_bit_killable(struct page *page, int bit_nr)
 {
DEFINE_WAIT_BIT(wait, >flags, bit_nr);
+   bool did_inc;
+   int ret;
 
if (!test_bit(bit_nr, >flags))
return 0;
 
-   return __wait_on_bit(page_waitqueue(page), ,
+   did_inc = inc_dirty_wait(page);
+   ret = __wait_on_bit(page_waitqueue(page), ,
 bit_wait_io, TASK_KILLABLE);
+   if (did_inc)
+   dec_dirty_wait(page);
+   return ret;
 }
 
 int wait_on_page_bit_killable_timeout(struct page *page,
   int bit_nr, unsigned long timeout)
 {
DEFINE_WAIT_BIT(wait, >flags, bit_nr);
+   bool did_inc;
+   int ret;
 
wait.key.timeout = jiffies + timeout;
if (!test_bit(bit_nr, >flags))
return 0;
-   return __wait_on_bit(page_waitqueue(page), ,
+
+   did_inc = inc_dirty_wait(page);
+   ret = __wait_on_bit(page_waitqueue(page), ,
 bit_wait_io_timeout, TASK_KILLABLE);
+   if (did_inc)
+   dec_dirty_wait(page);
+   return ret;
 }
 EXPORT_SYMBOL_GPL(wait_on_page_bit_killable_timeout);
 
-- 
2.8.0.rc4.6.g7e4ba36



[PATCH 6/8] block: add code to track actual device queue depth

2016-04-17 Thread Jens Axboe
For blk-mq, ->nr_requests does track queue depth, at least at init
time. But for the older queue paths, it's simply a soft setting.
On top of that, it's generally larger than the hardware setting
on purpose, to allow backup of requests for merging.

Fill a hole in struct request_queue with a 'queue_depth' member, that
drivers can set via blk_set_queue_depth() to more closely inform the
block layer of the real queue depth.

Signed-off-by: Jens Axboe 
---
 block/blk-settings.c   | 12 
 drivers/scsi/scsi.c|  3 +++
 include/linux/blkdev.h | 11 +++
 3 files changed, 26 insertions(+)

diff --git a/block/blk-settings.c b/block/blk-settings.c
index f679ae122843..f7e122e717e8 100644
--- a/block/blk-settings.c
+++ b/block/blk-settings.c
@@ -832,6 +832,18 @@ void blk_queue_flush_queueable(struct request_queue *q, 
bool queueable)
 EXPORT_SYMBOL_GPL(blk_queue_flush_queueable);
 
 /**
+ * blk_set_queue_depth - tell the block layer about the device queue depth
+ * @q: the request queue for the device
+ * @depth: queue depth
+ *
+ */
+void blk_set_queue_depth(struct request_queue *q, unsigned int depth)
+{
+   q->queue_depth = depth;
+}
+EXPORT_SYMBOL(blk_set_queue_depth);
+
+/**
  * blk_queue_write_cache - configure queue's write cache
  * @q: the request queue for the device
  * @wc:write back cache on or off
diff --git a/drivers/scsi/scsi.c b/drivers/scsi/scsi.c
index 1deb6adc411f..75455d4dab68 100644
--- a/drivers/scsi/scsi.c
+++ b/drivers/scsi/scsi.c
@@ -621,6 +621,9 @@ int scsi_change_queue_depth(struct scsi_device *sdev, int 
depth)
wmb();
}
 
+   if (sdev->request_queue)
+   blk_set_queue_depth(sdev->request_queue, depth);
+
return sdev->queue_depth;
 }
 EXPORT_SYMBOL(scsi_change_queue_depth);
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index fc1894996b12..eee94bd6de52 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -315,6 +315,8 @@ struct request_queue {
struct blk_mq_ctx __percpu  *queue_ctx;
unsigned intnr_queues;
 
+   unsigned intqueue_depth;
+
/* hw dispatch queues */
struct blk_mq_hw_ctx**queue_hw_ctx;
unsigned intnr_hw_queues;
@@ -681,6 +683,14 @@ static inline bool blk_write_same_mergeable(struct bio *a, 
struct bio *b)
return false;
 }
 
+static inline unsigned int blk_queue_depth(struct request_queue *q)
+{
+   if (q->queue_depth)
+   return q->queue_depth;
+
+   return q->nr_requests;
+}
+
 /*
  * q->prep_rq_fn return values
  */
@@ -984,6 +994,7 @@ extern void blk_limits_io_min(struct queue_limits *limits, 
unsigned int min);
 extern void blk_queue_io_min(struct request_queue *q, unsigned int min);
 extern void blk_limits_io_opt(struct queue_limits *limits, unsigned int opt);
 extern void blk_queue_io_opt(struct request_queue *q, unsigned int opt);
+extern void blk_set_queue_depth(struct request_queue *q, unsigned int depth);
 extern void blk_set_default_limits(struct queue_limits *lim);
 extern void blk_set_stacking_limits(struct queue_limits *lim);
 extern int blk_stack_limits(struct queue_limits *t, struct queue_limits *b,
-- 
2.8.0.rc4.6.g7e4ba36



[PATCH 5/8] writeback: increment page wait count when waiting

2016-04-17 Thread Jens Axboe
If we end up waiting on a page that is dirty or marked writeback,
then increment the corresponding bdi_writeback counter.

Signed-off-by: Jens Axboe 
---
 mm/filemap.c | 42 +++---
 1 file changed, 39 insertions(+), 3 deletions(-)

diff --git a/mm/filemap.c b/mm/filemap.c
index f2479af09da9..a8854a083b71 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -764,37 +764,73 @@ wait_queue_head_t *page_waitqueue(struct page *page)
 }
 EXPORT_SYMBOL(page_waitqueue);
 
+static bool inc_dirty_wait(struct page *page)
+{
+   if (!page->mapping || !PageDirty(page) || !PageWriteback(page))
+   return false;
+   else {
+   struct bdi_writeback *wb = inode_to_wb(page->mapping->host);
+
+   atomic_inc(>dirty_sleeping);
+   return true;
+   }
+}
+
+static void dec_dirty_wait(struct page *page)
+{
+   struct bdi_writeback *wb = inode_to_wb(page->mapping->host);
+
+   atomic_dec(>dirty_sleeping);
+}
+
 void wait_on_page_bit(struct page *page, int bit_nr)
 {
DEFINE_WAIT_BIT(wait, >flags, bit_nr);
 
-   if (test_bit(bit_nr, >flags))
+   if (test_bit(bit_nr, >flags)) {
+   bool did_inc = inc_dirty_wait(page);
__wait_on_bit(page_waitqueue(page), , bit_wait_io,
TASK_UNINTERRUPTIBLE);
+   if (did_inc)
+   dec_dirty_wait(page);
+   }
 }
 EXPORT_SYMBOL(wait_on_page_bit);
 
 int wait_on_page_bit_killable(struct page *page, int bit_nr)
 {
DEFINE_WAIT_BIT(wait, >flags, bit_nr);
+   bool did_inc;
+   int ret;
 
if (!test_bit(bit_nr, >flags))
return 0;
 
-   return __wait_on_bit(page_waitqueue(page), ,
+   did_inc = inc_dirty_wait(page);
+   ret = __wait_on_bit(page_waitqueue(page), ,
 bit_wait_io, TASK_KILLABLE);
+   if (did_inc)
+   dec_dirty_wait(page);
+   return ret;
 }
 
 int wait_on_page_bit_killable_timeout(struct page *page,
   int bit_nr, unsigned long timeout)
 {
DEFINE_WAIT_BIT(wait, >flags, bit_nr);
+   bool did_inc;
+   int ret;
 
wait.key.timeout = jiffies + timeout;
if (!test_bit(bit_nr, >flags))
return 0;
-   return __wait_on_bit(page_waitqueue(page), ,
+
+   did_inc = inc_dirty_wait(page);
+   ret = __wait_on_bit(page_waitqueue(page), ,
 bit_wait_io_timeout, TASK_KILLABLE);
+   if (did_inc)
+   dec_dirty_wait(page);
+   return ret;
 }
 EXPORT_SYMBOL_GPL(wait_on_page_bit_killable_timeout);
 
-- 
2.8.0.rc4.6.g7e4ba36



[PATCH 2/8] writeback: add wbc_to_write_cmd()

2016-04-17 Thread Jens Axboe
Add wbc_to_write_cmd(), which returns the write type to use, based on a
struct writeback_control. No functional changes in this patch, but it
prepares us for factoring other wbc fields for write type.

Signed-off-by: Jens Axboe 
---
 fs/block_dev.c| 2 +-
 fs/buffer.c   | 2 +-
 fs/f2fs/data.c| 2 +-
 fs/f2fs/node.c| 2 +-
 fs/gfs2/meta_io.c | 3 +--
 fs/mpage.c| 9 -
 fs/xfs/xfs_aops.c | 2 +-
 include/linux/writeback.h | 8 
 8 files changed, 18 insertions(+), 12 deletions(-)

diff --git a/fs/block_dev.c b/fs/block_dev.c
index 20a2c02b77c4..8662da6aa07c 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -432,7 +432,7 @@ int bdev_write_page(struct block_device *bdev, sector_t 
sector,
struct page *page, struct writeback_control *wbc)
 {
int result;
-   int rw = (wbc->sync_mode == WB_SYNC_ALL) ? WRITE_SYNC : WRITE;
+   int rw = wbc_to_write_cmd(wbc);
const struct block_device_operations *ops = bdev->bd_disk->fops;
 
if (!ops->rw_page || bdev_get_integrity(bdev))
diff --git a/fs/buffer.c b/fs/buffer.c
index af0d9a82a8ed..46763c58e786 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -1697,7 +1697,7 @@ static int __block_write_full_page(struct inode *inode, 
struct page *page,
struct buffer_head *bh, *head;
unsigned int blocksize, bbits;
int nr_underway = 0;
-   int write_op = (wbc->sync_mode == WB_SYNC_ALL ? WRITE_SYNC : WRITE);
+   int write_op = wbc_to_write_cmd(wbc);
 
head = create_page_buffers(page, inode,
(1 << BH_Dirty)|(1 << BH_Uptodate));
diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
index 5dafb9cef12e..e4e81ce663c5 100644
--- a/fs/f2fs/data.c
+++ b/fs/f2fs/data.c
@@ -1153,7 +1153,7 @@ static int f2fs_write_data_page(struct page *page,
struct f2fs_io_info fio = {
.sbi = sbi,
.type = DATA,
-   .rw = (wbc->sync_mode == WB_SYNC_ALL) ? WRITE_SYNC : WRITE,
+   .rw = wbc_to_write_cmd(wbc),
.page = page,
.encrypted_page = NULL,
};
diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c
index 1a33de9d84b1..3b377258dc09 100644
--- a/fs/f2fs/node.c
+++ b/fs/f2fs/node.c
@@ -1397,7 +1397,7 @@ static int f2fs_write_node_page(struct page *page,
struct f2fs_io_info fio = {
.sbi = sbi,
.type = NODE,
-   .rw = (wbc->sync_mode == WB_SYNC_ALL) ? WRITE_SYNC : WRITE,
+   .rw = wbc_to_write_cmd(wbc),
.page = page,
.encrypted_page = NULL,
};
diff --git a/fs/gfs2/meta_io.c b/fs/gfs2/meta_io.c
index 0448524c11bc..3fdfa3848f18 100644
--- a/fs/gfs2/meta_io.c
+++ b/fs/gfs2/meta_io.c
@@ -37,8 +37,7 @@ static int gfs2_aspace_writepage(struct page *page, struct 
writeback_control *wb
 {
struct buffer_head *bh, *head;
int nr_underway = 0;
-   int write_op = REQ_META | REQ_PRIO |
-   (wbc->sync_mode == WB_SYNC_ALL ? WRITE_SYNC : WRITE);
+   int write_op = REQ_META | REQ_PRIO | wbc_to_write_cmd(wbc);
 
BUG_ON(!PageLocked(page));
BUG_ON(!page_has_buffers(page));
diff --git a/fs/mpage.c b/fs/mpage.c
index eedc644b78d7..bcbdb61b24f1 100644
--- a/fs/mpage.c
+++ b/fs/mpage.c
@@ -486,7 +486,6 @@ static int __mpage_writepage(struct page *page, struct 
writeback_control *wbc,
struct buffer_head map_bh;
loff_t i_size = i_size_read(inode);
int ret = 0;
-   int wr = (wbc->sync_mode == WB_SYNC_ALL ?  WRITE_SYNC : WRITE);
 
if (page_has_buffers(page)) {
struct buffer_head *head = page_buffers(page);
@@ -595,7 +594,7 @@ page_is_mapped:
 * This page will go to BIO.  Do we need to send this BIO off first?
 */
if (bio && mpd->last_block_in_bio != blocks[0] - 1)
-   bio = mpage_bio_submit(wr, bio);
+   bio = mpage_bio_submit(wbc_to_write_cmd(wbc), bio);
 
 alloc_new:
if (bio == NULL) {
@@ -622,7 +621,7 @@ alloc_new:
wbc_account_io(wbc, page, PAGE_SIZE);
length = first_unmapped << blkbits;
if (bio_add_page(bio, page, length, 0) < length) {
-   bio = mpage_bio_submit(wr, bio);
+   bio = mpage_bio_submit(wbc_to_write_cmd(wbc), bio);
goto alloc_new;
}
 
@@ -632,7 +631,7 @@ alloc_new:
set_page_writeback(page);
unlock_page(page);
if (boundary || (first_unmapped != blocks_per_page)) {
-   bio = mpage_bio_submit(wr, bio);
+   bio = mpage_bio_submit(wbc_to_write_cmd(wbc), bio);
if (boundary_block) {
write_boundary_block(boundary_bdev,
boundary_block, 1 << blkbits);
@@ -644,7 +643,7 @@ alloc_new:
 
 confused:
if (bio)
-   bio = mpage_bio_submit(wr, bio);
+

[PATCH 3/8] writeback: use WRITE_BG for kupdate and background writeback

2016-04-17 Thread Jens Axboe
If we're doing background type writes, then use the appropriate
write command for that.

Signed-off-by: Jens Axboe 
---
 include/linux/writeback.h | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/include/linux/writeback.h b/include/linux/writeback.h
index aa66fa05ff0d..6e4a35acaa3e 100644
--- a/include/linux/writeback.h
+++ b/include/linux/writeback.h
@@ -104,6 +104,8 @@ static inline int wbc_to_write_cmd(struct writeback_control 
*wbc)
 {
if (wbc->sync_mode == WB_SYNC_ALL)
return WRITE_SYNC;
+   else if (wbc->for_kupdate || wbc->for_background)
+   return WRITE_BG;
 
return WRITE;
 }
-- 
2.8.0.rc4.6.g7e4ba36



[PATCH 4/8] writeback: track if we're sleeping on progress in balance_dirty_pages()

2016-04-17 Thread Jens Axboe
Note in the bdi_writeback structure if a task is currently being
limited in balance_dirty_pages(), waiting for writeback to
proceed.

Signed-off-by: Jens Axboe 
---
 include/linux/backing-dev-defs.h | 2 ++
 mm/backing-dev.c | 1 +
 mm/page-writeback.c  | 2 ++
 3 files changed, 5 insertions(+)

diff --git a/include/linux/backing-dev-defs.h b/include/linux/backing-dev-defs.h
index 3f103076d0bf..1212c374b928 100644
--- a/include/linux/backing-dev-defs.h
+++ b/include/linux/backing-dev-defs.h
@@ -116,6 +116,8 @@ struct bdi_writeback {
struct list_head work_list;
struct delayed_work dwork;  /* work item used for writeback */
 
+   atomic_t dirty_sleeping;/* waiting on dirty limit exceeded */
+
struct list_head bdi_node;  /* anchored at bdi->wb_list */
 
 #ifdef CONFIG_CGROUP_WRITEBACK
diff --git a/mm/backing-dev.c b/mm/backing-dev.c
index 0c6317b7db38..41db7dff11d0 100644
--- a/mm/backing-dev.c
+++ b/mm/backing-dev.c
@@ -310,6 +310,7 @@ static int wb_init(struct bdi_writeback *wb, struct 
backing_dev_info *bdi,
spin_lock_init(&wb->work_lock);
INIT_LIST_HEAD(&wb->work_list);
INIT_DELAYED_WORK(&wb->dwork, wb_workfn);
+   atomic_set(&wb->dirty_sleeping, 0);
 
wb->congested = wb_congested_get_create(bdi, blkcg_id, gfp);
if (!wb->congested)
diff --git a/mm/page-writeback.c b/mm/page-writeback.c
index 999792d35ccc..028a3d4d7129 100644
--- a/mm/page-writeback.c
+++ b/mm/page-writeback.c
@@ -1746,7 +1746,9 @@ pause:
  pause,
  start_time);
__set_current_state(TASK_KILLABLE);
+   atomic_inc(&wb->dirty_sleeping);
io_schedule_timeout(pause);
+   atomic_dec(&wb->dirty_sleeping);
 
current->dirty_paused_when = now + pause;
current->nr_dirtied = 0;
-- 
2.8.0.rc4.6.g7e4ba36



[PATCH 7/8] block: add scalable completion tracking of requests

2016-04-17 Thread Jens Axboe
For legacy block, we simply track them in the request queue. For
blk-mq, we track them on a per-sw queue basis, which we can then
sum up through the hardware queues and finally to a per device
state.

The stats are tracked in, roughly, 0.1s interval windows.

Add sysfs files to display the stats.

Signed-off-by: Jens Axboe 
---
 block/Makefile|   2 +-
 block/blk-core.c  |   4 +
 block/blk-mq-sysfs.c  |  47 
 block/blk-mq.c|  14 
 block/blk-mq.h|   3 +
 block/blk-stat.c  | 184 ++
 block/blk-stat.h  |  17 +
 block/blk-sysfs.c |  26 +++
 include/linux/blk_types.h |   8 ++
 include/linux/blkdev.h|   4 +
 10 files changed, 308 insertions(+), 1 deletion(-)
 create mode 100644 block/blk-stat.c
 create mode 100644 block/blk-stat.h

diff --git a/block/Makefile b/block/Makefile
index 9eda2322b2d4..3446e0472df0 100644
--- a/block/Makefile
+++ b/block/Makefile
@@ -5,7 +5,7 @@
 obj-$(CONFIG_BLOCK) := bio.o elevator.o blk-core.o blk-tag.o blk-sysfs.o \
blk-flush.o blk-settings.o blk-ioc.o blk-map.o \
blk-exec.o blk-merge.o blk-softirq.o blk-timeout.o \
-   blk-lib.o blk-mq.o blk-mq-tag.o \
+   blk-lib.o blk-mq.o blk-mq-tag.o blk-stat.o \
blk-mq-sysfs.o blk-mq-cpu.o blk-mq-cpumap.o ioctl.o \
genhd.o scsi_ioctl.o partition-generic.o ioprio.o \
badblocks.o partitions/
diff --git a/block/blk-core.c b/block/blk-core.c
index 74c16fd8995d..40b57bf4852c 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -2514,6 +2514,8 @@ void blk_start_request(struct request *req)
 {
blk_dequeue_request(req);
 
+   req->issue_time = ktime_to_ns(ktime_get());
+
/*
 * We are now handing the request to the hardware, initialize
 * resid_len to full count and add the timeout handler.
@@ -2581,6 +2583,8 @@ bool blk_update_request(struct request *req, int error, 
unsigned int nr_bytes)
 
trace_block_rq_complete(req->q, req, nr_bytes);
 
+   blk_stat_add(&req->q->rq_stats[rq_data_dir(req)], req);
+
if (!req->bio)
return false;
 
diff --git a/block/blk-mq-sysfs.c b/block/blk-mq-sysfs.c
index 4ea4dd8a1eed..2f68015f8616 100644
--- a/block/blk-mq-sysfs.c
+++ b/block/blk-mq-sysfs.c
@@ -247,6 +247,47 @@ static ssize_t blk_mq_hw_sysfs_cpus_show(struct 
blk_mq_hw_ctx *hctx, char *page)
return ret;
 }
 
+static void blk_mq_stat_clear(struct blk_mq_hw_ctx *hctx)
+{
+   struct blk_mq_ctx *ctx;
+   unsigned int i;
+
+   hctx_for_each_ctx(hctx, ctx, i) {
+   blk_stat_init(&ctx->stat[0]);
+   blk_stat_init(&ctx->stat[1]);
+   }
+}
+
+static ssize_t blk_mq_hw_sysfs_stat_store(struct blk_mq_hw_ctx *hctx,
+ const char *page, size_t count)
+{
+   blk_mq_stat_clear(hctx);
+   return count;
+}
+
+static ssize_t print_stat(char *page, struct blk_rq_stat *stat, const char 
*pre)
+{
+   return sprintf(page, "%s samples=%llu, mean=%lld, min=%lld, max=%lld\n",
+   pre, (long long) stat->nr_samples,
+   (long long) stat->mean, (long long) stat->min,
+   (long long) stat->max);
+}
+
+static ssize_t blk_mq_hw_sysfs_stat_show(struct blk_mq_hw_ctx *hctx, char 
*page)
+{
+   struct blk_rq_stat stat[2];
+   ssize_t ret;
+
+   blk_stat_init(&stat[0]);
+   blk_stat_init(&stat[1]);
+
+   blk_hctx_stat_get(hctx, stat);
+
+   ret = print_stat(page, &stat[0], "read :");
+   ret += print_stat(page + ret, &stat[1], "write:");
+   return ret;
+}
+
 static struct blk_mq_ctx_sysfs_entry blk_mq_sysfs_dispatched = {
.attr = {.name = "dispatched", .mode = S_IRUGO },
.show = blk_mq_sysfs_dispatched_show,
@@ -304,6 +345,11 @@ static struct blk_mq_hw_ctx_sysfs_entry 
blk_mq_hw_sysfs_poll = {
.attr = {.name = "io_poll", .mode = S_IRUGO },
.show = blk_mq_hw_sysfs_poll_show,
 };
+static struct blk_mq_hw_ctx_sysfs_entry blk_mq_hw_sysfs_stat = {
+   .attr = {.name = "stats", .mode = S_IRUGO | S_IWUSR },
+   .show = blk_mq_hw_sysfs_stat_show,
+   .store = blk_mq_hw_sysfs_stat_store,
+};
 
 static struct attribute *default_hw_ctx_attrs[] = {
&blk_mq_hw_sysfs_queued.attr,
@@ -314,6 +360,7 @@ static struct attribute *default_hw_ctx_attrs[] = {
&blk_mq_hw_sysfs_cpus.attr,
&blk_mq_hw_sysfs_active.attr,
&blk_mq_hw_sysfs_poll.attr,
+   &blk_mq_hw_sysfs_stat.attr,
NULL,
 };
 
diff --git a/block/blk-mq.c b/block/blk-mq.c
index 1699baf39b78..71b4a13fbf94 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -29,6 +29,7 @@
 #include "blk.h"
 #include "blk-mq.h"
 #include "blk-mq-tag.h"
+#include "blk-stat.h"
 
 static DEFINE_MUTEX(all_q_mutex);
 static LIST_HEAD(all_q_list);
@@ -356,10 +357,19 @@ static void 

[PATCH 7/8] block: add scalable completion tracking of requests

2016-04-17 Thread Jens Axboe
For legacy block, we simply track them in the request queue. For
blk-mq, we track them on a per-sw queue basis, which we can then
sum up through the hardware queues and finally to a per device
state.

The stats are tracked in, roughly, 0.1s interval windows.

Add sysfs files to display the stats.

Signed-off-by: Jens Axboe 
---
 block/Makefile|   2 +-
 block/blk-core.c  |   4 +
 block/blk-mq-sysfs.c  |  47 
 block/blk-mq.c|  14 
 block/blk-mq.h|   3 +
 block/blk-stat.c  | 184 ++
 block/blk-stat.h  |  17 +
 block/blk-sysfs.c |  26 +++
 include/linux/blk_types.h |   8 ++
 include/linux/blkdev.h|   4 +
 10 files changed, 308 insertions(+), 1 deletion(-)
 create mode 100644 block/blk-stat.c
 create mode 100644 block/blk-stat.h

diff --git a/block/Makefile b/block/Makefile
index 9eda2322b2d4..3446e0472df0 100644
--- a/block/Makefile
+++ b/block/Makefile
@@ -5,7 +5,7 @@
 obj-$(CONFIG_BLOCK) := bio.o elevator.o blk-core.o blk-tag.o blk-sysfs.o \
blk-flush.o blk-settings.o blk-ioc.o blk-map.o \
blk-exec.o blk-merge.o blk-softirq.o blk-timeout.o \
-   blk-lib.o blk-mq.o blk-mq-tag.o \
+   blk-lib.o blk-mq.o blk-mq-tag.o blk-stat.o \
blk-mq-sysfs.o blk-mq-cpu.o blk-mq-cpumap.o ioctl.o \
genhd.o scsi_ioctl.o partition-generic.o ioprio.o \
badblocks.o partitions/
diff --git a/block/blk-core.c b/block/blk-core.c
index 74c16fd8995d..40b57bf4852c 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -2514,6 +2514,8 @@ void blk_start_request(struct request *req)
 {
blk_dequeue_request(req);
 
+   req->issue_time = ktime_to_ns(ktime_get());
+
/*
 * We are now handing the request to the hardware, initialize
 * resid_len to full count and add the timeout handler.
@@ -2581,6 +2583,8 @@ bool blk_update_request(struct request *req, int error, 
unsigned int nr_bytes)
 
trace_block_rq_complete(req->q, req, nr_bytes);
 
+   blk_stat_add(&req->q->rq_stats[rq_data_dir(req)], req);
+
if (!req->bio)
return false;
 
diff --git a/block/blk-mq-sysfs.c b/block/blk-mq-sysfs.c
index 4ea4dd8a1eed..2f68015f8616 100644
--- a/block/blk-mq-sysfs.c
+++ b/block/blk-mq-sysfs.c
@@ -247,6 +247,47 @@ static ssize_t blk_mq_hw_sysfs_cpus_show(struct 
blk_mq_hw_ctx *hctx, char *page)
return ret;
 }
 
+static void blk_mq_stat_clear(struct blk_mq_hw_ctx *hctx)
+{
+   struct blk_mq_ctx *ctx;
+   unsigned int i;
+
+   hctx_for_each_ctx(hctx, ctx, i) {
+   blk_stat_init(&ctx->stat[0]);
+   blk_stat_init(&ctx->stat[1]);
+   }
+}
+
+static ssize_t blk_mq_hw_sysfs_stat_store(struct blk_mq_hw_ctx *hctx,
+ const char *page, size_t count)
+{
+   blk_mq_stat_clear(hctx);
+   return count;
+}
+
+static ssize_t print_stat(char *page, struct blk_rq_stat *stat, const char 
*pre)
+{
+   return sprintf(page, "%s samples=%llu, mean=%lld, min=%lld, max=%lld\n",
+   pre, (long long) stat->nr_samples,
+   (long long) stat->mean, (long long) stat->min,
+   (long long) stat->max);
+}
+
+static ssize_t blk_mq_hw_sysfs_stat_show(struct blk_mq_hw_ctx *hctx, char 
*page)
+{
+   struct blk_rq_stat stat[2];
+   ssize_t ret;
+
+   blk_stat_init(&stat[0]);
+   blk_stat_init(&stat[1]);
+
+   blk_hctx_stat_get(hctx, stat);
+
+   ret = print_stat(page, &stat[0], "read :");
+   ret += print_stat(page + ret, &stat[1], "write:");
+   return ret;
+}
+
 static struct blk_mq_ctx_sysfs_entry blk_mq_sysfs_dispatched = {
.attr = {.name = "dispatched", .mode = S_IRUGO },
.show = blk_mq_sysfs_dispatched_show,
@@ -304,6 +345,11 @@ static struct blk_mq_hw_ctx_sysfs_entry 
blk_mq_hw_sysfs_poll = {
.attr = {.name = "io_poll", .mode = S_IRUGO },
.show = blk_mq_hw_sysfs_poll_show,
 };
+static struct blk_mq_hw_ctx_sysfs_entry blk_mq_hw_sysfs_stat = {
+   .attr = {.name = "stats", .mode = S_IRUGO | S_IWUSR },
+   .show = blk_mq_hw_sysfs_stat_show,
+   .store = blk_mq_hw_sysfs_stat_store,
+};
 
 static struct attribute *default_hw_ctx_attrs[] = {
&blk_mq_hw_sysfs_queued.attr,
@@ -314,6 +360,7 @@ static struct attribute *default_hw_ctx_attrs[] = {
&blk_mq_hw_sysfs_cpus.attr,
&blk_mq_hw_sysfs_active.attr,
&blk_mq_hw_sysfs_poll.attr,
+   &blk_mq_hw_sysfs_stat.attr,
NULL,
 };
 
diff --git a/block/blk-mq.c b/block/blk-mq.c
index 1699baf39b78..71b4a13fbf94 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -29,6 +29,7 @@
 #include "blk.h"
 #include "blk-mq.h"
 #include "blk-mq-tag.h"
+#include "blk-stat.h"
 
 static DEFINE_MUTEX(all_q_mutex);
 static LIST_HEAD(all_q_list);
@@ -356,10 +357,19 @@ static void 

[PATCH 2/8] writeback: add wbc_to_write_cmd()

2016-04-17 Thread Jens Axboe
Add wbc_to_write_cmd(), which returns the write type to use, based on a
struct writeback_control. No functional changes in this patch, but it
prepares us for factoring other wbc fields for write type.

Signed-off-by: Jens Axboe 
---
 fs/block_dev.c| 2 +-
 fs/buffer.c   | 2 +-
 fs/f2fs/data.c| 2 +-
 fs/f2fs/node.c| 2 +-
 fs/gfs2/meta_io.c | 3 +--
 fs/mpage.c| 9 -
 fs/xfs/xfs_aops.c | 2 +-
 include/linux/writeback.h | 8 
 8 files changed, 18 insertions(+), 12 deletions(-)

diff --git a/fs/block_dev.c b/fs/block_dev.c
index 20a2c02b77c4..8662da6aa07c 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -432,7 +432,7 @@ int bdev_write_page(struct block_device *bdev, sector_t 
sector,
struct page *page, struct writeback_control *wbc)
 {
int result;
-   int rw = (wbc->sync_mode == WB_SYNC_ALL) ? WRITE_SYNC : WRITE;
+   int rw = wbc_to_write_cmd(wbc);
const struct block_device_operations *ops = bdev->bd_disk->fops;
 
if (!ops->rw_page || bdev_get_integrity(bdev))
diff --git a/fs/buffer.c b/fs/buffer.c
index af0d9a82a8ed..46763c58e786 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -1697,7 +1697,7 @@ static int __block_write_full_page(struct inode *inode, 
struct page *page,
struct buffer_head *bh, *head;
unsigned int blocksize, bbits;
int nr_underway = 0;
-   int write_op = (wbc->sync_mode == WB_SYNC_ALL ? WRITE_SYNC : WRITE);
+   int write_op = wbc_to_write_cmd(wbc);
 
head = create_page_buffers(page, inode,
(1 << BH_Dirty)|(1 << BH_Uptodate));
diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
index 5dafb9cef12e..e4e81ce663c5 100644
--- a/fs/f2fs/data.c
+++ b/fs/f2fs/data.c
@@ -1153,7 +1153,7 @@ static int f2fs_write_data_page(struct page *page,
struct f2fs_io_info fio = {
.sbi = sbi,
.type = DATA,
-   .rw = (wbc->sync_mode == WB_SYNC_ALL) ? WRITE_SYNC : WRITE,
+   .rw = wbc_to_write_cmd(wbc),
.page = page,
.encrypted_page = NULL,
};
diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c
index 1a33de9d84b1..3b377258dc09 100644
--- a/fs/f2fs/node.c
+++ b/fs/f2fs/node.c
@@ -1397,7 +1397,7 @@ static int f2fs_write_node_page(struct page *page,
struct f2fs_io_info fio = {
.sbi = sbi,
.type = NODE,
-   .rw = (wbc->sync_mode == WB_SYNC_ALL) ? WRITE_SYNC : WRITE,
+   .rw = wbc_to_write_cmd(wbc),
.page = page,
.encrypted_page = NULL,
};
diff --git a/fs/gfs2/meta_io.c b/fs/gfs2/meta_io.c
index 0448524c11bc..3fdfa3848f18 100644
--- a/fs/gfs2/meta_io.c
+++ b/fs/gfs2/meta_io.c
@@ -37,8 +37,7 @@ static int gfs2_aspace_writepage(struct page *page, struct 
writeback_control *wb
 {
struct buffer_head *bh, *head;
int nr_underway = 0;
-   int write_op = REQ_META | REQ_PRIO |
-   (wbc->sync_mode == WB_SYNC_ALL ? WRITE_SYNC : WRITE);
+   int write_op = REQ_META | REQ_PRIO | wbc_to_write_cmd(wbc);
 
BUG_ON(!PageLocked(page));
BUG_ON(!page_has_buffers(page));
diff --git a/fs/mpage.c b/fs/mpage.c
index eedc644b78d7..bcbdb61b24f1 100644
--- a/fs/mpage.c
+++ b/fs/mpage.c
@@ -486,7 +486,6 @@ static int __mpage_writepage(struct page *page, struct 
writeback_control *wbc,
struct buffer_head map_bh;
loff_t i_size = i_size_read(inode);
int ret = 0;
-   int wr = (wbc->sync_mode == WB_SYNC_ALL ?  WRITE_SYNC : WRITE);
 
if (page_has_buffers(page)) {
struct buffer_head *head = page_buffers(page);
@@ -595,7 +594,7 @@ page_is_mapped:
 * This page will go to BIO.  Do we need to send this BIO off first?
 */
if (bio && mpd->last_block_in_bio != blocks[0] - 1)
-   bio = mpage_bio_submit(wr, bio);
+   bio = mpage_bio_submit(wbc_to_write_cmd(wbc), bio);
 
 alloc_new:
if (bio == NULL) {
@@ -622,7 +621,7 @@ alloc_new:
wbc_account_io(wbc, page, PAGE_SIZE);
length = first_unmapped << blkbits;
if (bio_add_page(bio, page, length, 0) < length) {
-   bio = mpage_bio_submit(wr, bio);
+   bio = mpage_bio_submit(wbc_to_write_cmd(wbc), bio);
goto alloc_new;
}
 
@@ -632,7 +631,7 @@ alloc_new:
set_page_writeback(page);
unlock_page(page);
if (boundary || (first_unmapped != blocks_per_page)) {
-   bio = mpage_bio_submit(wr, bio);
+   bio = mpage_bio_submit(wbc_to_write_cmd(wbc), bio);
if (boundary_block) {
write_boundary_block(boundary_bdev,
boundary_block, 1 << blkbits);
@@ -644,7 +643,7 @@ alloc_new:
 
 confused:
if (bio)
-   bio = mpage_bio_submit(wr, bio);
+   bio = 

[PATCH 3/8] writeback: use WRITE_BG for kupdate and background writeback

2016-04-17 Thread Jens Axboe
If we're doing background type writes, then use the appropriate
write command for that.

Signed-off-by: Jens Axboe 
---
 include/linux/writeback.h | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/include/linux/writeback.h b/include/linux/writeback.h
index aa66fa05ff0d..6e4a35acaa3e 100644
--- a/include/linux/writeback.h
+++ b/include/linux/writeback.h
@@ -104,6 +104,8 @@ static inline int wbc_to_write_cmd(struct writeback_control 
*wbc)
 {
if (wbc->sync_mode == WB_SYNC_ALL)
return WRITE_SYNC;
+   else if (wbc->for_kupdate || wbc->for_background)
+   return WRITE_BG;
 
return WRITE;
 }
-- 
2.8.0.rc4.6.g7e4ba36



[PATCH 4/8] writeback: track if we're sleeping on progress in balance_dirty_pages()

2016-04-17 Thread Jens Axboe
Note in the bdi_writeback structure if a task is currently being
limited in balance_dirty_pages(), waiting for writeback to
proceed.

Signed-off-by: Jens Axboe 
---
 include/linux/backing-dev-defs.h | 2 ++
 mm/backing-dev.c | 1 +
 mm/page-writeback.c  | 2 ++
 3 files changed, 5 insertions(+)

diff --git a/include/linux/backing-dev-defs.h b/include/linux/backing-dev-defs.h
index 3f103076d0bf..1212c374b928 100644
--- a/include/linux/backing-dev-defs.h
+++ b/include/linux/backing-dev-defs.h
@@ -116,6 +116,8 @@ struct bdi_writeback {
struct list_head work_list;
struct delayed_work dwork;  /* work item used for writeback */
 
+   atomic_t dirty_sleeping;/* waiting on dirty limit exceeded */
+
struct list_head bdi_node;  /* anchored at bdi->wb_list */
 
 #ifdef CONFIG_CGROUP_WRITEBACK
diff --git a/mm/backing-dev.c b/mm/backing-dev.c
index 0c6317b7db38..41db7dff11d0 100644
--- a/mm/backing-dev.c
+++ b/mm/backing-dev.c
@@ -310,6 +310,7 @@ static int wb_init(struct bdi_writeback *wb, struct 
backing_dev_info *bdi,
spin_lock_init(&wb->work_lock);
INIT_LIST_HEAD(&wb->work_list);
INIT_DELAYED_WORK(&wb->dwork, wb_workfn);
+   atomic_set(&wb->dirty_sleeping, 0);
 
wb->congested = wb_congested_get_create(bdi, blkcg_id, gfp);
if (!wb->congested)
diff --git a/mm/page-writeback.c b/mm/page-writeback.c
index 999792d35ccc..028a3d4d7129 100644
--- a/mm/page-writeback.c
+++ b/mm/page-writeback.c
@@ -1746,7 +1746,9 @@ pause:
  pause,
  start_time);
__set_current_state(TASK_KILLABLE);
+   atomic_inc(&wb->dirty_sleeping);
io_schedule_timeout(pause);
+   atomic_dec(&wb->dirty_sleeping);
 
current->dirty_paused_when = now + pause;
current->nr_dirtied = 0;
-- 
2.8.0.rc4.6.g7e4ba36



[PATCH 8/8] writeback: throttle buffered writeback

2016-04-17 Thread Jens Axboe
Test patch that throttles buffered writeback to make it a lot
more smooth, and has way less impact on other system activity.
Background writeback should be, by definition, background
activity. The fact that we flush huge bundles of it at a time
means that it potentially has heavy impacts on foreground workloads,
which isn't ideal. We can't easily limit the sizes of writes that
we do, since that would impact file system layout in the presence
of delayed allocation. So just throttle back buffered writeback,
unless someone is waiting for it.

The algorithm for when to throttle takes its inspiration in the
CoDel networking scheduling algorithm. Like CoDel, blk-wb monitors
the minimum latencies of requests over a window of time. In that
window of time, if the minimum latency of any request exceeds a
given target, then a scale count is incremented and the queue depth
is shrunk. The next monitoring window is shrunk accordingly. Unlike
CoDel, if we hit a window that exhibits good behavior, then we
simply increment the scale count and re-calculate the limits for that
scale value. This prevents us from oscillating between a
close-to-ideal value and max all the time, instead remaining in the
windows where we get good behavior.

The patch registers two sysfs entries. The first one, 'wb_lat_usec',
sets the latency target for the window. It defaults to 2 msec for
non-rotational storage, and 75 msec for rotational storage. Setting
this value to '0' disables blk-wb.

The second entry, 'wb_stats', is a debug entry, that simply shows the
current internal state of the throttling machine:

$ cat /sys/block/nvme0n1/queue/wb_stats
background=16, normal=32, max=64, inflight=0, wait=0, bdp_wait=0

'background' denotes how many requests we will allow in-flight for
idle background buffered writeback, 'normal' for higher priority
writeback, and 'max' for when it's urgent we clean pages.

'inflight' shows how many requests are currently in-flight for
buffered writeback, 'wait' shows if anyone is currently waiting for
access, and 'bdp_wait' shows if someone is currently throttled on this
device in balance_dirty_pages().

blk-wb also registers a few trace events, that can be used to monitor
the state changes:

block_wb_lat: Latency 2446318

block_wb_stat:  read lat: mean=2446318, min=2446318, max=2446318, samples=1,
   write lat: mean=518866, min=15522, max=5330353, samples=57

block_wb_step: step down: step=1, background=8, normal=16, max=32

'block_wb_lat' logs a violation in sync issue latency, 'block_wb_stat'
logs a window violation of latencies and dumps the stats that lead to
that, and finally, 'block_wb_step' logs a step up/down and the new
limits associated with that state.

Signed-off-by: Jens Axboe 
---
 block/Makefile   |   2 +-
 block/blk-core.c |  15 ++
 block/blk-mq.c   |  31 ++-
 block/blk-settings.c |   4 +
 block/blk-sysfs.c|  57 +
 block/blk-wb.c   | 495 +++
 block/blk-wb.h   |  42 
 include/linux/blk_types.h|   2 +
 include/linux/blkdev.h   |   3 +
 include/trace/events/block.h |  98 +
 10 files changed, 746 insertions(+), 3 deletions(-)
 create mode 100644 block/blk-wb.c
 create mode 100644 block/blk-wb.h

diff --git a/block/Makefile b/block/Makefile
index 3446e0472df0..7e4be7a56a59 100644
--- a/block/Makefile
+++ b/block/Makefile
@@ -5,7 +5,7 @@
 obj-$(CONFIG_BLOCK) := bio.o elevator.o blk-core.o blk-tag.o blk-sysfs.o \
blk-flush.o blk-settings.o blk-ioc.o blk-map.o \
blk-exec.o blk-merge.o blk-softirq.o blk-timeout.o \
-   blk-lib.o blk-mq.o blk-mq-tag.o blk-stat.o \
+   blk-lib.o blk-mq.o blk-mq-tag.o blk-stat.o blk-wb.o \
blk-mq-sysfs.o blk-mq-cpu.o blk-mq-cpumap.o ioctl.o \
genhd.o scsi_ioctl.o partition-generic.o ioprio.o \
badblocks.o partitions/
diff --git a/block/blk-core.c b/block/blk-core.c
index 40b57bf4852c..d941f69dfb4b 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -39,6 +39,7 @@
 
 #include "blk.h"
 #include "blk-mq.h"
+#include "blk-wb.h"
 
 EXPORT_TRACEPOINT_SYMBOL_GPL(block_bio_remap);
 EXPORT_TRACEPOINT_SYMBOL_GPL(block_rq_remap);
@@ -880,6 +881,7 @@ blk_init_allocated_queue(struct request_queue *q, 
request_fn_proc *rfn,
 
 fail:
blk_free_flush_queue(q->fq);
+   blk_wb_exit(q);
return NULL;
 }
 EXPORT_SYMBOL(blk_init_allocated_queue);
@@ -1395,6 +1397,7 @@ void blk_requeue_request(struct request_queue *q, struct 
request *rq)
blk_delete_timer(rq);
blk_clear_rq_complete(rq);
trace_block_rq_requeue(q, rq);
+   blk_wb_requeue(q->rq_wb, rq);
 
if (rq->cmd_flags & REQ_QUEUED)
blk_queue_end_tag(q, rq);
@@ -1485,6 +1488,8 @@ void __blk_put_request(struct request_queue *q, struct 
request *req)
   

[PATCH 8/8] writeback: throttle buffered writeback

2016-04-17 Thread Jens Axboe
Test patch that throttles buffered writeback to make it a lot
more smooth, and has way less impact on other system activity.
Background writeback should be, by definition, background
activity. The fact that we flush huge bundles of it at a time
means that it potentially has heavy impacts on foreground workloads,
which isn't ideal. We can't easily limit the sizes of writes that
we do, since that would impact file system layout in the presence
of delayed allocation. So just throttle back buffered writeback,
unless someone is waiting for it.

The algorithm for when to throttle takes its inspiration in the
CoDel networking scheduling algorithm. Like CoDel, blk-wb monitors
the minimum latencies of requests over a window of time. In that
window of time, if the minimum latency of any request exceeds a
given target, then a scale count is incremented and the queue depth
is shrunk. The next monitoring window is shrunk accordingly. Unlike
CoDel, if we hit a window that exhibits good behavior, then we
simply increment the scale count and re-calculate the limits for that
scale value. This prevents us from oscillating between a
close-to-ideal value and max all the time, instead remaining in the
windows where we get good behavior.

The patch registers two sysfs entries. The first one, 'wb_lat_usec',
sets the latency target for the window. It defaults to 2 msec for
non-rotational storage, and 75 msec for rotational storage. Setting
this value to '0' disables blk-wb.

The second entry, 'wb_stats', is a debug entry, that simply shows the
current internal state of the throttling machine:

$ cat /sys/block/nvme0n1/queue/wb_stats
background=16, normal=32, max=64, inflight=0, wait=0, bdp_wait=0

'background' denotes how many requests we will allow in-flight for
idle background buffered writeback, 'normal' for higher priority
writeback, and 'max' for when it's urgent we clean pages.

'inflight' shows how many requests are currently in-flight for
buffered writeback, 'wait' shows if anyone is currently waiting for
access, and 'bdp_wait' shows if someone is currently throttled on this
device in balance_dirty_pages().

blk-wb also registers a few trace events, that can be used to monitor
the state changes:

block_wb_lat: Latency 2446318

block_wb_stat:  read lat: mean=2446318, min=2446318, max=2446318, samples=1,
   write lat: mean=518866, min=15522, max=5330353, samples=57

block_wb_step: step down: step=1, background=8, normal=16, max=32

'block_wb_lat' logs a violation in sync issue latency, 'block_wb_stat'
logs a window violation of latencies and dumps the stats that lead to
that, and finally, 'block_wb_step' logs a step up/down and the new
limits associated with that state.

Signed-off-by: Jens Axboe 
---
 block/Makefile   |   2 +-
 block/blk-core.c |  15 ++
 block/blk-mq.c   |  31 ++-
 block/blk-settings.c |   4 +
 block/blk-sysfs.c|  57 +
 block/blk-wb.c   | 495 +++
 block/blk-wb.h   |  42 
 include/linux/blk_types.h|   2 +
 include/linux/blkdev.h   |   3 +
 include/trace/events/block.h |  98 +
 10 files changed, 746 insertions(+), 3 deletions(-)
 create mode 100644 block/blk-wb.c
 create mode 100644 block/blk-wb.h

diff --git a/block/Makefile b/block/Makefile
index 3446e0472df0..7e4be7a56a59 100644
--- a/block/Makefile
+++ b/block/Makefile
@@ -5,7 +5,7 @@
 obj-$(CONFIG_BLOCK) := bio.o elevator.o blk-core.o blk-tag.o blk-sysfs.o \
blk-flush.o blk-settings.o blk-ioc.o blk-map.o \
blk-exec.o blk-merge.o blk-softirq.o blk-timeout.o \
-   blk-lib.o blk-mq.o blk-mq-tag.o blk-stat.o \
+   blk-lib.o blk-mq.o blk-mq-tag.o blk-stat.o blk-wb.o \
blk-mq-sysfs.o blk-mq-cpu.o blk-mq-cpumap.o ioctl.o \
genhd.o scsi_ioctl.o partition-generic.o ioprio.o \
badblocks.o partitions/
diff --git a/block/blk-core.c b/block/blk-core.c
index 40b57bf4852c..d941f69dfb4b 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -39,6 +39,7 @@
 
 #include "blk.h"
 #include "blk-mq.h"
+#include "blk-wb.h"
 
 EXPORT_TRACEPOINT_SYMBOL_GPL(block_bio_remap);
 EXPORT_TRACEPOINT_SYMBOL_GPL(block_rq_remap);
@@ -880,6 +881,7 @@ blk_init_allocated_queue(struct request_queue *q, 
request_fn_proc *rfn,
 
 fail:
blk_free_flush_queue(q->fq);
+   blk_wb_exit(q);
return NULL;
 }
 EXPORT_SYMBOL(blk_init_allocated_queue);
@@ -1395,6 +1397,7 @@ void blk_requeue_request(struct request_queue *q, struct 
request *rq)
blk_delete_timer(rq);
blk_clear_rq_complete(rq);
trace_block_rq_requeue(q, rq);
+   blk_wb_requeue(q->rq_wb, rq);
 
if (rq->cmd_flags & REQ_QUEUED)
blk_queue_end_tag(q, rq);
@@ -1485,6 +1488,8 @@ void __blk_put_request(struct request_queue *q, struct 
request *req)
/* this 

linux-next: manual merge of the livepatching tree with Linus' tree

2016-04-17 Thread Stephen Rothwell
Hi Jiri,

Today's linux-next merge of the livepatching tree got a conflict in:

  arch/powerpc/Kconfig

between commit:

  7f2bd0063342 ("powerpc/mm: enable page parallel initialisation")

from Linus' tree and commit:

  85baa095497f ("powerpc/livepatch: Add live patching support on ppc64le")

from the livepatching tree.

I fixed it up (see below) and can carry the fix as necessary. This
is now fixed as far as linux-next is concerned, but any non trivial
conflicts should be mentioned to your upstream maintainer when your tree
is submitted for merging.  You may also want to consider cooperating
with the maintainer of the conflicting tree to minimise any particularly
complex conflicts.

Maybe the selects under config PPC should be sorted ...

-- 
Cheers,
Stephen Rothwell

diff --cc arch/powerpc/Kconfig
index fbebde0771c8,944a79a2768f..
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@@ -159,7 -159,7 +159,8 @@@ config PP
select ARCH_HAS_DEVMEM_IS_ALLOWED
select HAVE_ARCH_SECCOMP_FILTER
select ARCH_HAS_UBSAN_SANITIZE_ALL
 +  select ARCH_SUPPORTS_DEFERRED_STRUCT_PAGE_INIT
+   select HAVE_LIVEPATCH if HAVE_DYNAMIC_FTRACE_WITH_REGS
  
  config GENERIC_CSUM
def_bool CPU_LITTLE_ENDIAN


linux-next: manual merge of the livepatching tree with Linus' tree

2016-04-17 Thread Stephen Rothwell
Hi Jiri,

Today's linux-next merge of the livepatching tree got a conflict in:

  arch/powerpc/Kconfig

between commit:

  7f2bd0063342 ("powerpc/mm: enable page parallel initialisation")

from Linus' tree and commit:

  85baa095497f ("powerpc/livepatch: Add live patching support on ppc64le")

from the livepatching tree.

I fixed it up (see below) and can carry the fix as necessary. This
is now fixed as far as linux-next is concerned, but any non trivial
conflicts should be mentioned to your upstream maintainer when your tree
is submitted for merging.  You may also want to consider cooperating
with the maintainer of the conflicting tree to minimise any particularly
complex conflicts.

Maybe the selects under config PPC should be sorted ...

-- 
Cheers,
Stephen Rothwell

diff --cc arch/powerpc/Kconfig
index fbebde0771c8,944a79a2768f..
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@@ -159,7 -159,7 +159,8 @@@ config PP
select ARCH_HAS_DEVMEM_IS_ALLOWED
select HAVE_ARCH_SECCOMP_FILTER
select ARCH_HAS_UBSAN_SANITIZE_ALL
 +  select ARCH_SUPPORTS_DEFERRED_STRUCT_PAGE_INIT
+   select HAVE_LIVEPATCH if HAVE_DYNAMIC_FTRACE_WITH_REGS
  
  config GENERIC_CSUM
def_bool CPU_LITTLE_ENDIAN


Re: [PATCH] spi: cadence: mark pm functions __maybe_unused

2016-04-17 Thread Shubhrajyoti Datta
On Sun, Apr 17, 2016 at 2:09 AM, Arnd Bergmann  wrote:
> The newly added runtime PM support for the cadence spi driver
> causes harmless warnings when PM is disabled:
>
> drivers/spi/spi-cadence.c:681:12: warning: 'cnds_runtime_suspend' defined but 
> not used
> drivers/spi/spi-cadence.c:652:12: warning: 'cnds_runtime_resume' defined but 
> not used
>
> This adds __maybe_unused annotations to the respective functions
> to shut up the warnings, while leaving the code in place for
> compile testing and avoiding ugly #ifdefs.

Thanks for the patch.
Feel free to add my ack.
Acked-by: Shubhrajyoti Datta 

>
> Signed-off-by: Arnd Bergmann 
> Fixes: d36ccd9f7ea4 ("spi: cadence: Runtime pm adaptation")
> ---


Re: [PATCH] spi: cadence: mark pm functions __maybe_unused

2016-04-17 Thread Shubhrajyoti Datta
On Sun, Apr 17, 2016 at 2:09 AM, Arnd Bergmann  wrote:
> The newly added runtime PM support for the cadence spi driver
> causes harmless warnings when PM is disabled:
>
> drivers/spi/spi-cadence.c:681:12: warning: 'cnds_runtime_suspend' defined but 
> not used
> drivers/spi/spi-cadence.c:652:12: warning: 'cnds_runtime_resume' defined but 
> not used
>
> This adds __maybe_unused annotations to the respective functions
> to shut up the warnings, while leaving the code in place for
> compile testing and avoiding ugly #ifdefs.

Thanks for the patch.
Feel free to add my ack.
Acked-by: Shubhrajyoti Datta 

>
> Signed-off-by: Arnd Bergmann 
> Fixes: d36ccd9f7ea4 ("spi: cadence: Runtime pm adaptation")
> ---


vgacon.c:undefined reference to `screen_info'

2016-04-17 Thread kbuild test robot
Hi Chen,

It's probably a bug fix that unveils the link errors.

tree:   https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git 
master
head:   9d090d01e3efdcdb8f7f07e86fcac91e01ae30cd
commit: f69405ce6c0fc9f4a039011007371b31f80b470d openrisc: include: asm: 
Kbuild: add default "vga.h"
date:   2 years, 5 months ago
config: openrisc-alldefconfig (attached as .config)
reproduce:
wget 
https://git.kernel.org/cgit/linux/kernel/git/wfg/lkp-tests.git/plain/sbin/make.cross
 -O ~/bin/make.cross
chmod +x ~/bin/make.cross
git checkout f69405ce6c0fc9f4a039011007371b31f80b470d
# save the attached .config to linux build tree
make.cross ARCH=openrisc 

All errors (new ones prefixed by >>):

   drivers/built-in.o: In function `vgacon_save_screen':
>> vgacon.c:(.text+0x20e0): undefined reference to `screen_info'
   vgacon.c:(.text+0x20e8): undefined reference to `screen_info'
   drivers/built-in.o: In function `vgacon_init':
   vgacon.c:(.text+0x284c): undefined reference to `screen_info'
   vgacon.c:(.text+0x2850): undefined reference to `screen_info'
   drivers/built-in.o: In function `vgacon_startup':
   vgacon.c:(.text+0x28d8): undefined reference to `screen_info'
   drivers/built-in.o:vgacon.c:(.text+0x28f0): more undefined references to 
`screen_info' follow

---
0-DAY kernel test infrastructure                Open Source Technology Center
https://lists.01.org/pipermail/kbuild-all   Intel Corporation


.config.gz
Description: Binary data


vgacon.c:undefined reference to `screen_info'

2016-04-17 Thread kbuild test robot
Hi Chen,

It's probably a bug fix that unveils the link errors.

tree:   https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git 
master
head:   9d090d01e3efdcdb8f7f07e86fcac91e01ae30cd
commit: f69405ce6c0fc9f4a039011007371b31f80b470d openrisc: include: asm: 
Kbuild: add default "vga.h"
date:   2 years, 5 months ago
config: openrisc-alldefconfig (attached as .config)
reproduce:
wget 
https://git.kernel.org/cgit/linux/kernel/git/wfg/lkp-tests.git/plain/sbin/make.cross
 -O ~/bin/make.cross
chmod +x ~/bin/make.cross
git checkout f69405ce6c0fc9f4a039011007371b31f80b470d
# save the attached .config to linux build tree
make.cross ARCH=openrisc 

All errors (new ones prefixed by >>):

   drivers/built-in.o: In function `vgacon_save_screen':
>> vgacon.c:(.text+0x20e0): undefined reference to `screen_info'
   vgacon.c:(.text+0x20e8): undefined reference to `screen_info'
   drivers/built-in.o: In function `vgacon_init':
   vgacon.c:(.text+0x284c): undefined reference to `screen_info'
   vgacon.c:(.text+0x2850): undefined reference to `screen_info'
   drivers/built-in.o: In function `vgacon_startup':
   vgacon.c:(.text+0x28d8): undefined reference to `screen_info'
   drivers/built-in.o:vgacon.c:(.text+0x28f0): more undefined references to 
`screen_info' follow

---
0-DAY kernel test infrastructure                Open Source Technology Center
https://lists.01.org/pipermail/kbuild-all   Intel Corporation


.config.gz
Description: Binary data


Re: [PATCH 2/2] mountinfo: implement show_path for kernfs and cgroup

2016-04-17 Thread Serge E. Hallyn
On Sun, Apr 17, 2016 at 03:04:32PM -0500, serge.hal...@ubuntu.com wrote:
> From: Serge Hallyn 
> 
> When showing a cgroupfs entry in mountinfo, show the
> path of the mount root dentry relative to the reader's
> cgroup namespace root.
> 
> Signed-off-by: Serge Hallyn 
> ---
>  fs/kernfs/mount.c  | 14 ++
>  include/linux/kernfs.h |  2 ++
>  kernel/cgroup.c| 35 +++
>  3 files changed, 51 insertions(+)
> 
> diff --git a/fs/kernfs/mount.c b/fs/kernfs/mount.c
> index f73541f..3b78724 100644
> --- a/fs/kernfs/mount.c
> +++ b/fs/kernfs/mount.c
> @@ -15,6 +15,7 @@
>  #include 
>  #include 
>  #include 
> +#include 
>  
>  #include "kernfs-internal.h"
>  
> @@ -40,6 +41,18 @@ static int kernfs_sop_show_options(struct seq_file *sf, 
> struct dentry *dentry)
>   return 0;
>  }
>  
> +static int kernfs_sop_show_path(struct seq_file *sf, struct dentry *dentry)
> +{
> + struct kernfs_node *node = dentry->d_fsdata;
> + struct kernfs_root *root = kernfs_root(node);
> + struct kernfs_syscall_ops *scops = root->syscall_ops;
> +
> + if (scops && scops->show_path)
> + return scops->show_path(sf, node, root);
> +
> + return seq_dentry(sf, dentry, " \t\n\\");
> +}
> +
>  const struct super_operations kernfs_sops = {
>   .statfs = simple_statfs,
>   .drop_inode = generic_delete_inode,
> @@ -47,6 +60,7 @@ const struct super_operations kernfs_sops = {
>  
>   .remount_fs = kernfs_sop_remount_fs,
>   .show_options   = kernfs_sop_show_options,
> + .show_path  = kernfs_sop_show_path,
>  };
>  
>  /**
> diff --git a/include/linux/kernfs.h b/include/linux/kernfs.h
> index c06c442..30f089e 100644
> --- a/include/linux/kernfs.h
> +++ b/include/linux/kernfs.h
> @@ -152,6 +152,8 @@ struct kernfs_syscall_ops {
>   int (*rmdir)(struct kernfs_node *kn);
>   int (*rename)(struct kernfs_node *kn, struct kernfs_node *new_parent,
> const char *new_name);
> + int (*show_path)(struct seq_file *sf, struct kernfs_node *kn,
> +  struct kernfs_root *root);
>  };
>  
>  struct kernfs_root {
> diff --git a/kernel/cgroup.c b/kernel/cgroup.c
> index 671dc05..9a0d7b3 100644
> --- a/kernel/cgroup.c
> +++ b/kernel/cgroup.c
> @@ -1593,6 +1593,40 @@ static int rebind_subsystems(struct cgroup_root 
> *dst_root, u16 ss_mask)
>   return 0;
>  }
>  
> +static int cgroup_show_path(struct seq_file *sf, struct kernfs_node *kf_node,
> + struct kernfs_root *kf_root)
> +{
> + int len = 0, ret = 0;
> + char *buf = NULL;
> + struct cgroup_namespace *ns = current->nsproxy->cgroup_ns;
> + struct cgroup_root *kf_cgroot = cgroup_root_from_kf(kf_root);
> + struct cgroup *ns_cgroup;
> +
> + mutex_lock(_mutex);

Hm, I can't grab the cgroup mutex here because I already have the
namespace_sem.  But that's required by cset_cgroup_from_root().  Can
I just call that under rcu_read_lock() instead?  (Not without
changing the lockdep_assert_held()).  Is there another way to get the
info needed here?

> + spin_lock_bh(_set_lock);
> + ns_cgroup = cset_cgroup_from_root(ns->root_cset, kf_cgroot);
> + len = kernfs_path_from_node(kf_node, ns_cgroup->kn, NULL, 0);
> + if (len > 0)
> + buf = kmalloc(len + 1, GFP_ATOMIC);
> + if (buf)
> + ret = kernfs_path_from_node(kf_node, ns_cgroup->kn, buf, len + 
> 1);
> +
> + spin_unlock_bh(_set_lock);
> + mutex_unlock(_mutex);
> +
> + if (len <= 0)
> + return len;
> + if (!buf)
> + return -ENOMEM;
> + if (ret == len) {
> + seq_escape(sf, buf, " \t\n\\");
> + ret = 0;
> + } else if (ret >= 0)
> + ret = -EINVAL;
> + kfree(buf);
> + return ret;
> +}
> +
>  static int cgroup_show_options(struct seq_file *seq,
>  struct kernfs_root *kf_root)
>  {
> @@ -5430,6 +5464,7 @@ static struct kernfs_syscall_ops cgroup_kf_syscall_ops 
> = {
>   .mkdir  = cgroup_mkdir,
>   .rmdir  = cgroup_rmdir,
>   .rename = cgroup_rename,
> + .show_path  = cgroup_show_path,
>  };
>  
>  static void __init cgroup_init_subsys(struct cgroup_subsys *ss, bool early)
> -- 
> 2.7.4
> 
> ___
> Containers mailing list
> contain...@lists.linux-foundation.org
> https://lists.linuxfoundation.org/mailman/listinfo/containers


Re: [PATCH 2/2] mountinfo: implement show_path for kernfs and cgroup

2016-04-17 Thread Serge E. Hallyn
On Sun, Apr 17, 2016 at 03:04:32PM -0500, serge.hal...@ubuntu.com wrote:
> From: Serge Hallyn 
> 
> When showing a cgroupfs entry in mountinfo, show the
> path of the mount root dentry relative to the reader's
> cgroup namespace root.
> 
> Signed-off-by: Serge Hallyn 
> ---
>  fs/kernfs/mount.c  | 14 ++
>  include/linux/kernfs.h |  2 ++
>  kernel/cgroup.c| 35 +++
>  3 files changed, 51 insertions(+)
> 
> diff --git a/fs/kernfs/mount.c b/fs/kernfs/mount.c
> index f73541f..3b78724 100644
> --- a/fs/kernfs/mount.c
> +++ b/fs/kernfs/mount.c
> @@ -15,6 +15,7 @@
>  #include 
>  #include 
>  #include 
> +#include 
>  
>  #include "kernfs-internal.h"
>  
> @@ -40,6 +41,18 @@ static int kernfs_sop_show_options(struct seq_file *sf, 
> struct dentry *dentry)
>   return 0;
>  }
>  
> +static int kernfs_sop_show_path(struct seq_file *sf, struct dentry *dentry)
> +{
> + struct kernfs_node *node = dentry->d_fsdata;
> + struct kernfs_root *root = kernfs_root(node);
> + struct kernfs_syscall_ops *scops = root->syscall_ops;
> +
> + if (scops && scops->show_path)
> + return scops->show_path(sf, node, root);
> +
> + return seq_dentry(sf, dentry, " \t\n\\");
> +}
> +
>  const struct super_operations kernfs_sops = {
>   .statfs = simple_statfs,
>   .drop_inode = generic_delete_inode,
> @@ -47,6 +60,7 @@ const struct super_operations kernfs_sops = {
>  
>   .remount_fs = kernfs_sop_remount_fs,
>   .show_options   = kernfs_sop_show_options,
> + .show_path  = kernfs_sop_show_path,
>  };
>  
>  /**
> diff --git a/include/linux/kernfs.h b/include/linux/kernfs.h
> index c06c442..30f089e 100644
> --- a/include/linux/kernfs.h
> +++ b/include/linux/kernfs.h
> @@ -152,6 +152,8 @@ struct kernfs_syscall_ops {
>   int (*rmdir)(struct kernfs_node *kn);
>   int (*rename)(struct kernfs_node *kn, struct kernfs_node *new_parent,
> const char *new_name);
> + int (*show_path)(struct seq_file *sf, struct kernfs_node *kn,
> +  struct kernfs_root *root);
>  };
>  
>  struct kernfs_root {
> diff --git a/kernel/cgroup.c b/kernel/cgroup.c
> index 671dc05..9a0d7b3 100644
> --- a/kernel/cgroup.c
> +++ b/kernel/cgroup.c
> @@ -1593,6 +1593,40 @@ static int rebind_subsystems(struct cgroup_root 
> *dst_root, u16 ss_mask)
>   return 0;
>  }
>  
> +static int cgroup_show_path(struct seq_file *sf, struct kernfs_node *kf_node,
> + struct kernfs_root *kf_root)
> +{
> + int len = 0, ret = 0;
> + char *buf = NULL;
> + struct cgroup_namespace *ns = current->nsproxy->cgroup_ns;
> + struct cgroup_root *kf_cgroot = cgroup_root_from_kf(kf_root);
> + struct cgroup *ns_cgroup;
> +
> + mutex_lock(_mutex);

Hm, I can't grab the cgroup mutex here because I already have the
namespace_sem.  But that's required by cset_cgroup_from_root().  Can
I just call that under rcu_read_lock() instead?  (Not without
changing the lockdep_assert_held()).  Is there another way to get the
info needed here?

> + spin_lock_bh(_set_lock);
> + ns_cgroup = cset_cgroup_from_root(ns->root_cset, kf_cgroot);
> + len = kernfs_path_from_node(kf_node, ns_cgroup->kn, NULL, 0);
> + if (len > 0)
> + buf = kmalloc(len + 1, GFP_ATOMIC);
> + if (buf)
> + ret = kernfs_path_from_node(kf_node, ns_cgroup->kn, buf, len + 
> 1);
> +
> + spin_unlock_bh(_set_lock);
> + mutex_unlock(_mutex);
> +
> + if (len <= 0)
> + return len;
> + if (!buf)
> + return -ENOMEM;
> + if (ret == len) {
> + seq_escape(sf, buf, " \t\n\\");
> + ret = 0;
> + } else if (ret >= 0)
> + ret = -EINVAL;
> + kfree(buf);
> + return ret;
> +}
> +
>  static int cgroup_show_options(struct seq_file *seq,
>  struct kernfs_root *kf_root)
>  {
> @@ -5430,6 +5464,7 @@ static struct kernfs_syscall_ops cgroup_kf_syscall_ops 
> = {
>   .mkdir  = cgroup_mkdir,
>   .rmdir  = cgroup_rmdir,
>   .rename = cgroup_rename,
> + .show_path  = cgroup_show_path,
>  };
>  
>  static void __init cgroup_init_subsys(struct cgroup_subsys *ss, bool early)
> -- 
> 2.7.4
> 
> ___
> Containers mailing list
> contain...@lists.linux-foundation.org
> https://lists.linuxfoundation.org/mailman/listinfo/containers


linux-next: build failure after merge of the gpio tree

2016-04-17 Thread Stephen Rothwell
Hi Linus,

After merging the gpio tree, today's linux-next build (x86_64
allmodconfig) failed like this:

drivers/gpio/gpio-f7188x.c: In function 'f7188x_gpio_set_single_ended':
drivers/gpio/gpio-f7188x.c:331:26: error: implicit declaration of function 
'gpio_data_mode' [-Werror=implicit-function-declaration]
  superio_outb(sio->addr, gpio_data_mode(bank->regbase), data);
  ^

Caused by commit

  f90c6bdb690b ("gpio: f7188x: use the new open drain callback")

I have used the gpio tree from next-20160415 for today.

-- 
Cheers,
Stephen Rothwell


linux-next: build failure after merge of the gpio tree

2016-04-17 Thread Stephen Rothwell
Hi Linus,

After merging the gpio tree, today's linux-next build (x86_64
allmodconfig) failed like this:

drivers/gpio/gpio-f7188x.c: In function 'f7188x_gpio_set_single_ended':
drivers/gpio/gpio-f7188x.c:331:26: error: implicit declaration of function 
'gpio_data_mode' [-Werror=implicit-function-declaration]
  superio_outb(sio->addr, gpio_data_mode(bank->regbase), data);
  ^

Caused by commit

  f90c6bdb690b ("gpio: f7188x: use the new open drain callback")

I have used the gpio tree from next-20160415 for today.

-- 
Cheers,
Stephen Rothwell


Re: [PATCH] ixgbe: use msleep for long delays

2016-04-17 Thread David Miller
From: Arnd Bergmann 
Date: Sat, 16 Apr 2016 22:35:08 +0200

> The newly added x550em_a support causes a link failure on ARM because of
> an overly long time passed into udelay():
> 
> ERROR: "__bad_udelay" [drivers/net/ethernet/intel/ixgbe/ixgbe.ko] undefined!
> 
> There are multiple variants of the ixgbe_acquire_swfw_sync_*() function,
> and the other ones all use msleep(), so we can safely assume that all
> callers are allowed to sleep, which makes msleep() a better replacement
> than mdelay().
> 
> Signed-off-by: Arnd Bergmann 
> Fixes: 49425dfc7451 ("ixgbe: Add support for x550em_a 10G MAC type")

I'm assuming Jeff will pick this up.


Re: [PATCH] bpf: avoid warning for wrong pointer cast

2016-04-17 Thread David Miller
From: Arnd Bergmann 
Date: Sat, 16 Apr 2016 22:29:33 +0200

> Two new functions in bpf contain a cast from a 'u64' to a
> pointer. This works on 64-bit architectures but causes a warning
> on all 32-bit architectures:
> 
> kernel/trace/bpf_trace.c: In function 'bpf_perf_event_output_tp':
> kernel/trace/bpf_trace.c:350:13: error: cast to pointer from integer of 
> different size [-Werror=int-to-pointer-cast]
>   u64 ctx = *(long *)r1;
> 
> This changes the cast to first convert the u64 argument into a uintptr_t,
> which is guaranteed to be the same size as a pointer.
> 
> Signed-off-by: Arnd Bergmann 
> Fixes: 9940d67c93b5 ("bpf: support bpf_get_stackid() and 
> bpf_perf_event_output() in tracepoint programs")

Applied.


Re: [PATCH] ixgbe: use msleep for long delays

2016-04-17 Thread David Miller
From: Arnd Bergmann 
Date: Sat, 16 Apr 2016 22:35:08 +0200

> The newly added x550em_a support causes a link failure on ARM because of
> an overly long time passed into udelay():
> 
> ERROR: "__bad_udelay" [drivers/net/ethernet/intel/ixgbe/ixgbe.ko] undefined!
> 
> There are multiple variants of the ixgbe_acquire_swfw_sync_*() function,
> and the other ones all use msleep(), so we can safely assume that all
> callers are allowed to sleep, which makes msleep() a better replacement
> than mdelay().
> 
> Signed-off-by: Arnd Bergmann 
> Fixes: 49425dfc7451 ("ixgbe: Add support for x550em_a 10G MAC type")

I'm assuming Jeff will pick this up.


Re: [PATCH] bpf: avoid warning for wrong pointer cast

2016-04-17 Thread David Miller
From: Arnd Bergmann 
Date: Sat, 16 Apr 2016 22:29:33 +0200

> Two new functions in bpf contain a cast from a 'u64' to a
> pointer. This works on 64-bit architectures but causes a warning
> on all 32-bit architectures:
> 
> kernel/trace/bpf_trace.c: In function 'bpf_perf_event_output_tp':
> kernel/trace/bpf_trace.c:350:13: error: cast to pointer from integer of 
> different size [-Werror=int-to-pointer-cast]
>   u64 ctx = *(long *)r1;
> 
> This changes the cast to first convert the u64 argument into a uintptr_t,
> which is guaranteed to be the same size as a pointer.
> 
> Signed-off-by: Arnd Bergmann 
> Fixes: 9940d67c93b5 ("bpf: support bpf_get_stackid() and 
> bpf_perf_event_output() in tracepoint programs")

Applied.


include/linux/unaligned/access_ok.h:7:19: error: redefinition of 'get_unaligned_le16'

2016-04-17 Thread kbuild test robot
tree:   https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git 
master
head:   9d090d01e3efdcdb8f7f07e86fcac91e01ae30cd
commit: 3194c6870158e305dac2af52f83681e9cb67280f NFC: nfcmrvl: add firmware 
download support
date:   6 months ago
config: ia64-allmodconfig (attached as .config)
reproduce:
wget 
https://git.kernel.org/cgit/linux/kernel/git/wfg/lkp-tests.git/plain/sbin/make.cross
 -O ~/bin/make.cross
chmod +x ~/bin/make.cross
git checkout 3194c6870158e305dac2af52f83681e9cb67280f
# save the attached .config to linux build tree
make.cross ARCH=ia64 

All errors (new ones prefixed by >>):

   In file included from drivers/nfc/nfcmrvl/fw_dnld.c:20:0:
>> include/linux/unaligned/access_ok.h:7:19: error: redefinition of 
>> 'get_unaligned_le16'
static inline u16 get_unaligned_le16(const void *p)
  ^
   In file included from arch/ia64/include/asm/unaligned.h:4:0,
from arch/ia64/include/asm/io.h:22,
from arch/ia64/include/asm/smp.h:20,
from include/linux/smp.h:59,
from include/linux/topology.h:33,
from include/linux/gfp.h:8,
from include/linux/kmod.h:22,
from include/linux/module.h:13,
from drivers/nfc/nfcmrvl/fw_dnld.c:19:
   include/linux/unaligned/le_struct.h:6:19: note: previous definition of 
'get_unaligned_le16' was here
static inline u16 get_unaligned_le16(const void *p)
  ^
   In file included from drivers/nfc/nfcmrvl/fw_dnld.c:20:0:
>> include/linux/unaligned/access_ok.h:12:19: error: redefinition of 
>> 'get_unaligned_le32'
static inline u32 get_unaligned_le32(const void *p)
  ^
   In file included from arch/ia64/include/asm/unaligned.h:4:0,
from arch/ia64/include/asm/io.h:22,
from arch/ia64/include/asm/smp.h:20,
from include/linux/smp.h:59,
from include/linux/topology.h:33,
from include/linux/gfp.h:8,
from include/linux/kmod.h:22,
from include/linux/module.h:13,
from drivers/nfc/nfcmrvl/fw_dnld.c:19:
   include/linux/unaligned/le_struct.h:11:19: note: previous definition of 
'get_unaligned_le32' was here
static inline u32 get_unaligned_le32(const void *p)
  ^
   In file included from drivers/nfc/nfcmrvl/fw_dnld.c:20:0:
>> include/linux/unaligned/access_ok.h:17:19: error: redefinition of 
>> 'get_unaligned_le64'
static inline u64 get_unaligned_le64(const void *p)
  ^
   In file included from arch/ia64/include/asm/unaligned.h:4:0,
from arch/ia64/include/asm/io.h:22,
from arch/ia64/include/asm/smp.h:20,
from include/linux/smp.h:59,
from include/linux/topology.h:33,
from include/linux/gfp.h:8,
from include/linux/kmod.h:22,
from include/linux/module.h:13,
from drivers/nfc/nfcmrvl/fw_dnld.c:19:
   include/linux/unaligned/le_struct.h:16:19: note: previous definition of 
'get_unaligned_le64' was here
static inline u64 get_unaligned_le64(const void *p)
  ^
   In file included from drivers/nfc/nfcmrvl/fw_dnld.c:20:0:
>> include/linux/unaligned/access_ok.h:22:19: error: redefinition of 
>> 'get_unaligned_be16'
static inline u16 get_unaligned_be16(const void *p)
  ^
   In file included from arch/ia64/include/asm/unaligned.h:5:0,
from arch/ia64/include/asm/io.h:22,
from arch/ia64/include/asm/smp.h:20,
from include/linux/smp.h:59,
from include/linux/topology.h:33,
from include/linux/gfp.h:8,
from include/linux/kmod.h:22,
from include/linux/module.h:13,
from drivers/nfc/nfcmrvl/fw_dnld.c:19:
   include/linux/unaligned/be_byteshift.h:40:19: note: previous definition of 
'get_unaligned_be16' was here
static inline u16 get_unaligned_be16(const void *p)
  ^
   In file included from drivers/nfc/nfcmrvl/fw_dnld.c:20:0:
>> include/linux/unaligned/access_ok.h:27:19: error: redefinition of 
>> 'get_unaligned_be32'
static inline u32 get_unaligned_be32(const void *p)
  ^
   In file included from arch/ia64/include/asm/unaligned.h:5:0,
from arch/ia64/include/asm/io.h:22,
from arch/ia64/include/asm/smp.h:20,
from include/linux/smp.h:59,
from include/linux/topology.h:33,
from include/linux/gfp.h:8,
from include/linux/kmod.h:22,
from include/linux/module.h:13,
from 

include/linux/unaligned/access_ok.h:7:19: error: redefinition of 'get_unaligned_le16'

2016-04-17 Thread kbuild test robot
tree:   https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git 
master
head:   9d090d01e3efdcdb8f7f07e86fcac91e01ae30cd
commit: 3194c6870158e305dac2af52f83681e9cb67280f NFC: nfcmrvl: add firmware 
download support
date:   6 months ago
config: ia64-allmodconfig (attached as .config)
reproduce:
wget 
https://git.kernel.org/cgit/linux/kernel/git/wfg/lkp-tests.git/plain/sbin/make.cross
 -O ~/bin/make.cross
chmod +x ~/bin/make.cross
git checkout 3194c6870158e305dac2af52f83681e9cb67280f
# save the attached .config to linux build tree
make.cross ARCH=ia64 

All errors (new ones prefixed by >>):

   In file included from drivers/nfc/nfcmrvl/fw_dnld.c:20:0:
>> include/linux/unaligned/access_ok.h:7:19: error: redefinition of 
>> 'get_unaligned_le16'
static inline u16 get_unaligned_le16(const void *p)
  ^
   In file included from arch/ia64/include/asm/unaligned.h:4:0,
from arch/ia64/include/asm/io.h:22,
from arch/ia64/include/asm/smp.h:20,
from include/linux/smp.h:59,
from include/linux/topology.h:33,
from include/linux/gfp.h:8,
from include/linux/kmod.h:22,
from include/linux/module.h:13,
from drivers/nfc/nfcmrvl/fw_dnld.c:19:
   include/linux/unaligned/le_struct.h:6:19: note: previous definition of 
'get_unaligned_le16' was here
static inline u16 get_unaligned_le16(const void *p)
  ^
   In file included from drivers/nfc/nfcmrvl/fw_dnld.c:20:0:
>> include/linux/unaligned/access_ok.h:12:19: error: redefinition of 
>> 'get_unaligned_le32'
static inline u32 get_unaligned_le32(const void *p)
  ^
   In file included from arch/ia64/include/asm/unaligned.h:4:0,
from arch/ia64/include/asm/io.h:22,
from arch/ia64/include/asm/smp.h:20,
from include/linux/smp.h:59,
from include/linux/topology.h:33,
from include/linux/gfp.h:8,
from include/linux/kmod.h:22,
from include/linux/module.h:13,
from drivers/nfc/nfcmrvl/fw_dnld.c:19:
   include/linux/unaligned/le_struct.h:11:19: note: previous definition of 
'get_unaligned_le32' was here
static inline u32 get_unaligned_le32(const void *p)
  ^
   In file included from drivers/nfc/nfcmrvl/fw_dnld.c:20:0:
>> include/linux/unaligned/access_ok.h:17:19: error: redefinition of 
>> 'get_unaligned_le64'
static inline u64 get_unaligned_le64(const void *p)
  ^
   In file included from arch/ia64/include/asm/unaligned.h:4:0,
from arch/ia64/include/asm/io.h:22,
from arch/ia64/include/asm/smp.h:20,
from include/linux/smp.h:59,
from include/linux/topology.h:33,
from include/linux/gfp.h:8,
from include/linux/kmod.h:22,
from include/linux/module.h:13,
from drivers/nfc/nfcmrvl/fw_dnld.c:19:
   include/linux/unaligned/le_struct.h:16:19: note: previous definition of 
'get_unaligned_le64' was here
static inline u64 get_unaligned_le64(const void *p)
  ^
   In file included from drivers/nfc/nfcmrvl/fw_dnld.c:20:0:
>> include/linux/unaligned/access_ok.h:22:19: error: redefinition of 
>> 'get_unaligned_be16'
static inline u16 get_unaligned_be16(const void *p)
  ^
   In file included from arch/ia64/include/asm/unaligned.h:5:0,
from arch/ia64/include/asm/io.h:22,
from arch/ia64/include/asm/smp.h:20,
from include/linux/smp.h:59,
from include/linux/topology.h:33,
from include/linux/gfp.h:8,
from include/linux/kmod.h:22,
from include/linux/module.h:13,
from drivers/nfc/nfcmrvl/fw_dnld.c:19:
   include/linux/unaligned/be_byteshift.h:40:19: note: previous definition of 
'get_unaligned_be16' was here
static inline u16 get_unaligned_be16(const void *p)
  ^
   In file included from drivers/nfc/nfcmrvl/fw_dnld.c:20:0:
>> include/linux/unaligned/access_ok.h:27:19: error: redefinition of 
>> 'get_unaligned_be32'
static inline u32 get_unaligned_be32(const void *p)
  ^
   In file included from arch/ia64/include/asm/unaligned.h:5:0,
from arch/ia64/include/asm/io.h:22,
from arch/ia64/include/asm/smp.h:20,
from include/linux/smp.h:59,
from include/linux/topology.h:33,
from include/linux/gfp.h:8,
from include/linux/kmod.h:22,
from include/linux/module.h:13,
from 

Re: [PATCH V2] net: ethernet: mellanox: correct page conversion

2016-04-17 Thread David Miller
From: Sinan Kaya 
Date: Sat, 16 Apr 2016 18:23:32 -0400

> Current code is assuming that the address returned by dma_alloc_coherent
> is a logical address. This is not true on ARM/ARM64 systems. This patch
> replaces dma_alloc_coherent with dma_map_page API. The address returned
> can later be virtually mapped from the CPU side with vmap API.
> 
> Signed-off-by: Sinan Kaya 

You can't do this.

The DMA map page API gives non-coherent mappings, and thus requires
proper flushing.

So a straight conversion like this is never legitimate.


Re: [PATCH V2] net: ethernet: mellanox: correct page conversion

2016-04-17 Thread David Miller
From: Sinan Kaya 
Date: Sat, 16 Apr 2016 18:23:32 -0400

> Current code is assuming that the address returned by dma_alloc_coherent
> is a logical address. This is not true on ARM/ARM64 systems. This patch
> replaces dma_alloc_coherent with dma_map_page API. The address returned
> can later be virtually mapped from the CPU side with vmap API.
> 
> Signed-off-by: Sinan Kaya 

You can't do this.

The DMA map page API gives non-coherent mappings, and thus requires
proper flushing.

So a straight conversion like this is never legitimate.


Re: [PATCH 3/7] [Media] vcodec: mediatek: Add Mediatek V4L2 Video Decoder Driver

2016-04-17 Thread tiffany lin
Hi Hans,

On Fri, 2016-04-15 at 16:27 +0200, Hans Verkuil wrote:
> On 04/13/2016 02:01 PM, Tiffany Lin wrote:
> > Add v4l2 layer decoder driver for MT8173
> > 
> > Signed-off-by: Tiffany Lin 
> > ---
> >  drivers/media/platform/mtk-vcodec/Makefile |   10 +-
> >  drivers/media/platform/mtk-vcodec/mtk_vcodec_dec.c | 1429 
> > 
> >  drivers/media/platform/mtk-vcodec/mtk_vcodec_dec.h |   81 ++
> >  .../media/platform/mtk-vcodec/mtk_vcodec_dec_drv.c |  469 +++
> >  .../media/platform/mtk-vcodec/mtk_vcodec_dec_pm.c  |  153 +++
> >  .../media/platform/mtk-vcodec/mtk_vcodec_dec_pm.h  |   28 +
> >  drivers/media/platform/mtk-vcodec/mtk_vcodec_drv.h |   98 +-
> >  drivers/media/platform/mtk-vcodec/vdec_drv_base.h  |   56 +
> >  drivers/media/platform/mtk-vcodec/vdec_drv_if.c|  113 ++
> >  drivers/media/platform/mtk-vcodec/vdec_drv_if.h|   93 ++
> >  drivers/media/platform/mtk-vcodec/vdec_ipi_msg.h   |   86 ++
> >  11 files changed, 2596 insertions(+), 20 deletions(-)
> >  create mode 100644 drivers/media/platform/mtk-vcodec/mtk_vcodec_dec.c
> >  create mode 100644 drivers/media/platform/mtk-vcodec/mtk_vcodec_dec.h
> >  create mode 100644 drivers/media/platform/mtk-vcodec/mtk_vcodec_dec_drv.c
> >  create mode 100644 drivers/media/platform/mtk-vcodec/mtk_vcodec_dec_pm.c
> >  create mode 100644 drivers/media/platform/mtk-vcodec/mtk_vcodec_dec_pm.h
> >  create mode 100644 drivers/media/platform/mtk-vcodec/vdec_drv_base.h
> >  create mode 100644 drivers/media/platform/mtk-vcodec/vdec_drv_if.c
> >  create mode 100644 drivers/media/platform/mtk-vcodec/vdec_drv_if.h
> >  create mode 100644 drivers/media/platform/mtk-vcodec/vdec_ipi_msg.h
> > 
> > diff --git a/drivers/media/platform/mtk-vcodec/Makefile 
> > b/drivers/media/platform/mtk-vcodec/Makefile
> > index dc5cb00..4c8ed2f 100644
> > --- a/drivers/media/platform/mtk-vcodec/Makefile
> > +++ b/drivers/media/platform/mtk-vcodec/Makefile
> > @@ -1,7 +1,13 @@
> >  
> >  
> > -obj-$(CONFIG_VIDEO_MEDIATEK_VCODEC) += mtk-vcodec-enc.o mtk-vcodec-common.o
> > -
> > +obj-$(CONFIG_VIDEO_MEDIATEK_VCODEC) += mtk-vcodec-dec.o \
> > +  mtk-vcodec-enc.o \
> > +  mtk-vcodec-common.o
> > +
> > +mtk-vcodec-dec-y := mtk_vcodec_dec_drv.o \
> > +   vdec_drv_if.o \
> > +   mtk_vcodec_dec.o \
> > +   mtk_vcodec_dec_pm.o \
> >  
> >  
> >  mtk-vcodec-enc-y := venc/venc_vp8_if.o \
> > diff --git a/drivers/media/platform/mtk-vcodec/mtk_vcodec_dec.c 
> > b/drivers/media/platform/mtk-vcodec/mtk_vcodec_dec.c
> > new file mode 100644
> > index 000..0499413
> > --- /dev/null
> > +++ b/drivers/media/platform/mtk-vcodec/mtk_vcodec_dec.c
> > @@ -0,0 +1,1429 @@
> > +/*
> > +* Copyright (c) 2016 MediaTek Inc.
> > +* Author: PC Chen 
> > +* Tiffany Lin 
> > +*
> > +* This program is free software; you can redistribute it and/or modify
> > +* it under the terms of the GNU General Public License version 2 as
> > +* published by the Free Software Foundation.
> > +*
> > +* This program is distributed in the hope that it will be useful,
> > +* but WITHOUT ANY WARRANTY; without even the implied warranty of
> > +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
> > +* GNU General Public License for more details.
> > +*/
> > +
> > +#include 
> > +#include 
> > +#include 
> > +
> > +#include "mtk_vcodec_drv.h"
> > +#include "mtk_vcodec_dec.h"
> > +#include "mtk_vcodec_intr.h"
> > +#include "mtk_vcodec_util.h"
> > +#include "vdec_drv_if.h"
> > +#include "mtk_vcodec_dec_pm.h"
> > +
> > +static struct mtk_video_fmt mtk_video_formats[] = {
> > +   {
> > +   .fourcc = V4L2_PIX_FMT_H264,
> > +   .type = MTK_FMT_DEC,
> > +   .num_planes = 1,
> > +   },
> > +   {
> > +   .fourcc = V4L2_PIX_FMT_VP8,
> > +   .type = MTK_FMT_DEC,
> > +   .num_planes = 1,
> > +   },
> > +   {
> > +   .fourcc = V4L2_PIX_FMT_VP9,
> > +   .type = MTK_FMT_DEC,
> > +   .num_planes = 1,
> > +   },
> > +   {
> > +   .fourcc = V4L2_PIX_FMT_MT21,
> > +   .type = MTK_FMT_FRAME,
> > +   .num_planes = 2,
> > +   },
> > +};
> > +#define OUT_FMT_IDX 0
> > +#define CAP_FMT_IDX 3
> > +
> > +#define VCODEC_CAPABILITY_4K_DISABLED  0x10
> > +#define VCODEC_DEC_4K_CODED_WIDTH  4096U
> > +#define VCODEC_DEC_4K_CODED_HEIGHT 2304U
> > +
> > +#define MTK_VDEC_MIN_W 64U
> > +#define MTK_VDEC_MIN_H 64U
> > +#define MTK_VDEC_MAX_W 2048U
> > +#define MTK_VDEC_MAX_H 1088U
> > +#define DFT_CFG_WIDTH  MTK_VDEC_MIN_W
> > +#define DFT_CFG_HEIGHT MTK_VDEC_MIN_H
> > +
> > +static const struct mtk_codec_framesizes mtk_vdec_framesizes[] = {
> > +   {
> > +   .fourcc = V4L2_PIX_FMT_H264,
> > +   .stepwise = {  MTK_VDEC_MIN_W, MTK_VDEC_MAX_W, 16,
> > +   

Re: [PATCH 3/7] [Media] vcodec: mediatek: Add Mediatek V4L2 Video Decoder Driver

2016-04-17 Thread tiffany lin
Hi Hans,

On Fri, 2016-04-15 at 16:27 +0200, Hans Verkuil wrote:
> On 04/13/2016 02:01 PM, Tiffany Lin wrote:
> > Add v4l2 layer decoder driver for MT8173
> > 
> > Signed-off-by: Tiffany Lin 
> > ---
> >  drivers/media/platform/mtk-vcodec/Makefile |   10 +-
> >  drivers/media/platform/mtk-vcodec/mtk_vcodec_dec.c | 1429 
> > 
> >  drivers/media/platform/mtk-vcodec/mtk_vcodec_dec.h |   81 ++
> >  .../media/platform/mtk-vcodec/mtk_vcodec_dec_drv.c |  469 +++
> >  .../media/platform/mtk-vcodec/mtk_vcodec_dec_pm.c  |  153 +++
> >  .../media/platform/mtk-vcodec/mtk_vcodec_dec_pm.h  |   28 +
> >  drivers/media/platform/mtk-vcodec/mtk_vcodec_drv.h |   98 +-
> >  drivers/media/platform/mtk-vcodec/vdec_drv_base.h  |   56 +
> >  drivers/media/platform/mtk-vcodec/vdec_drv_if.c|  113 ++
> >  drivers/media/platform/mtk-vcodec/vdec_drv_if.h|   93 ++
> >  drivers/media/platform/mtk-vcodec/vdec_ipi_msg.h   |   86 ++
> >  11 files changed, 2596 insertions(+), 20 deletions(-)
> >  create mode 100644 drivers/media/platform/mtk-vcodec/mtk_vcodec_dec.c
> >  create mode 100644 drivers/media/platform/mtk-vcodec/mtk_vcodec_dec.h
> >  create mode 100644 drivers/media/platform/mtk-vcodec/mtk_vcodec_dec_drv.c
> >  create mode 100644 drivers/media/platform/mtk-vcodec/mtk_vcodec_dec_pm.c
> >  create mode 100644 drivers/media/platform/mtk-vcodec/mtk_vcodec_dec_pm.h
> >  create mode 100644 drivers/media/platform/mtk-vcodec/vdec_drv_base.h
> >  create mode 100644 drivers/media/platform/mtk-vcodec/vdec_drv_if.c
> >  create mode 100644 drivers/media/platform/mtk-vcodec/vdec_drv_if.h
> >  create mode 100644 drivers/media/platform/mtk-vcodec/vdec_ipi_msg.h
> > 
> > diff --git a/drivers/media/platform/mtk-vcodec/Makefile 
> > b/drivers/media/platform/mtk-vcodec/Makefile
> > index dc5cb00..4c8ed2f 100644
> > --- a/drivers/media/platform/mtk-vcodec/Makefile
> > +++ b/drivers/media/platform/mtk-vcodec/Makefile
> > @@ -1,7 +1,13 @@
> >  
> >  
> > -obj-$(CONFIG_VIDEO_MEDIATEK_VCODEC) += mtk-vcodec-enc.o mtk-vcodec-common.o
> > -
> > +obj-$(CONFIG_VIDEO_MEDIATEK_VCODEC) += mtk-vcodec-dec.o \
> > +  mtk-vcodec-enc.o \
> > +  mtk-vcodec-common.o
> > +
> > +mtk-vcodec-dec-y := mtk_vcodec_dec_drv.o \
> > +   vdec_drv_if.o \
> > +   mtk_vcodec_dec.o \
> > +   mtk_vcodec_dec_pm.o \
> >  
> >  
> >  mtk-vcodec-enc-y := venc/venc_vp8_if.o \
> > diff --git a/drivers/media/platform/mtk-vcodec/mtk_vcodec_dec.c 
> > b/drivers/media/platform/mtk-vcodec/mtk_vcodec_dec.c
> > new file mode 100644
> > index 000..0499413
> > --- /dev/null
> > +++ b/drivers/media/platform/mtk-vcodec/mtk_vcodec_dec.c
> > @@ -0,0 +1,1429 @@
> > +/*
> > +* Copyright (c) 2016 MediaTek Inc.
> > +* Author: PC Chen 
> > +* Tiffany Lin 
> > +*
> > +* This program is free software; you can redistribute it and/or modify
> > +* it under the terms of the GNU General Public License version 2 as
> > +* published by the Free Software Foundation.
> > +*
> > +* This program is distributed in the hope that it will be useful,
> > +* but WITHOUT ANY WARRANTY; without even the implied warranty of
> > +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
> > +* GNU General Public License for more details.
> > +*/
> > +
> > +#include 
> > +#include 
> > +#include 
> > +
> > +#include "mtk_vcodec_drv.h"
> > +#include "mtk_vcodec_dec.h"
> > +#include "mtk_vcodec_intr.h"
> > +#include "mtk_vcodec_util.h"
> > +#include "vdec_drv_if.h"
> > +#include "mtk_vcodec_dec_pm.h"
> > +
> > +static struct mtk_video_fmt mtk_video_formats[] = {
> > +   {
> > +   .fourcc = V4L2_PIX_FMT_H264,
> > +   .type = MTK_FMT_DEC,
> > +   .num_planes = 1,
> > +   },
> > +   {
> > +   .fourcc = V4L2_PIX_FMT_VP8,
> > +   .type = MTK_FMT_DEC,
> > +   .num_planes = 1,
> > +   },
> > +   {
> > +   .fourcc = V4L2_PIX_FMT_VP9,
> > +   .type = MTK_FMT_DEC,
> > +   .num_planes = 1,
> > +   },
> > +   {
> > +   .fourcc = V4L2_PIX_FMT_MT21,
> > +   .type = MTK_FMT_FRAME,
> > +   .num_planes = 2,
> > +   },
> > +};
> > +#define OUT_FMT_IDX 0
> > +#define CAP_FMT_IDX 3
> > +
> > +#define VCODEC_CAPABILITY_4K_DISABLED  0x10
> > +#define VCODEC_DEC_4K_CODED_WIDTH  4096U
> > +#define VCODEC_DEC_4K_CODED_HEIGHT 2304U
> > +
> > +#define MTK_VDEC_MIN_W 64U
> > +#define MTK_VDEC_MIN_H 64U
> > +#define MTK_VDEC_MAX_W 2048U
> > +#define MTK_VDEC_MAX_H 1088U
> > +#define DFT_CFG_WIDTH  MTK_VDEC_MIN_W
> > +#define DFT_CFG_HEIGHT MTK_VDEC_MIN_H
> > +
> > +static const struct mtk_codec_framesizes mtk_vdec_framesizes[] = {
> > +   {
> > +   .fourcc = V4L2_PIX_FMT_H264,
> > +   .stepwise = {  MTK_VDEC_MIN_W, MTK_VDEC_MAX_W, 16,
> > +   MTK_VDEC_MIN_H, MTK_VDEC_MAX_H, 16 },
> > +   },
> > +   {
> > +   .fourcc = 

Re: [PATCH] mmc/sdio: utilize runtime PM to speed up SDIO card's resume process

2016-04-17 Thread Fu, Zhonghui

Any comments are welcome.


Thanks,
Zhonghui

On 4/13/2016 2:42 PM, Fu, Zhonghui wrote:
> Leave some work of SDIO card's resume process into its runtime resume
> process to shorten system resume latency.
>
> Signed-off-by: Zhonghui Fu 
> ---
>  drivers/mmc/core/sdio.c  |   21 ++---
>  include/linux/mmc/host.h |9 ++---
>  2 files changed, 16 insertions(+), 14 deletions(-)
>
> diff --git a/drivers/mmc/core/sdio.c b/drivers/mmc/core/sdio.c
> index bd44ba8..fe5d3c5 100644
> --- a/drivers/mmc/core/sdio.c
> +++ b/drivers/mmc/core/sdio.c
> @@ -902,6 +902,10 @@ static int mmc_sdio_suspend(struct mmc_host *host)
>  
>   if (!mmc_card_keep_power(host)) {
>   mmc_power_off(host);
> + if (host->caps & MMC_CAP_POWER_OFF_CARD) {
> + pm_runtime_disable(&host->card->dev);
> + pm_runtime_set_suspended(&host->card->dev);
> + }
>   } else if (host->retune_period) {
>   mmc_retune_timer_stop(host);
>   mmc_retune_needed(host);
> @@ -924,18 +928,16 @@ static int mmc_sdio_resume(struct mmc_host *host)
>  
>   /* Restore power if needed */
>   if (!mmc_card_keep_power(host)) {
> - mmc_power_up(host, host->card->ocr);
>   /*
> -  * Tell runtime PM core we just powered up the card,
> -  * since it still believes the card is powered off.
>* Note that currently runtime PM is only enabled
>* for SDIO cards that are MMC_CAP_POWER_OFF_CARD
>*/
>   if (host->caps & MMC_CAP_POWER_OFF_CARD) {
> - pm_runtime_disable(&host->card->dev);
> - pm_runtime_set_active(&host->card->dev);
>   pm_runtime_enable(&host->card->dev);
> + goto out;
>   }
> +
> + mmc_power_up(host, host->card->ocr);
>   }
>  
>   /* No need to reinitialize powered-resumed nonremovable cards */
> @@ -953,13 +955,10 @@ static int mmc_sdio_resume(struct mmc_host *host)
>   err = sdio_enable_4bit_bus(host->card);
>   }
>  
> - if (!err && host->sdio_irqs) {
> - if (!(host->caps2 & MMC_CAP2_SDIO_IRQ_NOTHREAD))
> - wake_up_process(host->sdio_irq_thread);
> - else if (host->caps & MMC_CAP_SDIO_IRQ)
> - host->ops->enable_sdio_irq(host, 1);
> - }
> + if (!err && host->sdio_irqs)
> + mmc_signal_sdio_irq(host);
>  
> +out:
>   mmc_release_host(host);
>  
>   host->pm_flags &= ~MMC_PM_KEEP_POWER;
> diff --git a/include/linux/mmc/host.h b/include/linux/mmc/host.h
> index 8dd4d29..8faaa5b 100644
> --- a/include/linux/mmc/host.h
> +++ b/include/linux/mmc/host.h
> @@ -400,10 +400,13 @@ void mmc_request_done(struct mmc_host *, struct 
> mmc_request *);
>  
>  static inline void mmc_signal_sdio_irq(struct mmc_host *host)
>  {
> - host->ops->enable_sdio_irq(host, 0);
> - host->sdio_irq_pending = true;
> - if (host->sdio_irq_thread)
> + if (!(host->caps2 & MMC_CAP2_SDIO_IRQ_NOTHREAD)) {
> + host->ops->enable_sdio_irq(host, 0);
> + host->sdio_irq_pending = true;
>   wake_up_process(host->sdio_irq_thread);
> + } else if (host->caps & MMC_CAP_SDIO_IRQ) {
> + host->ops->enable_sdio_irq(host, 1);
> + }
>  }
>  
>  void sdio_run_irqs(struct mmc_host *host);
> -- 1.7.1
>



Re: [PATCH] mmc/sdio: utilize runtime PM to speed up SDIO card's resume process

2016-04-17 Thread Fu, Zhonghui

Any comments are welcome.


Thanks,
Zhonghui

On 4/13/2016 2:42 PM, Fu, Zhonghui wrote:
> Leave some work of SDIO card's resume process into its runtime resume
> process to shorten system resume latency.
>
> Signed-off-by: Zhonghui Fu 
> ---
>  drivers/mmc/core/sdio.c  |   21 ++---
>  include/linux/mmc/host.h |9 ++---
>  2 files changed, 16 insertions(+), 14 deletions(-)
>
> diff --git a/drivers/mmc/core/sdio.c b/drivers/mmc/core/sdio.c
> index bd44ba8..fe5d3c5 100644
> --- a/drivers/mmc/core/sdio.c
> +++ b/drivers/mmc/core/sdio.c
> @@ -902,6 +902,10 @@ static int mmc_sdio_suspend(struct mmc_host *host)
>  
>   if (!mmc_card_keep_power(host)) {
>   mmc_power_off(host);
> + if (host->caps & MMC_CAP_POWER_OFF_CARD) {
> + pm_runtime_disable(&host->card->dev);
> + pm_runtime_set_suspended(&host->card->dev);
> + }
>   } else if (host->retune_period) {
>   mmc_retune_timer_stop(host);
>   mmc_retune_needed(host);
> @@ -924,18 +928,16 @@ static int mmc_sdio_resume(struct mmc_host *host)
>  
>   /* Restore power if needed */
>   if (!mmc_card_keep_power(host)) {
> - mmc_power_up(host, host->card->ocr);
>   /*
> -  * Tell runtime PM core we just powered up the card,
> -  * since it still believes the card is powered off.
>* Note that currently runtime PM is only enabled
>* for SDIO cards that are MMC_CAP_POWER_OFF_CARD
>*/
>   if (host->caps & MMC_CAP_POWER_OFF_CARD) {
> - pm_runtime_disable(&host->card->dev);
> - pm_runtime_set_active(&host->card->dev);
>   pm_runtime_enable(&host->card->dev);
> + goto out;
>   }
> +
> + mmc_power_up(host, host->card->ocr);
>   }
>  
>   /* No need to reinitialize powered-resumed nonremovable cards */
> @@ -953,13 +955,10 @@ static int mmc_sdio_resume(struct mmc_host *host)
>   err = sdio_enable_4bit_bus(host->card);
>   }
>  
> - if (!err && host->sdio_irqs) {
> - if (!(host->caps2 & MMC_CAP2_SDIO_IRQ_NOTHREAD))
> - wake_up_process(host->sdio_irq_thread);
> - else if (host->caps & MMC_CAP_SDIO_IRQ)
> - host->ops->enable_sdio_irq(host, 1);
> - }
> + if (!err && host->sdio_irqs)
> + mmc_signal_sdio_irq(host);
>  
> +out:
>   mmc_release_host(host);
>  
>   host->pm_flags &= ~MMC_PM_KEEP_POWER;
> diff --git a/include/linux/mmc/host.h b/include/linux/mmc/host.h
> index 8dd4d29..8faaa5b 100644
> --- a/include/linux/mmc/host.h
> +++ b/include/linux/mmc/host.h
> @@ -400,10 +400,13 @@ void mmc_request_done(struct mmc_host *, struct 
> mmc_request *);
>  
>  static inline void mmc_signal_sdio_irq(struct mmc_host *host)
>  {
> - host->ops->enable_sdio_irq(host, 0);
> - host->sdio_irq_pending = true;
> - if (host->sdio_irq_thread)
> + if (!(host->caps2 & MMC_CAP2_SDIO_IRQ_NOTHREAD)) {
> + host->ops->enable_sdio_irq(host, 0);
> + host->sdio_irq_pending = true;
>   wake_up_process(host->sdio_irq_thread);
> + } else if (host->caps & MMC_CAP_SDIO_IRQ) {
> + host->ops->enable_sdio_irq(host, 1);
> + }
>  }
>  
>  void sdio_run_irqs(struct mmc_host *host);
> -- 1.7.1
>



  1   2   3   4   5   6   7   8   9   10   >