[lkp] [blk] e7b81af035: BUG: unable to handle kernel NULL pointer dereference at 00000000000000a0
FYI, we noticed the below changes on https://git.kernel.org/pub/scm/linux/kernel/git/axboe/linux-block.git wb-buf-throttle commit e7b81af035ddc1323090d86350627881fcf9b1b0 ("blk-wb: updates") ++++ | | 2adccca124 | e7b81af035 | ++++ | boot_successes | 3 | 2 | | boot_failures | 1 | 6 | | page_allocation_failure:order:#,mode:#(GFP_NOWAIT|__GFP_HIGH|__GFP_COMP|__GFP_NOTRACK) | 1 || | warn_alloc_failed+0x | 1 || | Mem-Info | 1 || | backtrace:do_execveat_common | 1 || | backtrace:compat_SyS_execve | 1 || | backtrace:compat_process_vm_rw | 1 || | backtrace:compat_SyS_process_vm_writev | 1 || | BUG:unable_to_handle_kernel | 0 | 6 | | Oops | 0 | 6 | | RIP:blk_wb_done | 0 | 6 | | Kernel_panic-not_syncing:Fatal_exception_in_interrupt | 0 | 6 | | backtrace:cpu_startup_entry | 0 | 3 | | backtrace:schedule_preempt_disabled | 0 | 1 | ++++ [ 17.486381] FDC 0 is a S82078B [ 17.557276] brd: module loaded [ 17.602156] loop: module loaded [ 17.612220] BUG: unable to handle kernel NULL pointer dereference at 00a0 [ 17.614651] IP: [] blk_wb_done+0x18/0x8e [ 17.616135] PGD 0 [ 17.617105] Oops: [#1] SMP [ 17.618343] Modules linked in: [ 17.619459] CPU: 0 PID: 0 Comm: swapper/0 Not tainted 4.6.0-rc1-00096-ge7b81af #1 [ 17.621643] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Debian-1.8.2-1 04/01/2014 [ 17.624034] task: 8241d540 ti: 8240 task.ti: 8240 [ 17.626211] RIP: 0010:[] [] blk_wb_done+0x18/0x8e [ 17.628556] RSP: 0018:880038803df8 EFLAGS: 00010046 [ 17.629923] RAX: 88003e550008 RBX: RCX: 88003880cf20 [ 17.631540] RDX: ef7bdef7bdef7bdf RSI: 88003e4a RDI: [ 17.633175] RBP: 880038803e08 R08: e272d5c3 R09: 0004 [ 17.634807] R10: 880038803d78 R11: 880038808000 R12: 88003e4a [ 17.636426] R13: e8c07800 R14: R15: 0001 [ 17.638053] FS: () GS:88003880() knlGS: [ 17.640324] CS: 0010 DS: ES: CR0: 80050033 [ 17.641740] CR2: 00a0 CR3: 02418000 CR4: 06f0 [ 17.643381] Stack: [ 17.644273] 88003e4a 880038803e28 8152fa30 [ 17.646792] 88003e4a 880038803e38 81755005 [ 17.649300] 880038803e68 
8152fcef 88003e974548 [ 17.651805] Call Trace: [ 17.652740] [ 17.653108] [] blk_mq_end_request+0x38/0x6c [ 17.655190] [] virtblk_request_done+0x5e/0x60 [ 17.656660] [] __blk_mq_complete_request+0x122/0x132 [ 17.658222] [] blk_mq_complete_request+0x1c/0x1e [ 17.659724] [] virtblk_done+0x74/0xce [ 17.661118] [] vring_interrupt+0x32/0x39 [ 17.662525] [] handle_irq_event_percpu+0x146/0x3d7 [ 17.664060] [] handle_irq_event+0x38/0x56 [ 17.665463] [] handle_edge_irq+0xd9/0xfb [ 17.666872] [] handle_irq+0x101/0x109 [ 17.668246] [] do_IRQ+0x85/0x101 [ 17.669550] [] common_interrupt+0x8c/0x8c [ 17.671071] [ 17.671434] [] ? native_safe_halt+0x6/0x8 [ 17.673485] [] default_idle+0x60/0x1a8 [ 17.674935] [] arch_cpu_idle+0xf/0x11 [ 17.676307] [] default_idle_call+0x3d/0x44 [ 17.677736] [] cpu_startup_entry+0x218/0x3e6 [ 17.679209] [] rest_init+0x135/0x13b [ 17.680585] []
[lkp] [blk] e7b81af035: BUG: unable to handle kernel NULL pointer dereference at 00000000000000a0
FYI, we noticed the below changes on https://git.kernel.org/pub/scm/linux/kernel/git/axboe/linux-block.git wb-buf-throttle commit e7b81af035ddc1323090d86350627881fcf9b1b0 ("blk-wb: updates") ++++ | | 2adccca124 | e7b81af035 | ++++ | boot_successes | 3 | 2 | | boot_failures | 1 | 6 | | page_allocation_failure:order:#,mode:#(GFP_NOWAIT|__GFP_HIGH|__GFP_COMP|__GFP_NOTRACK) | 1 || | warn_alloc_failed+0x | 1 || | Mem-Info | 1 || | backtrace:do_execveat_common | 1 || | backtrace:compat_SyS_execve | 1 || | backtrace:compat_process_vm_rw | 1 || | backtrace:compat_SyS_process_vm_writev | 1 || | BUG:unable_to_handle_kernel | 0 | 6 | | Oops | 0 | 6 | | RIP:blk_wb_done | 0 | 6 | | Kernel_panic-not_syncing:Fatal_exception_in_interrupt | 0 | 6 | | backtrace:cpu_startup_entry | 0 | 3 | | backtrace:schedule_preempt_disabled | 0 | 1 | ++++ [ 17.486381] FDC 0 is a S82078B [ 17.557276] brd: module loaded [ 17.602156] loop: module loaded [ 17.612220] BUG: unable to handle kernel NULL pointer dereference at 00a0 [ 17.614651] IP: [] blk_wb_done+0x18/0x8e [ 17.616135] PGD 0 [ 17.617105] Oops: [#1] SMP [ 17.618343] Modules linked in: [ 17.619459] CPU: 0 PID: 0 Comm: swapper/0 Not tainted 4.6.0-rc1-00096-ge7b81af #1 [ 17.621643] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Debian-1.8.2-1 04/01/2014 [ 17.624034] task: 8241d540 ti: 8240 task.ti: 8240 [ 17.626211] RIP: 0010:[] [] blk_wb_done+0x18/0x8e [ 17.628556] RSP: 0018:880038803df8 EFLAGS: 00010046 [ 17.629923] RAX: 88003e550008 RBX: RCX: 88003880cf20 [ 17.631540] RDX: ef7bdef7bdef7bdf RSI: 88003e4a RDI: [ 17.633175] RBP: 880038803e08 R08: e272d5c3 R09: 0004 [ 17.634807] R10: 880038803d78 R11: 880038808000 R12: 88003e4a [ 17.636426] R13: e8c07800 R14: R15: 0001 [ 17.638053] FS: () GS:88003880() knlGS: [ 17.640324] CS: 0010 DS: ES: CR0: 80050033 [ 17.641740] CR2: 00a0 CR3: 02418000 CR4: 06f0 [ 17.643381] Stack: [ 17.644273] 88003e4a 880038803e28 8152fa30 [ 17.646792] 88003e4a 880038803e38 81755005 [ 17.649300] 880038803e68 
8152fcef 88003e974548 [ 17.651805] Call Trace: [ 17.652740] [ 17.653108] [] blk_mq_end_request+0x38/0x6c [ 17.655190] [] virtblk_request_done+0x5e/0x60 [ 17.656660] [] __blk_mq_complete_request+0x122/0x132 [ 17.658222] [] blk_mq_complete_request+0x1c/0x1e [ 17.659724] [] virtblk_done+0x74/0xce [ 17.661118] [] vring_interrupt+0x32/0x39 [ 17.662525] [] handle_irq_event_percpu+0x146/0x3d7 [ 17.664060] [] handle_irq_event+0x38/0x56 [ 17.665463] [] handle_edge_irq+0xd9/0xfb [ 17.666872] [] handle_irq+0x101/0x109 [ 17.668246] [] do_IRQ+0x85/0x101 [ 17.669550] [] common_interrupt+0x8c/0x8c [ 17.671071] [ 17.671434] [] ? native_safe_halt+0x6/0x8 [ 17.673485] [] default_idle+0x60/0x1a8 [ 17.674935] [] arch_cpu_idle+0xf/0x11 [ 17.676307] [] default_idle_call+0x3d/0x44 [ 17.677736] [] cpu_startup_entry+0x218/0x3e6 [ 17.679209] [] rest_init+0x135/0x13b [ 17.680585] []
Re: [PATCH 1/3] ARM: DTS: da850: add node for spi0
On Friday 15 April 2016 09:47 PM, David Lechner wrote: > On 04/15/2016 05:24 AM, Sekhar Nori wrote: > >> >> This made me notice that num-cs is populated wrongly for spi1. It >> actually has 8 chip selects. This is fine though. > > I might as well fix it since I have to make changes anyway. Don't > remember how I came up with 6. In section 3.7.7 of datasheet, there are 6 possible chip selects listed for SPI0 and 8 possible chipselects for SPI1. If you are fixing SPI1, please make that a separate patch. >> Also, it will be nice to add pinctrl entries for spi0 like it is done >> for spi1. You will need those anyway for using the interface. > > I omitted this on purpose. For my use case, I am using the SPI as > write-only, so not using the SOMI pin, which is actually muxed as a GPIO > for something else. So having a pinctl like spi1 is of no use to me. I > figured if someone needs it, they can add it, otherwise it just is > wasted space to me. Alright, makes sense. Regards, Sekhar
Re: [PATCH 1/3] ARM: DTS: da850: add node for spi0
On Friday 15 April 2016 09:47 PM, David Lechner wrote: > On 04/15/2016 05:24 AM, Sekhar Nori wrote: > >> >> This made me notice that num-cs is populated wrongly for spi1. It >> actually has 8 chip selects. This is fine though. > > I might as well fix it since I have to make changes anyway. Don't > remember how I came up with 6. In section 3.7.7 of datasheet, there are 6 possible chip selects listed for SPI0 and 8 possible chipselects for SPI1. If you are fixing SPI1, please make that a separate patch. >> Also, it will be nice to add pinctrl entries for spi0 like it is done >> for spi1. You will need those anyway for using the interface. > > I omitted this on purpose. For my use case, I am using the SPI as > write-only, so not using the SOMI pin, which is actually muxed as a GPIO > for something else. So having a pinctl like spi1 is of no use to me. I > figured if someone needs it, they can add it, otherwise it just is > wasted space to me. Alright, makes sense. Regards, Sekhar
RE: [PATCHv2] wlcore: spi: add wl18xx support
> > > > - all wilink family needs special init command for entering wspi mode. > > extra clock cycles should be sent after the spi init command while the > > cs pin is high. > > - switch to controlling the cs pin from the spi driver for achieving the > > above. > > - the selected cs gpio is read from the spi device-tree node using the > > cs-gpios field and setup as a gpio. > > - See the example below for specifying the cs gpio using the cs-gpios entry > >{ > > ... > > cs-gpios = < 5 0>; > > ... > > wlcore: wlcore@0 { > > compatible = "ti,wl1835"; > > ... > > ... > > }; > > }; > > > > Signed-off-by: Eyal Reizer > > I don't think this can work in general: not all SPI hosts use GPIOs for > controlling CS, so the logic can't work, and it's also a layering violation > for the > driver to look at the parent. > > I would suggest fixing this using a new API function from the SPI core, if we > don't already have a generic way to do it. > Originally this is what I had done, until I was pointed to the generic cs-gpio mechanism in the SPI core. It is a generic mechanism already in the SPI core driver. See: Documentation/devicetree/bindings/spi/spi-bus.txt It is also part of the generic spi.h (include/linux/spi/spi.h), already part of "struct spi_device". So it seemed redundant to add another mechanism for implementing the same thing. Platforms that interact with a wilink need to use it, and platforms that don't have this capability will probably not interact with a wilink device using SPI. Best Regards, Eyal
RE: [PATCHv2] wlcore: spi: add wl18xx support
> > > > - all wilink family needs special init command for entering wspi mode. > > extra clock cycles should be sent after the spi init command while the > > cs pin is high. > > - switch to controlling the cs pin from the spi driver for achieving the > > above. > > - the selected cs gpio is read from the spi device-tree node using the > > cs-gpios field and setup as a gpio. > > - See the example below for specifying the cs gpio using the cs-gpios entry > >{ > > ... > > cs-gpios = < 5 0>; > > ... > > wlcore: wlcore@0 { > > compatible = "ti,wl1835"; > > ... > > ... > > }; > > }; > > > > Signed-off-by: Eyal Reizer > > I don't think this can work in general: not all SPI hosts use GPIOs for > controlling CS, so the logic can't work, and it's also a layering violation > for the > driver to look at the parent. > > I would suggest fixing this using a new API function from the SPI core, if we > don't already have a generic way to do it. > Originally this is what I had done, until I was pointed to the generic cs-gpio mechanism in the SPI core. It is a generic mechanism already in the SPI core driver. See: Documentation/devicetree/bindings/spi/spi-bus.txt It is also part of the generic spi.h (include/linux/spi/spi.h), already part of "struct spi_device". So it seemed redundant to add another mechanism for implementing the same thing. Platforms that interact with a wilink need to use it, and platforms that don't have this capability will probably not interact with a wilink device using SPI. Best Regards, Eyal
[PATCH v2 1/3] PCI: imx6: Use enum instead of bool for variant indicator
Use enumerated type instead of a boolean flag to specify the variant of the PCIe IP block (6Q, 6SX, etc). This patch has zero functional impact, however it makes the code easier to extend for the case of more than 2 possible variants of an IP block (of which there are). Signed-off-by: Andrey Smirnov--- Changes since v1: - Patchset is rebased against https://git.kernel.org/cgit/linux/kernel/git/helgaas/pci.git/log/?h=pci/host-imx6 - DTS files changes moved into a separate patch drivers/pci/host/pci-imx6.c | 126 +--- 1 file changed, 71 insertions(+), 55 deletions(-) diff --git a/drivers/pci/host/pci-imx6.c b/drivers/pci/host/pci-imx6.c index 0f6d630..c570bbb 100644 --- a/drivers/pci/host/pci-imx6.c +++ b/drivers/pci/host/pci-imx6.c @@ -31,6 +31,11 @@ #define to_imx6_pcie(x)container_of(x, struct imx6_pcie, pp) +enum imx6_pcie_variants { + IMX6Q, + IMX6SX +}; + struct imx6_pcie { struct gpio_desc*reset_gpio; struct clk *pcie_bus; @@ -39,7 +44,7 @@ struct imx6_pcie { struct clk *pcie; struct pcie_portpp; struct regmap *iomuxc_gpr; - boolis_imx6sx; + enum imx6_pcie_variants variant; void __iomem*mem_base; u32 tx_deemph_gen1; u32 tx_deemph_gen2_3p5db; @@ -238,7 +243,8 @@ static int imx6_pcie_assert_core_reset(struct pcie_port *pp) struct imx6_pcie *imx6_pcie = to_imx6_pcie(pp); u32 val, gpr1, gpr12; - if (imx6_pcie->is_imx6sx) { + switch (imx6_pcie->variant) { + case IMX6SX: regmap_update_bits(imx6_pcie->iomuxc_gpr, IOMUXC_GPR12, IMX6SX_GPR12_PCIE_TEST_POWERDOWN, IMX6SX_GPR12_PCIE_TEST_POWERDOWN); @@ -246,72 +252,80 @@ static int imx6_pcie_assert_core_reset(struct pcie_port *pp) regmap_update_bits(imx6_pcie->iomuxc_gpr, IOMUXC_GPR5, IMX6SX_GPR5_PCIE_BTNRST_RESET, IMX6SX_GPR5_PCIE_BTNRST_RESET); - return 0; - } - - /* -* If the bootloader already enabled the link we need some special -* handling to get the core back into a state where it is safe to -* touch it for configuration. 
As there is no dedicated reset signal -* wired up for MX6QDL, we need to manually force LTSSM into "detect" -* state before completely disabling LTSSM, which is a prerequisite -* for core configuration. -* -* If both LTSSM_ENABLE and REF_SSP_ENABLE are active we have a strong -* indication that the bootloader activated the link. -*/ - regmap_read(imx6_pcie->iomuxc_gpr, IOMUXC_GPR1, ); - regmap_read(imx6_pcie->iomuxc_gpr, IOMUXC_GPR12, ); - - if ((gpr1 & IMX6Q_GPR1_PCIE_REF_CLK_EN) && - (gpr12 & IMX6Q_GPR12_PCIE_CTL_2)) { - val = readl(pp->dbi_base + PCIE_PL_PFLR); - val &= ~PCIE_PL_PFLR_LINK_STATE_MASK; - val |= PCIE_PL_PFLR_FORCE_LINK; - writel(val, pp->dbi_base + PCIE_PL_PFLR); + break; + case IMX6Q: + /* +* If the bootloader already enabled the link we need some special +* handling to get the core back into a state where it is safe to +* touch it for configuration. As there is no dedicated reset signal +* wired up for MX6QDL, we need to manually force LTSSM into "detect" +* state before completely disabling LTSSM, which is a prerequisite +* for core configuration. +* +* If both LTSSM_ENABLE and REF_SSP_ENABLE are active we have a strong +* indication that the bootloader activated the link. +*/ + regmap_read(imx6_pcie->iomuxc_gpr, IOMUXC_GPR1, ); + regmap_read(imx6_pcie->iomuxc_gpr, IOMUXC_GPR12, ); + + if ((gpr1 & IMX6Q_GPR1_PCIE_REF_CLK_EN) && + (gpr12 & IMX6Q_GPR12_PCIE_CTL_2)) { + val = readl(pp->dbi_base + PCIE_PL_PFLR); + val &= ~PCIE_PL_PFLR_LINK_STATE_MASK; + val |= PCIE_PL_PFLR_FORCE_LINK; + writel(val, pp->dbi_base + PCIE_PL_PFLR); + + regmap_update_bits(imx6_pcie->iomuxc_gpr, IOMUXC_GPR12, + IMX6Q_GPR12_PCIE_CTL_2, 0 << 10); + } - regmap_update_bits(imx6_pcie->iomuxc_gpr, IOMUXC_GPR12, - IMX6Q_GPR12_PCIE_CTL_2, 0 << 10); + regmap_update_bits(imx6_pcie->iomuxc_gpr, IOMUXC_GPR1, +
[PATCH v2 3/3] ARM: dts: imx6qp: Specify imx6qp version of PCIe core
I.MX6Quad Plus has a slightly different version of PCIe core than regular i.MX6Quad. Signed-off-by: Andrey Smirnov --- Changes since v1: - Patchset is rebased against https://git.kernel.org/cgit/linux/kernel/git/helgaas/pci.git/log/?h=pci/host-imx6 - DTS files changes moved into a separate patch arch/arm/boot/dts/imx6qp.dtsi | 3 +++ 1 file changed, 3 insertions(+) diff --git a/arch/arm/boot/dts/imx6qp.dtsi b/arch/arm/boot/dts/imx6qp.dtsi index 1ada714..886dbf2 100644 --- a/arch/arm/boot/dts/imx6qp.dtsi +++ b/arch/arm/boot/dts/imx6qp.dtsi @@ -82,5 +82,8 @@ "ldb_di0", "ldb_di1", "prg"; }; + pcie: pcie@0x0100 { + compatible = "fsl,imx6qp-pcie", "snps,dw-pcie"; + }; }; }; -- 2.5.5
[PATCH v2 3/3] ARM: dts: imx6qp: Specify imx6qp version of PCIe core
I.MX6Quad Plus has a slightly different version of PCIe core than regular i.MX6Quad. Signed-off-by: Andrey Smirnov --- Changes since v1: - Patchset is rebased against https://git.kernel.org/cgit/linux/kernel/git/helgaas/pci.git/log/?h=pci/host-imx6 - DTS files changes moved into a separate patch arch/arm/boot/dts/imx6qp.dtsi | 3 +++ 1 file changed, 3 insertions(+) diff --git a/arch/arm/boot/dts/imx6qp.dtsi b/arch/arm/boot/dts/imx6qp.dtsi index 1ada714..886dbf2 100644 --- a/arch/arm/boot/dts/imx6qp.dtsi +++ b/arch/arm/boot/dts/imx6qp.dtsi @@ -82,5 +82,8 @@ "ldb_di0", "ldb_di1", "prg"; }; + pcie: pcie@0x0100 { + compatible = "fsl,imx6qp-pcie", "snps,dw-pcie"; + }; }; }; -- 2.5.5
[PATCH v2 1/3] PCI: imx6: Use enum instead of bool for variant indicator
Use enumerated type instead of a boolean flag to specify the variant of the PCIe IP block (6Q, 6SX, etc). This patch has zero functional impact, however it makes the code easier to extend for the case of more than 2 possible variants of an IP block (of which there are). Signed-off-by: Andrey Smirnov --- Changes since v1: - Patchset is rebased against https://git.kernel.org/cgit/linux/kernel/git/helgaas/pci.git/log/?h=pci/host-imx6 - DTS files changes moved into a separate patch drivers/pci/host/pci-imx6.c | 126 +--- 1 file changed, 71 insertions(+), 55 deletions(-) diff --git a/drivers/pci/host/pci-imx6.c b/drivers/pci/host/pci-imx6.c index 0f6d630..c570bbb 100644 --- a/drivers/pci/host/pci-imx6.c +++ b/drivers/pci/host/pci-imx6.c @@ -31,6 +31,11 @@ #define to_imx6_pcie(x)container_of(x, struct imx6_pcie, pp) +enum imx6_pcie_variants { + IMX6Q, + IMX6SX +}; + struct imx6_pcie { struct gpio_desc*reset_gpio; struct clk *pcie_bus; @@ -39,7 +44,7 @@ struct imx6_pcie { struct clk *pcie; struct pcie_portpp; struct regmap *iomuxc_gpr; - boolis_imx6sx; + enum imx6_pcie_variants variant; void __iomem*mem_base; u32 tx_deemph_gen1; u32 tx_deemph_gen2_3p5db; @@ -238,7 +243,8 @@ static int imx6_pcie_assert_core_reset(struct pcie_port *pp) struct imx6_pcie *imx6_pcie = to_imx6_pcie(pp); u32 val, gpr1, gpr12; - if (imx6_pcie->is_imx6sx) { + switch (imx6_pcie->variant) { + case IMX6SX: regmap_update_bits(imx6_pcie->iomuxc_gpr, IOMUXC_GPR12, IMX6SX_GPR12_PCIE_TEST_POWERDOWN, IMX6SX_GPR12_PCIE_TEST_POWERDOWN); @@ -246,72 +252,80 @@ static int imx6_pcie_assert_core_reset(struct pcie_port *pp) regmap_update_bits(imx6_pcie->iomuxc_gpr, IOMUXC_GPR5, IMX6SX_GPR5_PCIE_BTNRST_RESET, IMX6SX_GPR5_PCIE_BTNRST_RESET); - return 0; - } - - /* -* If the bootloader already enabled the link we need some special -* handling to get the core back into a state where it is safe to -* touch it for configuration. 
As there is no dedicated reset signal -* wired up for MX6QDL, we need to manually force LTSSM into "detect" -* state before completely disabling LTSSM, which is a prerequisite -* for core configuration. -* -* If both LTSSM_ENABLE and REF_SSP_ENABLE are active we have a strong -* indication that the bootloader activated the link. -*/ - regmap_read(imx6_pcie->iomuxc_gpr, IOMUXC_GPR1, ); - regmap_read(imx6_pcie->iomuxc_gpr, IOMUXC_GPR12, ); - - if ((gpr1 & IMX6Q_GPR1_PCIE_REF_CLK_EN) && - (gpr12 & IMX6Q_GPR12_PCIE_CTL_2)) { - val = readl(pp->dbi_base + PCIE_PL_PFLR); - val &= ~PCIE_PL_PFLR_LINK_STATE_MASK; - val |= PCIE_PL_PFLR_FORCE_LINK; - writel(val, pp->dbi_base + PCIE_PL_PFLR); + break; + case IMX6Q: + /* +* If the bootloader already enabled the link we need some special +* handling to get the core back into a state where it is safe to +* touch it for configuration. As there is no dedicated reset signal +* wired up for MX6QDL, we need to manually force LTSSM into "detect" +* state before completely disabling LTSSM, which is a prerequisite +* for core configuration. +* +* If both LTSSM_ENABLE and REF_SSP_ENABLE are active we have a strong +* indication that the bootloader activated the link. +*/ + regmap_read(imx6_pcie->iomuxc_gpr, IOMUXC_GPR1, ); + regmap_read(imx6_pcie->iomuxc_gpr, IOMUXC_GPR12, ); + + if ((gpr1 & IMX6Q_GPR1_PCIE_REF_CLK_EN) && + (gpr12 & IMX6Q_GPR12_PCIE_CTL_2)) { + val = readl(pp->dbi_base + PCIE_PL_PFLR); + val &= ~PCIE_PL_PFLR_LINK_STATE_MASK; + val |= PCIE_PL_PFLR_FORCE_LINK; + writel(val, pp->dbi_base + PCIE_PL_PFLR); + + regmap_update_bits(imx6_pcie->iomuxc_gpr, IOMUXC_GPR12, + IMX6Q_GPR12_PCIE_CTL_2, 0 << 10); + } - regmap_update_bits(imx6_pcie->iomuxc_gpr, IOMUXC_GPR12, - IMX6Q_GPR12_PCIE_CTL_2, 0 << 10); + regmap_update_bits(imx6_pcie->iomuxc_gpr, IOMUXC_GPR1, + IMX6Q_GPR1_PCIE_TEST_PD, 1 << 18); +
[PATCH] sched/cpufreq: don't trigger cpufreq update w/o real rt/deadline tasks running
Sometimes update_curr() is called w/o tasks actually running, it is captured by: u64 delta_exec = rq_clock_task(rq) - curr->se.exec_start; We should not trigger cpufreq update in this case for rt/deadline classes, and this patch fixes it. Signed-off-by: Wanpeng Li --- kernel/sched/deadline.c | 8 kernel/sched/rt.c | 8 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c index affd97e..8f9b5af 100644 --- a/kernel/sched/deadline.c +++ b/kernel/sched/deadline.c @@ -717,10 +717,6 @@ static void update_curr_dl(struct rq *rq) if (!dl_task(curr) || !on_dl_rq(dl_se)) return; - /* Kick cpufreq (see the comment in linux/cpufreq.h). */ - if (cpu_of(rq) == smp_processor_id()) - cpufreq_trigger_update(rq_clock(rq)); - /* * Consumed budget is computed considering the time as * observed by schedulable tasks (excluding time spent @@ -736,6 +732,10 @@ static void update_curr_dl(struct rq *rq) return; } + /* kick cpufreq (see the comment in linux/cpufreq.h). */ + if (cpu_of(rq) == smp_processor_id()) + cpufreq_trigger_update(rq_clock(rq)); + schedstat_set(curr->se.statistics.exec_max, max(curr->se.statistics.exec_max, delta_exec)); diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c index c41ea7a..19e1306 100644 --- a/kernel/sched/rt.c +++ b/kernel/sched/rt.c @@ -953,14 +953,14 @@ static void update_curr_rt(struct rq *rq) if (curr->sched_class != &rt_sched_class) return; - /* Kick cpufreq (see the comment in linux/cpufreq.h). */ - if (cpu_of(rq) == smp_processor_id()) - cpufreq_trigger_update(rq_clock(rq)); - delta_exec = rq_clock_task(rq) - curr->se.exec_start; if (unlikely((s64)delta_exec <= 0)) return; + /* Kick cpufreq (see the comment in linux/cpufreq.h). */ + if (cpu_of(rq) == smp_processor_id()) + cpufreq_trigger_update(rq_clock(rq)); + schedstat_set(curr->se.statistics.exec_max, max(curr->se.statistics.exec_max, delta_exec)); -- 1.9.1
[PATCH] sched/cpufreq: don't trigger cpufreq update w/o real rt/deadline tasks running
Sometimes update_curr() is called w/o tasks actually running, it is captured by: u64 delta_exec = rq_clock_task(rq) - curr->se.exec_start; We should not trigger cpufreq update in this case for rt/deadline classes, and this patch fixes it. Signed-off-by: Wanpeng Li --- kernel/sched/deadline.c | 8 kernel/sched/rt.c | 8 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c index affd97e..8f9b5af 100644 --- a/kernel/sched/deadline.c +++ b/kernel/sched/deadline.c @@ -717,10 +717,6 @@ static void update_curr_dl(struct rq *rq) if (!dl_task(curr) || !on_dl_rq(dl_se)) return; - /* Kick cpufreq (see the comment in linux/cpufreq.h). */ - if (cpu_of(rq) == smp_processor_id()) - cpufreq_trigger_update(rq_clock(rq)); - /* * Consumed budget is computed considering the time as * observed by schedulable tasks (excluding time spent @@ -736,6 +732,10 @@ static void update_curr_dl(struct rq *rq) return; } + /* kick cpufreq (see the comment in linux/cpufreq.h). */ + if (cpu_of(rq) == smp_processor_id()) + cpufreq_trigger_update(rq_clock(rq)); + schedstat_set(curr->se.statistics.exec_max, max(curr->se.statistics.exec_max, delta_exec)); diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c index c41ea7a..19e1306 100644 --- a/kernel/sched/rt.c +++ b/kernel/sched/rt.c @@ -953,14 +953,14 @@ static void update_curr_rt(struct rq *rq) if (curr->sched_class != &rt_sched_class) return; - /* Kick cpufreq (see the comment in linux/cpufreq.h). */ - if (cpu_of(rq) == smp_processor_id()) - cpufreq_trigger_update(rq_clock(rq)); - delta_exec = rq_clock_task(rq) - curr->se.exec_start; if (unlikely((s64)delta_exec <= 0)) return; + /* Kick cpufreq (see the comment in linux/cpufreq.h). */ + if (cpu_of(rq) == smp_processor_id()) + cpufreq_trigger_update(rq_clock(rq)); + schedstat_set(curr->se.statistics.exec_max, max(curr->se.statistics.exec_max, delta_exec)); -- 1.9.1
[PATCH v2 2/3] PCI: imx6: Implement reset sequence for i.MX6+
I.MX6+ has a dedicated bit for resetting PCIe core, which should be used instead of a regular reset sequence since using the latter will hang the SoC. This commit is based on c34068d48273e24d392d9a49a38be807954420ed from http://git.freescale.com/git/cgit.cgi/imx/linux-2.6-imx.git Signed-off-by: Andrey Smirnov --- Changes since v1: - Patchset is rebased against https://git.kernel.org/cgit/linux/kernel/git/helgaas/pci.git/log/?h=pci/host-imx6 - DTS files changes moved into a separate patch drivers/pci/host/pci-imx6.c | 28 ++-- include/linux/mfd/syscon/imx6q-iomuxc-gpr.h | 1 + 2 files changed, 27 insertions(+), 2 deletions(-) diff --git a/drivers/pci/host/pci-imx6.c b/drivers/pci/host/pci-imx6.c index c570bbb..834c5b8 100644 --- a/drivers/pci/host/pci-imx6.c +++ b/drivers/pci/host/pci-imx6.c @@ -33,7 +33,8 @@ enum imx6_pcie_variants { IMX6Q, - IMX6SX + IMX6SX, + IMX6QP, }; struct imx6_pcie { @@ -253,6 +254,11 @@ static int imx6_pcie_assert_core_reset(struct pcie_port *pp) IMX6SX_GPR5_PCIE_BTNRST_RESET, IMX6SX_GPR5_PCIE_BTNRST_RESET); break; + case IMX6QP: + regmap_update_bits(imx6_pcie->iomuxc_gpr, IOMUXC_GPR1, + IMX6Q_GPR1_PCIE_SW_RST, + IMX6Q_GPR1_PCIE_SW_RST); + break; case IMX6Q: /* * If the bootloader already enabled the link we need some special @@ -307,6 +313,7 @@ static int imx6_pcie_enable_ref_clk(struct imx6_pcie *imx6_pcie) regmap_update_bits(imx6_pcie->iomuxc_gpr, IOMUXC_GPR12, IMX6SX_GPR12_PCIE_TEST_POWERDOWN, 0); break; + case IMX6QP:/* FALLTHROUGH */ case IMX6Q: /* power up core phy and enable ref clock */ regmap_update_bits(imx6_pcie->iomuxc_gpr, IOMUXC_GPR1, @@ -367,9 +374,22 @@ static int imx6_pcie_deassert_core_reset(struct pcie_port *pp) gpiod_set_value_cansleep(imx6_pcie->reset_gpio, 1); } - if (imx6_pcie->variant == IMX6SX) + switch (imx6_pcie->variant) { + case IMX6SX: regmap_update_bits(imx6_pcie->iomuxc_gpr, IOMUXC_GPR5, IMX6SX_GPR5_PCIE_BTNRST_RESET, 0); + break; + case IMX6QP: + regmap_update_bits(imx6_pcie->iomuxc_gpr, IOMUXC_GPR1, + 
IMX6Q_GPR1_PCIE_SW_RST, 0); + + usleep_range(200, 500); + break; + case IMX6Q: /* Nothing to do */ + break; + default: + BUG(); + } return 0; @@ -601,6 +621,9 @@ static int __init imx6_pcie_probe(struct platform_device *pdev) if (of_device_is_compatible(pp->dev->of_node, "fsl,imx6sx-pcie")) imx6_pcie->variant = IMX6SX; + else if (of_device_is_compatible(pp->dev->of_node, +"fsl,imx6qp-pcie")) + imx6_pcie->variant = IMX6QP; else imx6_pcie->variant = IMX6Q; @@ -697,6 +720,7 @@ static void imx6_pcie_shutdown(struct platform_device *pdev) static const struct of_device_id imx6_pcie_of_match[] = { { .compatible = "fsl,imx6q-pcie", }, { .compatible = "fsl,imx6sx-pcie", }, + { .compatible = "fsl,imx6qp-pcie", }, {}, }; MODULE_DEVICE_TABLE(of, imx6_pcie_of_match); diff --git a/include/linux/mfd/syscon/imx6q-iomuxc-gpr.h b/include/linux/mfd/syscon/imx6q-iomuxc-gpr.h index 238c8db..5b08e3c 100644 --- a/include/linux/mfd/syscon/imx6q-iomuxc-gpr.h +++ b/include/linux/mfd/syscon/imx6q-iomuxc-gpr.h @@ -95,6 +95,7 @@ #define IMX6Q_GPR0_DMAREQ_MUX_SEL0_IOMUX BIT(0) #define IMX6Q_GPR1_PCIE_REQ_MASK (0x3 << 30) +#define IMX6Q_GPR1_PCIE_SW_RST BIT(29) #define IMX6Q_GPR1_PCIE_EXIT_L1BIT(28) #define IMX6Q_GPR1_PCIE_RDY_L23BIT(27) #define IMX6Q_GPR1_PCIE_ENTER_L1 BIT(26) -- 2.5.5
Re: [patch] bnx2i: silence uninitialized variable warnings
On 14/04/16 3:10 PM, "Dan Carpenter" wrote: >Presumably it isn't possible to have empty lists here, but my static >checker doesn't know that and complains that "ep" can be used >uninitialized. > >Signed-off-by: Dan Carpenter > >diff --git a/drivers/scsi/bnx2i/bnx2i_iscsi.c >b/drivers/scsi/bnx2i/bnx2i_iscsi.c >index 7289437..133901f 100644 >--- a/drivers/scsi/bnx2i/bnx2i_iscsi.c >+++ b/drivers/scsi/bnx2i/bnx2i_iscsi.c >@@ -675,7 +675,7 @@ bnx2i_find_ep_in_ofld_list(struct bnx2i_hba *hba, u32 >iscsi_cid) > { > struct list_head *list; > struct list_head *tmp; >- struct bnx2i_endpoint *ep; >+ struct bnx2i_endpoint *ep = NULL; > > read_lock_bh(&hba->ep_rdwr_lock); > list_for_each_safe(list, tmp, &hba->ep_ofld_list) { >@@ -703,7 +703,7 @@ bnx2i_find_ep_in_destroy_list(struct bnx2i_hba *hba, >u32 iscsi_cid) > { > struct list_head *list; > struct list_head *tmp; >- struct bnx2i_endpoint *ep; >+ struct bnx2i_endpoint *ep = NULL; > > read_lock_bh(&hba->ep_rdwr_lock); > list_for_each_safe(list, tmp, &hba->ep_destroy_list) { Acked-by: Nilesh Javali
[PATCH v2 2/3] PCI: imx6: Implement reset sequence for i.MX6+
I.MX6+ has a dedicated bit for resetting PCIe core, which should be used instead of a regular reset sequence since using the latter will hang the SoC. This commit is based on c34068d48273e24d392d9a49a38be807954420ed from http://git.freescale.com/git/cgit.cgi/imx/linux-2.6-imx.git Signed-off-by: Andrey Smirnov --- Changes since v1: - Patchset is rebased against https://git.kernel.org/cgit/linux/kernel/git/helgaas/pci.git/log/?h=pci/host-imx6 - DTS files changes moved into a separate patch drivers/pci/host/pci-imx6.c | 28 ++-- include/linux/mfd/syscon/imx6q-iomuxc-gpr.h | 1 + 2 files changed, 27 insertions(+), 2 deletions(-) diff --git a/drivers/pci/host/pci-imx6.c b/drivers/pci/host/pci-imx6.c index c570bbb..834c5b8 100644 --- a/drivers/pci/host/pci-imx6.c +++ b/drivers/pci/host/pci-imx6.c @@ -33,7 +33,8 @@ enum imx6_pcie_variants { IMX6Q, - IMX6SX + IMX6SX, + IMX6QP, }; struct imx6_pcie { @@ -253,6 +254,11 @@ static int imx6_pcie_assert_core_reset(struct pcie_port *pp) IMX6SX_GPR5_PCIE_BTNRST_RESET, IMX6SX_GPR5_PCIE_BTNRST_RESET); break; + case IMX6QP: + regmap_update_bits(imx6_pcie->iomuxc_gpr, IOMUXC_GPR1, + IMX6Q_GPR1_PCIE_SW_RST, + IMX6Q_GPR1_PCIE_SW_RST); + break; case IMX6Q: /* * If the bootloader already enabled the link we need some special @@ -307,6 +313,7 @@ static int imx6_pcie_enable_ref_clk(struct imx6_pcie *imx6_pcie) regmap_update_bits(imx6_pcie->iomuxc_gpr, IOMUXC_GPR12, IMX6SX_GPR12_PCIE_TEST_POWERDOWN, 0); break; + case IMX6QP:/* FALLTHROUGH */ case IMX6Q: /* power up core phy and enable ref clock */ regmap_update_bits(imx6_pcie->iomuxc_gpr, IOMUXC_GPR1, @@ -367,9 +374,22 @@ static int imx6_pcie_deassert_core_reset(struct pcie_port *pp) gpiod_set_value_cansleep(imx6_pcie->reset_gpio, 1); } - if (imx6_pcie->variant == IMX6SX) + switch (imx6_pcie->variant) { + case IMX6SX: regmap_update_bits(imx6_pcie->iomuxc_gpr, IOMUXC_GPR5, IMX6SX_GPR5_PCIE_BTNRST_RESET, 0); + break; + case IMX6QP: + regmap_update_bits(imx6_pcie->iomuxc_gpr, IOMUXC_GPR1, + 
IMX6Q_GPR1_PCIE_SW_RST, 0); + + usleep_range(200, 500); + break; + case IMX6Q: /* Nothing to do */ + break; + default: + BUG(); + } return 0; @@ -601,6 +621,9 @@ static int __init imx6_pcie_probe(struct platform_device *pdev) if (of_device_is_compatible(pp->dev->of_node, "fsl,imx6sx-pcie")) imx6_pcie->variant = IMX6SX; + else if (of_device_is_compatible(pp->dev->of_node, +"fsl,imx6qp-pcie")) + imx6_pcie->variant = IMX6QP; else imx6_pcie->variant = IMX6Q; @@ -697,6 +720,7 @@ static void imx6_pcie_shutdown(struct platform_device *pdev) static const struct of_device_id imx6_pcie_of_match[] = { { .compatible = "fsl,imx6q-pcie", }, { .compatible = "fsl,imx6sx-pcie", }, + { .compatible = "fsl,imx6qp-pcie", }, {}, }; MODULE_DEVICE_TABLE(of, imx6_pcie_of_match); diff --git a/include/linux/mfd/syscon/imx6q-iomuxc-gpr.h b/include/linux/mfd/syscon/imx6q-iomuxc-gpr.h index 238c8db..5b08e3c 100644 --- a/include/linux/mfd/syscon/imx6q-iomuxc-gpr.h +++ b/include/linux/mfd/syscon/imx6q-iomuxc-gpr.h @@ -95,6 +95,7 @@ #define IMX6Q_GPR0_DMAREQ_MUX_SEL0_IOMUX BIT(0) #define IMX6Q_GPR1_PCIE_REQ_MASK (0x3 << 30) +#define IMX6Q_GPR1_PCIE_SW_RST BIT(29) #define IMX6Q_GPR1_PCIE_EXIT_L1BIT(28) #define IMX6Q_GPR1_PCIE_RDY_L23BIT(27) #define IMX6Q_GPR1_PCIE_ENTER_L1 BIT(26) -- 2.5.5
Re: [patch] bnx2i: silence uninitialized variable warnings
On 14/04/16 3:10 PM, "Dan Carpenter" wrote: >Presumably it isn't possible to have empty lists here, but my static >checker doesn't know that and complains that "ep" can be used >uninitialized. > >Signed-off-by: Dan Carpenter > >diff --git a/drivers/scsi/bnx2i/bnx2i_iscsi.c >b/drivers/scsi/bnx2i/bnx2i_iscsi.c >index 7289437..133901f 100644 >--- a/drivers/scsi/bnx2i/bnx2i_iscsi.c >+++ b/drivers/scsi/bnx2i/bnx2i_iscsi.c >@@ -675,7 +675,7 @@ bnx2i_find_ep_in_ofld_list(struct bnx2i_hba *hba, u32 >iscsi_cid) > { > struct list_head *list; > struct list_head *tmp; >- struct bnx2i_endpoint *ep; >+ struct bnx2i_endpoint *ep = NULL; > > read_lock_bh(>ep_rdwr_lock); > list_for_each_safe(list, tmp, >ep_ofld_list) { >@@ -703,7 +703,7 @@ bnx2i_find_ep_in_destroy_list(struct bnx2i_hba *hba, >u32 iscsi_cid) > { > struct list_head *list; > struct list_head *tmp; >- struct bnx2i_endpoint *ep; >+ struct bnx2i_endpoint *ep = NULL; > > read_lock_bh(>ep_rdwr_lock); > list_for_each_safe(list, tmp, >ep_destroy_list) { Acked-by: Nilesh Javali
Re: [PATCH] x86/entry/x32: Check top 32 bits of syscall number on the fast path
On Sun, Apr 17, 2016 at 10:45 PM, H. Peter Anvin wrote: > On 04/17/16 22:39, Andy Lutomirski wrote: >>> >>> I'm reasonably confident they have, because we have had security bugs >>> TWICE when someone has tried to "optimize" the code. The masking was >>> generally done with a movl instruction, which confused people. >>> So the type of the syscall nr is a bit confused. If there was an installed base of programs that leaved garbage in the high bits, we would have noticed *years* ago. On the other hand, the 32-bit ptrace ABI and the seccomp ABI both think it's 32-bits. >>> >>> Incorrect. We have seen these failures in real life. >> >> What kind of failure? Programs that accidentally set rax to >> 0xbaadf00d0003 get -ENOSYS in most cases, not close(). If we'd >> broken programs like this, I assume we would have had to fix it a long >> time ago. >> If we were designing the x86_64 ABI and everything around it from scratch, I'd suggest that that either the high bits must be zero or that the number actually be 64 bits (which are more or less the same thing). That would let us use the high bits for something interesting in the future. >>> >>> Not really all that useful. What we have is a C ABI. >> >> And we've already stolen a bit once for x32. Maybe we'll want more. >> For example, if we added a cancellable bit, if x86_32 didn't want it, >> we could steal a high bit for ie. >> > > I think we're worrying about the wrong thing here... we skipped bit 31 > to avoid signedness issues, and with bit 30 for x32 we now "only" have > 20 bits that haven't been used for anything at all. > >>> In practice, we can probably still declare that the thing is a 64-bit number, given that most kernels in the wild currently fail syscalls that have the high bits set. >>> >>> They don't, and we can prove it... >> >> I'm confused. 
>> >> asm volatile ("syscall" : >> "=a" (ret) : >> "a" (SYS_getpid | 0xbaadf00dULL) : >> "memory", "cc", "rcx", "r11"); >> >> gets -ENOSYS on the kernel I'm running on my laptop and on Fedora 23's >> stock kernel. >> >> I'm not terribly worried about nasty security issues in here because >> all the nasty stuff is in C now. >> >> What kernel had the other behavior? In 2.6.11, I see: >> >> ENTRY(system_call) >> CFI_STARTPROC >> swapgs >> movq%rsp,%gs:pda_oldrsp >> movq%gs:pda_kernelstack,%rsp >> sti >> SAVE_ARGS 8,1 >> movq %rax,ORIG_RAX-ARGOFFSET(%rsp) >> movq %rcx,RIP-ARGOFFSET(%rsp) >> GET_THREAD_INFO(%rcx) >> testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT),threadinfo_flags(%rcx) >> jnz tracesys >> cmpq $__NR_syscall_max,%rax >> > > I can't remember what versions. What I do know is that this was a bug > which was introduced, fixed, re-introduced, and fixed again, and both > resulted in CVEs. The fact that you're seeing the cmpq indicates that > it at least was not one of the security-buggy kernels. > > I do agree we should make the behavior consistent, and follow the > documented behavior of treating the syscall argument as an int. > I think I prefer the "reject weird input" behavior over the "accept and normalize weird input" if we can get away with it, and I'm fairly confident that we can get away with "reject weird input" given that distro kernels do exactly that already. So I like Ben's patch. --Andy > -hpa > > -- Andy Lutomirski AMA Capital Management, LLC
Re: [PATCH] x86/entry/x32: Check top 32 bits of syscall number on the fast path
On Sun, Apr 17, 2016 at 10:45 PM, H. Peter Anvin wrote: > On 04/17/16 22:39, Andy Lutomirski wrote: >>> >>> I'm reasonably confident they have, because we have had security bugs >>> TWICE when someone has tried to "optimize" the code. The masking was >>> generally done with a movl instruction, which confused people. >>> So the type of the syscall nr is a bit confused. If there was an installed base of programs that leaved garbage in the high bits, we would have noticed *years* ago. On the other hand, the 32-bit ptrace ABI and the seccomp ABI both think it's 32-bits. >>> >>> Incorrect. We have seen these failures in real life. >> >> What kind of failure? Programs that accidentally set rax to >> 0xbaadf00d0003 get -ENOSYS in most cases, not close(). If we'd >> broken programs like this, I assume we would have had to fix it a long >> time ago. >> If we were designing the x86_64 ABI and everything around it from scratch, I'd suggest that that either the high bits must be zero or that the number actually be 64 bits (which are more or less the same thing). That would let us use the high bits for something interesting in the future. >>> >>> Not really all that useful. What we have is a C ABI. >> >> And we've already stolen a bit once for x32. Maybe we'll want more. >> For example, if we added a cancellable bit, if x86_32 didn't want it, >> we could steal a high bit for ie. >> > > I think we're worrying about the wrong thing here... we skipped bit 31 > to avoid signedness issues, and with bit 30 for x32 we now "only" have > 20 bits that haven't been used for anything at all. > >>> In practice, we can probably still declare that the thing is a 64-bit number, given that most kernels in the wild currently fail syscalls that have the high bits set. >>> >>> They don't, and we can prove it... >> >> I'm confused. 
>> >> asm volatile ("syscall" : >> "=a" (ret) : >> "a" (SYS_getpid | 0xbaadf00dULL) : >> "memory", "cc", "rcx", "r11"); >> >> gets -ENOSYS on the kernel I'm running on my laptop and on Fedora 23's >> stock kernel. >> >> I'm not terribly worried about nasty security issues in here because >> all the nasty stuff is in C now. >> >> What kernel had the other behavior? In 2.6.11, I see: >> >> ENTRY(system_call) >> CFI_STARTPROC >> swapgs >> movq%rsp,%gs:pda_oldrsp >> movq%gs:pda_kernelstack,%rsp >> sti >> SAVE_ARGS 8,1 >> movq %rax,ORIG_RAX-ARGOFFSET(%rsp) >> movq %rcx,RIP-ARGOFFSET(%rsp) >> GET_THREAD_INFO(%rcx) >> testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT),threadinfo_flags(%rcx) >> jnz tracesys >> cmpq $__NR_syscall_max,%rax >> > > I can't remember what versions. What I do know is that this was a bug > which was introduced, fixed, re-introduced, and fixed again, and both > resulted in CVEs. The fact that you're seeing the cmpq indicates that > it at least was not one of the security-buggy kernels. > > I do agree we should make the behavior consistent, and follow the > documented behavior of treating the syscall argument as an int. > I think I prefer the "reject weird input" behavior over the "accept and normalize weird input" if we can get away with it, and I'm fairly confident that we can get away with "reject weird input" given that distro kernels do exactly that already. So I like Ben's patch. --Andy > -hpa > > -- Andy Lutomirski AMA Capital Management, LLC
Re: [PATCH v10 0/4] Introduce usb charger framework to deal with the usb gadget power negotiation
Hi Felipe, What do you think of this version patchset? Could you review this patchset when you feel free? I really hope to move the usb charger forwards with suggestions. Thanks. On 7 April 2016 at 19:48, Baolin Wang wrote: > Currently the Linux kernel does not provide any standard integration of this > feature that integrates the USB subsystem with the system power regulation > provided by PMICs meaning that either vendors must add this in their kernels > or USB gadget devices based on Linux (such as mobile phones) may not behave > as they should. Thus provide a standard framework for doing this in kernel. > > Now introduce one user with wm831x_power to support and test the usb charger, > which is pending testing. Moreover there may be other potential users will use > it in future. > > Changes since v9: > - Remove some redundant sysfs attributes. > - Change the SDP charger default current if gadget is SS. > - Remove the 'get_charger_type' callback in gadget->ops. > > Baolin Wang (4): > gadget: Introduce the usb charger framework > gadget: Support for the usb charger framework > gadget: Integrate with the usb gadget supporting for usb charger > power: wm831x_power: Support USB charger current limit management > > drivers/power/wm831x_power.c | 69 > drivers/usb/gadget/Kconfig|7 + > drivers/usb/gadget/udc/Makefile |1 + > drivers/usb/gadget/udc/charger.c | 766 > + > drivers/usb/gadget/udc/udc-core.c | 11 + > include/linux/mfd/wm831x/pdata.h |3 + > include/linux/usb/charger.h | 173 + > include/linux/usb/gadget.h| 13 + > include/uapi/linux/usb/charger.h | 31 ++ > 9 files changed, 1074 insertions(+) > create mode 100644 drivers/usb/gadget/udc/charger.c > create mode 100644 include/linux/usb/charger.h > create mode 100644 include/uapi/linux/usb/charger.h > > -- > 1.7.9.5 > -- Baolin.wang Best Regards
Re: [PATCH v10 0/4] Introduce usb charger framework to deal with the usb gadget power negotiation
Hi Felipe, What do you think of this version patchset? Could you review this patchset when you feel free? I really hope to move the usb charger forwards with suggestions. Thanks. On 7 April 2016 at 19:48, Baolin Wang wrote: > Currently the Linux kernel does not provide any standard integration of this > feature that integrates the USB subsystem with the system power regulation > provided by PMICs meaning that either vendors must add this in their kernels > or USB gadget devices based on Linux (such as mobile phones) may not behave > as they should. Thus provide a standard framework for doing this in kernel. > > Now introduce one user with wm831x_power to support and test the usb charger, > which is pending testing. Moreover there may be other potential users will use > it in future. > > Changes since v9: > - Remove some redundant sysfs attributes. > - Change the SDP charger default current if gadget is SS. > - Remove the 'get_charger_type' callback in gadget->ops. > > Baolin Wang (4): > gadget: Introduce the usb charger framework > gadget: Support for the usb charger framework > gadget: Integrate with the usb gadget supporting for usb charger > power: wm831x_power: Support USB charger current limit management > > drivers/power/wm831x_power.c | 69 > drivers/usb/gadget/Kconfig|7 + > drivers/usb/gadget/udc/Makefile |1 + > drivers/usb/gadget/udc/charger.c | 766 > + > drivers/usb/gadget/udc/udc-core.c | 11 + > include/linux/mfd/wm831x/pdata.h |3 + > include/linux/usb/charger.h | 173 + > include/linux/usb/gadget.h| 13 + > include/uapi/linux/usb/charger.h | 31 ++ > 9 files changed, 1074 insertions(+) > create mode 100644 drivers/usb/gadget/udc/charger.c > create mode 100644 include/linux/usb/charger.h > create mode 100644 include/uapi/linux/usb/charger.h > > -- > 1.7.9.5 > -- Baolin.wang Best Regards
Re: [PATCH v2 0/4] Introduce bulk mode for crypto engine framework
On Mon, Apr 18, 2016 at 01:31:09PM +0800, Baolin Wang wrote: > > We've tried to do this in dm-crypt, but it failed. > The dm-crypt maintainer explained to me that I should optimize the > driver, not add strange hw-dependent crypto modes to dm-crypt, this is > not the first crypto accelerator that is just not suited for this kind > of use. > He thought if it can process batch of chunks of data each with own IV, > then it can work with dm-crypt, but he thought such optimized code > should be inside crypto API, not in dmcrypt. That's a completely bogus argument. The user always has more information available than the underlying API. So it is totally stupid to have the API try to extract information that the user could have provided in the first place. I'm not taking this patch-set. Cheers, -- Email: Herbert Xu Home Page: http://gondor.apana.org.au/~herbert/ PGP Key: http://gondor.apana.org.au/~herbert/pubkey.txt
Re: [PATCH v2 0/4] Introduce bulk mode for crypto engine framework
On Mon, Apr 18, 2016 at 01:31:09PM +0800, Baolin Wang wrote: > > We've tried to do this in dm-crypt, but it failed. > The dm-crypt maintainer explained to me that I should optimize the > driver, not add strange hw-dependent crypto modes to dm-crypt, this is > not the first crypto accelerator that is just not suited for this kind > of use. > He thought if it can process batch of chunks of data each with own IV, > then it can work with dm-crypt, but he thought such optimized code > should be inside crypto API, not in dmcrypt. That's a completely bogus argument. The user always has more information available than the underlying API. So it is totally stupid to have the API try to extract information that the user could have provided in the first place. I'm not taking this patch-set. Cheers, -- Email: Herbert Xu Home Page: http://gondor.apana.org.au/~herbert/ PGP Key: http://gondor.apana.org.au/~herbert/pubkey.txt
Re: [PATCH] x86/entry/x32: Check top 32 bits of syscall number on the fast path
On 04/17/16 22:39, Andy Lutomirski wrote: >> >> I'm reasonably confident they have, because we have had security bugs >> TWICE when someone has tried to "optimize" the code. The masking was >> generally done with a movl instruction, which confused people. >> >>> So the type of the syscall nr is a bit confused. If there was an >>> installed base of programs that leaved garbage in the high bits, we >>> would have noticed *years* ago. On the other hand, the 32-bit ptrace >>> ABI and the seccomp ABI both think it's 32-bits. >> >> Incorrect. We have seen these failures in real life. > > What kind of failure? Programs that accidentally set rax to > 0xbaadf00d0003 get -ENOSYS in most cases, not close(). If we'd > broken programs like this, I assume we would have had to fix it a long > time ago. > >>> If we were designing the x86_64 ABI and everything around it from >>> scratch, I'd suggest that that either the high bits must be zero or >>> that the number actually be 64 bits (which are more or less the same >>> thing). That would let us use the high bits for something interesting >>> in the future. >> >> Not really all that useful. What we have is a C ABI. > > And we've already stolen a bit once for x32. Maybe we'll want more. > For example, if we added a cancellable bit, if x86_32 didn't want it, > we could steal a high bit for ie. > I think we're worrying about the wrong thing here... we skipped bit 31 to avoid signedness issues, and with bit 30 for x32 we now "only" have 20 bits that haven't been used for anything at all. >> >>> In practice, we can probably still declare that the thing is a 64-bit >>> number, given that most kernels in the wild currently fail syscalls >>> that have the high bits set. >> >> They don't, and we can prove it... > > I'm confused. > > asm volatile ("syscall" : > "=a" (ret) : > "a" (SYS_getpid | 0xbaadf00dULL) : > "memory", "cc", "rcx", "r11"); > > gets -ENOSYS on the kernel I'm running on my laptop and on Fedora 23's > stock kernel. 
> > I'm not terribly worried about nasty security issues in here because > all the nasty stuff is in C now. > > What kernel had the other behavior? In 2.6.11, I see: > > ENTRY(system_call) > CFI_STARTPROC > swapgs > movq%rsp,%gs:pda_oldrsp > movq%gs:pda_kernelstack,%rsp > sti > SAVE_ARGS 8,1 > movq %rax,ORIG_RAX-ARGOFFSET(%rsp) > movq %rcx,RIP-ARGOFFSET(%rsp) > GET_THREAD_INFO(%rcx) > testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT),threadinfo_flags(%rcx) > jnz tracesys > cmpq $__NR_syscall_max,%rax > I can't remember what versions. What I do know is that this was a bug which was introduced, fixed, re-introduced, and fixed again, and both resulted in CVEs. The fact that you're seeing the cmpq indicates that it at least was not one of the security-buggy kernels. I do agree we should make the behavior consistent, and follow the documented behavior of treating the syscall argument as an int. -hpa
Re: [PATCH] x86/entry/x32: Check top 32 bits of syscall number on the fast path
On 04/17/16 22:39, Andy Lutomirski wrote: >> >> I'm reasonably confident they have, because we have had security bugs >> TWICE when someone has tried to "optimize" the code. The masking was >> generally done with a movl instruction, which confused people. >> >>> So the type of the syscall nr is a bit confused. If there was an >>> installed base of programs that leaved garbage in the high bits, we >>> would have noticed *years* ago. On the other hand, the 32-bit ptrace >>> ABI and the seccomp ABI both think it's 32-bits. >> >> Incorrect. We have seen these failures in real life. > > What kind of failure? Programs that accidentally set rax to > 0xbaadf00d0003 get -ENOSYS in most cases, not close(). If we'd > broken programs like this, I assume we would have had to fix it a long > time ago. > >>> If we were designing the x86_64 ABI and everything around it from >>> scratch, I'd suggest that that either the high bits must be zero or >>> that the number actually be 64 bits (which are more or less the same >>> thing). That would let us use the high bits for something interesting >>> in the future. >> >> Not really all that useful. What we have is a C ABI. > > And we've already stolen a bit once for x32. Maybe we'll want more. > For example, if we added a cancellable bit, if x86_32 didn't want it, > we could steal a high bit for ie. > I think we're worrying about the wrong thing here... we skipped bit 31 to avoid signedness issues, and with bit 30 for x32 we now "only" have 20 bits that haven't been used for anything at all. >> >>> In practice, we can probably still declare that the thing is a 64-bit >>> number, given that most kernels in the wild currently fail syscalls >>> that have the high bits set. >> >> They don't, and we can prove it... > > I'm confused. > > asm volatile ("syscall" : > "=a" (ret) : > "a" (SYS_getpid | 0xbaadf00dULL) : > "memory", "cc", "rcx", "r11"); > > gets -ENOSYS on the kernel I'm running on my laptop and on Fedora 23's > stock kernel. 
> > I'm not terribly worried about nasty security issues in here because > all the nasty stuff is in C now. > > What kernel had the other behavior? In 2.6.11, I see: > > ENTRY(system_call) > CFI_STARTPROC > swapgs > movq%rsp,%gs:pda_oldrsp > movq%gs:pda_kernelstack,%rsp > sti > SAVE_ARGS 8,1 > movq %rax,ORIG_RAX-ARGOFFSET(%rsp) > movq %rcx,RIP-ARGOFFSET(%rsp) > GET_THREAD_INFO(%rcx) > testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT),threadinfo_flags(%rcx) > jnz tracesys > cmpq $__NR_syscall_max,%rax > I can't remember what versions. What I do know is that this was a bug which was introduced, fixed, re-introduced, and fixed again, and both resulted in CVEs. The fact that you're seeing the cmpq indicates that it at least was not one of the security-buggy kernels. I do agree we should make the behavior consistent, and follow the documented behavior of treating the syscall argument as an int. -hpa
Re: [RESEND PATCH 1/3] power: charger-manager: Replace deprecated API of extcon
Hi Sebastian, On 2016년 04월 15일 23:13, Sebastian Reichel wrote: > Hi, > > On Fri, Apr 15, 2016 at 09:43:34AM +0900, Chanwoo Choi wrote: >> This patch removes the deprecated notifier API of extcon framework and then >> use >> the new extcon API[2] with the unique id[1] to indicate the each external >> connector. Alter deprecated API as following: >> - extcon_register_interest() -> extcon_register_notifier() >> - extcon_unregister_interest() -> extcon_unregister_notifier() >> >> And, extcon alters the name of USB charger connector in patch[3] as >> following: >> - EXTCON_CHG_USB_SDP /* Standard Downstream Port */ >> - EXTCON_CHG_USB_DCP /* Dedicated Charging Port */ >> - EXTCON_CHG_USB_CDP /* Charging Downstream Port */ >> - EXTCON_CHG_USB_ACA /* Accessory Charger Adapter */ >> >> So, the name of external charger connector are changed as following: >> - "USB" -> "SDP" >> - "TA" -> "DCP" >> >> [1] Commit 2a9de9c0f08d61 >> - ("extcon: Use the unique id for external connector instead of string) >> [2] Commit 046050f6e623e4 >> - ("extcon: Update the prototype of extcon_register_notifier() with enum >> extcon >> [3] Commit 11eecf910bd81d >> - ("extcon: Modify the id and name of external connector") >> >> Signed-off-by: Chanwoo Choi>> --- >> .../bindings/power_supply/charger-manager.txt | 4 +-- >> drivers/power/charger-manager.c| 31 >> ++ >> include/linux/power/charger-manager.h | 4 +-- >> 3 files changed, 24 insertions(+), 15 deletions(-) >> >> diff --git >> a/Documentation/devicetree/bindings/power_supply/charger-manager.txt >> b/Documentation/devicetree/bindings/power_supply/charger-manager.txt >> index ec4fe9de3137..73193e380dc2 100644 >> --- a/Documentation/devicetree/bindings/power_supply/charger-manager.txt >> +++ b/Documentation/devicetree/bindings/power_supply/charger-manager.txt >> @@ -65,13 +65,13 @@ Example : >> regulator@0 { >> cm-regulator-name = "chg-reg"; >> cable@0 { >> -cm-cable-name = "USB"; >> +cm-cable-id = 5; /* EXTCON_CHG_USB_SDP */ >> cm-cable-extcon 
= "extcon-dev.0"; >> cm-cable-min = <475000>; >> cm-cable-max = <50>; >> }; >> cable@1 { >> -cm-cable-name = "TA"; >> +cm-cable-id = 6; /* EXTCON_CHG_USB_DCP */ >> cm-cable-extcon = "extcon-dev.0"; >> cm-cable-min = <65>; >> cm-cable-max = <675000>; > > This breaks DT ABI. Looks like charger-manager is not used in > mainline,but I guess ther should be an explicit Acked-By from > a DT binding maintainer. As I mentioned on other mail to Rob, the purpose of this patch removes the deprecated the EXTCON APIs. So, I touched the DT binding. About DT ABI about charger-manager, we should handle it on separate patches. Also I think the defines should be in > some header includable from DTS, so that something like this > can be done: > > cm-cable-id = ; I agree to use some definition for the kind of charger cable. But, Not yet. I'm preparing the EXTCON update to use the definitions on Device Tree file. I'll send some separate patches in the near future. > > Apart from that: > > Acked-By: Sebastian Reichel Thanks for review. Best Regards, Chanwoo Choi
Re: [RESEND PATCH 1/3] power: charger-manager: Replace deprecated API of extcon
Hi Sebastian, On 2016년 04월 15일 23:13, Sebastian Reichel wrote: > Hi, > > On Fri, Apr 15, 2016 at 09:43:34AM +0900, Chanwoo Choi wrote: >> This patch removes the deprecated notifier API of extcon framework and then >> use >> the new extcon API[2] with the unique id[1] to indicate the each external >> connector. Alter deprecated API as following: >> - extcon_register_interest() -> extcon_register_notifier() >> - extcon_unregister_interest() -> extcon_unregister_notifier() >> >> And, extcon alters the name of USB charger connector in patch[3] as >> following: >> - EXTCON_CHG_USB_SDP /* Standard Downstream Port */ >> - EXTCON_CHG_USB_DCP /* Dedicated Charging Port */ >> - EXTCON_CHG_USB_CDP /* Charging Downstream Port */ >> - EXTCON_CHG_USB_ACA /* Accessory Charger Adapter */ >> >> So, the name of external charger connector are changed as following: >> - "USB" -> "SDP" >> - "TA" -> "DCP" >> >> [1] Commit 2a9de9c0f08d61 >> - ("extcon: Use the unique id for external connector instead of string) >> [2] Commit 046050f6e623e4 >> - ("extcon: Update the prototype of extcon_register_notifier() with enum >> extcon >> [3] Commit 11eecf910bd81d >> - ("extcon: Modify the id and name of external connector") >> >> Signed-off-by: Chanwoo Choi >> --- >> .../bindings/power_supply/charger-manager.txt | 4 +-- >> drivers/power/charger-manager.c| 31 >> ++ >> include/linux/power/charger-manager.h | 4 +-- >> 3 files changed, 24 insertions(+), 15 deletions(-) >> >> diff --git >> a/Documentation/devicetree/bindings/power_supply/charger-manager.txt >> b/Documentation/devicetree/bindings/power_supply/charger-manager.txt >> index ec4fe9de3137..73193e380dc2 100644 >> --- a/Documentation/devicetree/bindings/power_supply/charger-manager.txt >> +++ b/Documentation/devicetree/bindings/power_supply/charger-manager.txt >> @@ -65,13 +65,13 @@ Example : >> regulator@0 { >> cm-regulator-name = "chg-reg"; >> cable@0 { >> -cm-cable-name = "USB"; >> +cm-cable-id = 5; /* EXTCON_CHG_USB_SDP */ >> 
cm-cable-extcon = "extcon-dev.0"; >> cm-cable-min = <475000>; >> cm-cable-max = <50>; >> }; >> cable@1 { >> -cm-cable-name = "TA"; >> +cm-cable-id = 6; /* EXTCON_CHG_USB_DCP */ >> cm-cable-extcon = "extcon-dev.0"; >> cm-cable-min = <65>; >> cm-cable-max = <675000>; > > This breaks DT ABI. Looks like charger-manager is not used in > mainline,but I guess ther should be an explicit Acked-By from > a DT binding maintainer. As I mentioned on other mail to Rob, the purpose of this patch removes the deprecated the EXTCON APIs. So, I touched the DT binding. About DT ABI about charger-manager, we should handle it on separate patches. Also I think the defines should be in > some header includable from DTS, so that something like this > can be done: > > cm-cable-id = ; I agree to use some definition for the kind of charger cable. But, Not yet. I'm preparing the EXTCON update to use the definitions on Device Tree file. I'll send some separate patches in the near future. > > Apart from that: > > Acked-By: Sebastian Reichel Thanks for review. Best Regards, Chanwoo Choi
Re: [PATCH 3/7] [Media] vcodec: mediatek: Add Mediatek V4L2 Video Decoder Driver
snipped. > > + > > +void mtk_vcodec_dec_set_default_params(struct mtk_vcodec_ctx *ctx) > > +{ > > + struct mtk_q_data *q_data; > > + > > + ctx->m2m_ctx->q_lock = >dev->dev_mutex; > > + ctx->fh.m2m_ctx = ctx->m2m_ctx; > > + ctx->fh.ctrl_handler = >ctrl_hdl; > > + INIT_WORK(>decode_work, mtk_vdec_worker); > > + > > + q_data = >q_data[MTK_Q_DATA_SRC]; > > + memset(q_data, 0, sizeof(struct mtk_q_data)); > > + q_data->visible_width = DFT_CFG_WIDTH; > > + q_data->visible_height = DFT_CFG_HEIGHT; > > + q_data->fmt = _video_formats[OUT_FMT_IDX]; > > + q_data->colorspace = V4L2_COLORSPACE_REC709; > > + q_data->field = V4L2_FIELD_NONE; > > + ctx->q_data[MTK_Q_DATA_DST].sizeimage[0] = > > + DFT_CFG_WIDTH * DFT_CFG_HEIGHT; > > + ctx->q_data[MTK_Q_DATA_DST].bytesperline[0] = 0; > > + > > + > > + q_data = >q_data[MTK_Q_DATA_DST]; > > + memset(q_data, 0, sizeof(struct mtk_q_data)); > > + q_data->visible_width = DFT_CFG_WIDTH; > > + q_data->visible_height = DFT_CFG_HEIGHT; > > + q_data->coded_width = DFT_CFG_WIDTH; > > + q_data->coded_height = DFT_CFG_HEIGHT; > > + q_data->colorspace = V4L2_COLORSPACE_REC709; > > + q_data->field = V4L2_FIELD_NONE; > > + > > + q_data->fmt = _video_formats[CAP_FMT_IDX]; > > + > > + v4l_bound_align_image(_data->coded_width, > > + MTK_VDEC_MIN_W, > > + MTK_VDEC_MAX_W, 4, > > + _data->coded_height, > > + MTK_VDEC_MIN_H, > > + MTK_VDEC_MAX_H, 5, 6); > > + > > + q_data->sizeimage[0] = q_data->coded_width * q_data->coded_height; > > + q_data->bytesperline[0] = q_data->coded_width; > > + q_data->sizeimage[1] = q_data->sizeimage[0] / 2; > > + q_data->bytesperline[1] = q_data->coded_width; > > + > > +} > > + > > +static int vidioc_vdec_streamon(struct file *file, void *priv, > > + enum v4l2_buf_type type) > > +{ > > + struct mtk_vcodec_ctx *ctx = fh_to_ctx(priv); > > + > > + mtk_v4l2_debug(3, "[%d] (%d)", ctx->idx, type); > > + > > + return v4l2_m2m_streamon(file, ctx->m2m_ctx, type); > > +} > > + > > +static int vidioc_vdec_streamoff(struct file *file, void 
*priv, > > +enum v4l2_buf_type type) > > +{ > > + struct mtk_vcodec_ctx *ctx = fh_to_ctx(priv); > > + > > + mtk_v4l2_debug(3, "[%d] (%d)", ctx->idx, type); > > + return v4l2_m2m_streamoff(file, ctx->m2m_ctx, type); > > +} > > + > > +static int vidioc_vdec_reqbufs(struct file *file, void *priv, > > + struct v4l2_requestbuffers *reqbufs) > > +{ > > + struct mtk_vcodec_ctx *ctx = fh_to_ctx(priv); > > + int ret; > > + > > + mtk_v4l2_debug(3, "[%d] (%d) count=%d", ctx->idx, > > +reqbufs->type, reqbufs->count); > > + ret = v4l2_m2m_reqbufs(file, ctx->m2m_ctx, reqbufs); > > + > > + return ret; > > +} > > Please use the v4l2_m2m_ioctl_* helper functions were applicable. > snipped. > > +static unsigned int fops_vcodec_poll(struct file *file, > > +struct poll_table_struct *wait) > > +{ > > + struct mtk_vcodec_ctx *ctx = fh_to_ctx(file->private_data); > > + struct mtk_vcodec_dev *dev = ctx->dev; > > + int ret; > > + > > + mutex_lock(>dev_mutex); > > + ret = v4l2_m2m_poll(file, ctx->m2m_ctx, wait); > > + mutex_unlock(>dev_mutex); > > + > > + return ret; > > +} > > + > > +static int fops_vcodec_mmap(struct file *file, struct vm_area_struct *vma) > > +{ > > + struct mtk_vcodec_ctx *ctx = fh_to_ctx(file->private_data); > > + > > + return v4l2_m2m_mmap(file, ctx->m2m_ctx, vma); > > +} > > + > > +static const struct v4l2_file_operations mtk_vcodec_fops = { > > + .owner = THIS_MODULE, > > + .open = fops_vcodec_open, > > + .release= fops_vcodec_release, > > + .poll = fops_vcodec_poll, > > + .unlocked_ioctl = video_ioctl2, > > + .mmap = fops_vcodec_mmap, > > You should be able to use the v4l2_m2m_fop helper functions for poll and mmap. > Hi Hans, We are plaining to remove m2m framework in th feature, although we think it is easy to use and could save a lot of code similar to what m2m framework implemented and reduce code size. The main reason is that in v4l2_m2m_try_schedule, it required that at least one output buffer and one capture buffer to run device_run. 
We want to start device_run without a capture buffer queued. Is there any way we could keep using the m2m framework but trigger device_run with only an output buffer queued? Or do we need to remove m2m and write our own implementation? snipped. best regards, Tiffany
Re: [PATCH 3/7] [Media] vcodec: mediatek: Add Mediatek V4L2 Video Decoder Driver
snipped. > > + > > +void mtk_vcodec_dec_set_default_params(struct mtk_vcodec_ctx *ctx) > > +{ > > + struct mtk_q_data *q_data; > > + > > + ctx->m2m_ctx->q_lock = >dev->dev_mutex; > > + ctx->fh.m2m_ctx = ctx->m2m_ctx; > > + ctx->fh.ctrl_handler = >ctrl_hdl; > > + INIT_WORK(>decode_work, mtk_vdec_worker); > > + > > + q_data = >q_data[MTK_Q_DATA_SRC]; > > + memset(q_data, 0, sizeof(struct mtk_q_data)); > > + q_data->visible_width = DFT_CFG_WIDTH; > > + q_data->visible_height = DFT_CFG_HEIGHT; > > + q_data->fmt = _video_formats[OUT_FMT_IDX]; > > + q_data->colorspace = V4L2_COLORSPACE_REC709; > > + q_data->field = V4L2_FIELD_NONE; > > + ctx->q_data[MTK_Q_DATA_DST].sizeimage[0] = > > + DFT_CFG_WIDTH * DFT_CFG_HEIGHT; > > + ctx->q_data[MTK_Q_DATA_DST].bytesperline[0] = 0; > > + > > + > > + q_data = >q_data[MTK_Q_DATA_DST]; > > + memset(q_data, 0, sizeof(struct mtk_q_data)); > > + q_data->visible_width = DFT_CFG_WIDTH; > > + q_data->visible_height = DFT_CFG_HEIGHT; > > + q_data->coded_width = DFT_CFG_WIDTH; > > + q_data->coded_height = DFT_CFG_HEIGHT; > > + q_data->colorspace = V4L2_COLORSPACE_REC709; > > + q_data->field = V4L2_FIELD_NONE; > > + > > + q_data->fmt = _video_formats[CAP_FMT_IDX]; > > + > > + v4l_bound_align_image(_data->coded_width, > > + MTK_VDEC_MIN_W, > > + MTK_VDEC_MAX_W, 4, > > + _data->coded_height, > > + MTK_VDEC_MIN_H, > > + MTK_VDEC_MAX_H, 5, 6); > > + > > + q_data->sizeimage[0] = q_data->coded_width * q_data->coded_height; > > + q_data->bytesperline[0] = q_data->coded_width; > > + q_data->sizeimage[1] = q_data->sizeimage[0] / 2; > > + q_data->bytesperline[1] = q_data->coded_width; > > + > > +} > > + > > +static int vidioc_vdec_streamon(struct file *file, void *priv, > > + enum v4l2_buf_type type) > > +{ > > + struct mtk_vcodec_ctx *ctx = fh_to_ctx(priv); > > + > > + mtk_v4l2_debug(3, "[%d] (%d)", ctx->idx, type); > > + > > + return v4l2_m2m_streamon(file, ctx->m2m_ctx, type); > > +} > > + > > +static int vidioc_vdec_streamoff(struct file *file, void 
*priv, > > +enum v4l2_buf_type type) > > +{ > > + struct mtk_vcodec_ctx *ctx = fh_to_ctx(priv); > > + > > + mtk_v4l2_debug(3, "[%d] (%d)", ctx->idx, type); > > + return v4l2_m2m_streamoff(file, ctx->m2m_ctx, type); > > +} > > + > > +static int vidioc_vdec_reqbufs(struct file *file, void *priv, > > + struct v4l2_requestbuffers *reqbufs) > > +{ > > + struct mtk_vcodec_ctx *ctx = fh_to_ctx(priv); > > + int ret; > > + > > + mtk_v4l2_debug(3, "[%d] (%d) count=%d", ctx->idx, > > +reqbufs->type, reqbufs->count); > > + ret = v4l2_m2m_reqbufs(file, ctx->m2m_ctx, reqbufs); > > + > > + return ret; > > +} > > Please use the v4l2_m2m_ioctl_* helper functions were applicable. > snipped. > > +static unsigned int fops_vcodec_poll(struct file *file, > > +struct poll_table_struct *wait) > > +{ > > + struct mtk_vcodec_ctx *ctx = fh_to_ctx(file->private_data); > > + struct mtk_vcodec_dev *dev = ctx->dev; > > + int ret; > > + > > + mutex_lock(>dev_mutex); > > + ret = v4l2_m2m_poll(file, ctx->m2m_ctx, wait); > > + mutex_unlock(>dev_mutex); > > + > > + return ret; > > +} > > + > > +static int fops_vcodec_mmap(struct file *file, struct vm_area_struct *vma) > > +{ > > + struct mtk_vcodec_ctx *ctx = fh_to_ctx(file->private_data); > > + > > + return v4l2_m2m_mmap(file, ctx->m2m_ctx, vma); > > +} > > + > > +static const struct v4l2_file_operations mtk_vcodec_fops = { > > + .owner = THIS_MODULE, > > + .open = fops_vcodec_open, > > + .release= fops_vcodec_release, > > + .poll = fops_vcodec_poll, > > + .unlocked_ioctl = video_ioctl2, > > + .mmap = fops_vcodec_mmap, > > You should be able to use the v4l2_m2m_fop helper functions for poll and mmap. > Hi Hans, We are plaining to remove m2m framework in th feature, although we think it is easy to use and could save a lot of code similar to what m2m framework implemented and reduce code size. The main reason is that in v4l2_m2m_try_schedule, it required that at least one output buffer and one capture buffer to run device_run. 
We want to start device_run without a capture buffer queued. Is there any way we could keep using the m2m framework but trigger device_run with only an output buffer queued? Or do we need to remove m2m and write our own implementation? snipped. best regards, Tiffany
Re: [PATCH] x86/entry/x32: Check top 32 bits of syscall number on the fast path
On Sun, Apr 17, 2016 at 10:21 PM, H. Peter Anvin wrote: > On 04/17/16 22:18, Andy Lutomirski wrote: >> On Sun, Apr 17, 2016 at 9:50 PM, H. Peter Anvin wrote: >>> On 04/17/16 17:47, Ben Hutchings wrote: We've always masked off the top 32 bits when x32 is enabled, but hopefully no-one relies on that. Now that the slow path is in C, we check all the bits there, regardless of whether x32 is enabled. Let's make the fast path consistent with it. >>> >>> We have always masked off the top 32 bits *period*. >>> >>> We have had some bugs where we haven't, because someone has tried to >>> "optimize" the code and they have been quite serious. The system call >>> number is an int, which means the upper 32 bits are undefined on call >>> entry: we HAVE to mask them. >> >> I'm reasonably confident that normal kernels (non-x32) have not masked >> those bits since before I started hacking on the entry code. >> > > I'm reasonably confident they have, because we have had security bugs > TWICE when someone has tried to "optimize" the code. The masking was > generally done with a movl instruction, which confused people. > >> So the type of the syscall nr is a bit confused. If there was an >> installed base of programs that leaved garbage in the high bits, we >> would have noticed *years* ago. On the other hand, the 32-bit ptrace >> ABI and the seccomp ABI both think it's 32-bits. > > Incorrect. We have seen these failures in real life. What kind of failure? Programs that accidentally set rax to 0xbaadf00d0003 get -ENOSYS in most cases, not close(). If we'd broken programs like this, I assume we would have had to fix it a long time ago. > >> If we were designing the x86_64 ABI and everything around it from >> scratch, I'd suggest that that either the high bits must be zero or >> that the number actually be 64 bits (which are more or less the same >> thing). That would let us use the high bits for something interesting >> in the future. > > Not really all that useful. 
What we have is a C ABI. And we've already stolen a bit once for x32. Maybe we'll want more. For example, if we added a cancellable bit, if x86_32 didn't want it, we could steal a high bit for ie. > >> In practice, we can probably still declare that the thing is a 64-bit >> number, given that most kernels in the wild currently fail syscalls >> that have the high bits set. > > They don't, and we can prove it... I'm confused. asm volatile ("syscall" : "=a" (ret) : "a" (SYS_getpid | 0xbaadf00dULL) : "memory", "cc", "rcx", "r11"); gets -ENOSYS on the kernel I'm running on my laptop and on Fedora 23's stock kernel. I'm not terribly worried about nasty security issues in here because all the nasty stuff is in C now. What kernel had the other behavior? In 2.6.11, I see: ENTRY(system_call) CFI_STARTPROC swapgs movq%rsp,%gs:pda_oldrsp movq%gs:pda_kernelstack,%rsp sti SAVE_ARGS 8,1 movq %rax,ORIG_RAX-ARGOFFSET(%rsp) movq %rcx,RIP-ARGOFFSET(%rsp) GET_THREAD_INFO(%rcx) testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT),threadinfo_flags(%rcx) jnz tracesys cmpq $__NR_syscall_max,%rax --Andy
Re: [PATCH] x86/entry/x32: Check top 32 bits of syscall number on the fast path
On Sun, Apr 17, 2016 at 10:21 PM, H. Peter Anvin wrote: > On 04/17/16 22:18, Andy Lutomirski wrote: >> On Sun, Apr 17, 2016 at 9:50 PM, H. Peter Anvin wrote: >>> On 04/17/16 17:47, Ben Hutchings wrote: We've always masked off the top 32 bits when x32 is enabled, but hopefully no-one relies on that. Now that the slow path is in C, we check all the bits there, regardless of whether x32 is enabled. Let's make the fast path consistent with it. >>> >>> We have always masked off the top 32 bits *period*. >>> >>> We have had some bugs where we haven't, because someone has tried to >>> "optimize" the code and they have been quite serious. The system call >>> number is an int, which means the upper 32 bits are undefined on call >>> entry: we HAVE to mask them. >> >> I'm reasonably confident that normal kernels (non-x32) have not masked >> those bits since before I started hacking on the entry code. >> > > I'm reasonably confident they have, because we have had security bugs > TWICE when someone has tried to "optimize" the code. The masking was > generally done with a movl instruction, which confused people. > >> So the type of the syscall nr is a bit confused. If there was an >> installed base of programs that leaved garbage in the high bits, we >> would have noticed *years* ago. On the other hand, the 32-bit ptrace >> ABI and the seccomp ABI both think it's 32-bits. > > Incorrect. We have seen these failures in real life. What kind of failure? Programs that accidentally set rax to 0xbaadf00d0003 get -ENOSYS in most cases, not close(). If we'd broken programs like this, I assume we would have had to fix it a long time ago. > >> If we were designing the x86_64 ABI and everything around it from >> scratch, I'd suggest that that either the high bits must be zero or >> that the number actually be 64 bits (which are more or less the same >> thing). That would let us use the high bits for something interesting >> in the future. > > Not really all that useful. 
What we have is a C ABI. And we've already stolen a bit once for x32. Maybe we'll want more. For example, if we added a cancellable bit, if x86_32 didn't want it, we could steal a high bit for ie. > >> In practice, we can probably still declare that the thing is a 64-bit >> number, given that most kernels in the wild currently fail syscalls >> that have the high bits set. > > They don't, and we can prove it... I'm confused. asm volatile ("syscall" : "=a" (ret) : "a" (SYS_getpid | 0xbaadf00dULL) : "memory", "cc", "rcx", "r11"); gets -ENOSYS on the kernel I'm running on my laptop and on Fedora 23's stock kernel. I'm not terribly worried about nasty security issues in here because all the nasty stuff is in C now. What kernel had the other behavior? In 2.6.11, I see: ENTRY(system_call) CFI_STARTPROC swapgs movq%rsp,%gs:pda_oldrsp movq%gs:pda_kernelstack,%rsp sti SAVE_ARGS 8,1 movq %rax,ORIG_RAX-ARGOFFSET(%rsp) movq %rcx,RIP-ARGOFFSET(%rsp) GET_THREAD_INFO(%rcx) testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT),threadinfo_flags(%rcx) jnz tracesys cmpq $__NR_syscall_max,%rax --Andy
linux-next: Tree for Apr 18
Hi all, Changes since 20160415: The net-next tree gained conflicts against the net tree. The tip tree still had its build failure for which I reverted a commit and gained another for which I applied a build fix. The gpio tree gained a build failure so I used the version from next-20160415. The livepatching tree gained conflicts against Linus' and the powerpc trees. The akpm-current tree still had its build failure for which I applied a patch. Non-merge commits (relative to Linus' tree): 4344 3992 files changed, 163277 insertions(+), 92002 deletions(-) I have created today's linux-next tree at git://git.kernel.org/pub/scm/linux/kernel/git/next/linux-next.git (patches at http://www.kernel.org/pub/linux/kernel/next/ ). If you are tracking the linux-next tree using git, you should not use "git pull" to do so as that will try to merge the new linux-next release with the old one. You should use "git fetch" and checkout or reset to the new master. You can see which trees have been included by looking in the Next/Trees file in the source. There are also quilt-import.log and merge.log files in the Next directory. Between each merge, the tree was built with a ppc64_defconfig for powerpc and an allmodconfig (with CONFIG_BUILD_DOCSRC=n) for x86_64, a multi_v7_defconfig for arm and a native build of tools/perf. After the final fixups (if any), I do an x86_64 modules_install followed by builds for x86_64 allnoconfig, powerpc allnoconfig (32 and 64 bit), ppc44x_defconfig, allyesconfig (this fails its final link) and pseries_le_defconfig and i386, sparc and sparc64 defconfig. Below is a summary of the state of the merge. I am currently merging 232 trees (counting Linus' and 35 trees of patches pending for Linus' tree). Stats about the size of the tree over time can be seen at http://neuling.org/linux-next-size.html . Status of my local build tests will be at http://kisskb.ellerman.id.au/linux-next . 
If maintainers want to give advice about cross compilers/configs that work, we are always open to add more builds. Thanks to Randy Dunlap for doing many randconfig builds. And to Paul Gortmaker for triage and bug fixes. -- Cheers, Stephen Rothwell $ git checkout master $ git reset --hard stable Merging origin/master (9d090d01e3ef Merge tag 'dm-4.6-fix-2' of git://git.kernel.org/pub/scm/linux/kernel/git/device-mapper/linux-dm) Merging fixes/master (9735a22799b9 Linux 4.6-rc2) Merging kbuild-current/rc-fixes (3d1450d54a4f Makefile: Force gzip and xz on module install) Merging arc-current/for-curr (d01ebf06e305 ARCv2: Enable LOCKDEP) Merging arm-current/fixes (9c18fcf7ae0e ARM: 8551/2: DMA: Fix kzalloc flags in __dma_alloc) Merging m68k-current/for-linus (7b8ba82ad4ad m68k/defconfig: Update defconfigs for v4.6-rc2) Merging metag-fixes/fixes (0164a711c97b metag: Fix ioremap_wc/ioremap_cached build errors) Merging powerpc-fixes/fixes (71528d8bd7a8 powerpc: Correct used_vsr comment) Merging powerpc-merge-mpe/fixes (bc0195aad0da Linux 4.2-rc2) Merging sparc/master (5ec712934ce1 sparc: Write up preadv2/pwritev2 syscalls.) Merging net/master (ab2ed0171a50 macsec: fix crypto Kconfig dependency) Merging ipsec/master (d6af1a31cc72 vti: Add pmtu handling to vti_xmit.) 
Merging ipvs/master (bcf493428840 netfilter: ebtables: Fix extension lookup with identical name) Merging wireless-drivers/master (de478a61389c ath9k: ar5008_hw_cmn_spur_mitigate: add missing mask_m & mask_p initialisation) Merging mac80211/master (8f815cdde3e5 nl80211: check netlink protocol in socket release notification) Merging sound-current/for-linus (c44da62b55bb ALSA: hda - Fix inconsistent monitor_present state until repoll) Merging pci-current/for-linus (67e658794ca1 cxgb4: Set VPD size so we can read both VPD structures) Merging driver-core.current/driver-core-linus (dea5c24a1404 lib: lz4: cleanup unaligned access efficiency detection) Merging tty.current/tty-linus (bf1620068911 Linux 4.6-rc3) Merging usb.current/usb-linus (e86103a75705 usb: hcd: out of bounds access in for_each_companion) Merging usb-gadget-fixes/fixes (bf1620068911 Linux 4.6-rc3) Merging usb-serial-fixes/usb-linus (bf1620068911 Linux 4.6-rc3) Merging usb-chipidea-fixes/ci-for-usb-stable (d144dfea8af7 usb: chipidea: otg: change workqueue ci_otg as freezable) Merging staging.current/staging-linus (bf1620068911 Linux 4.6-rc3) Merging char-misc.current/char-misc-linus (053f78d35995 Merge tag 'lkdtm-4.6-rc3' of git://git.kernel.org/pub/scm/linux/kernel/git/kees/linux into char-misc-linus) Merging input-current/for-linus (eda5ecc0a6b8 Input: pmic8xxx-pwrkey - fix algorithm for converting trigger delay) Merging crypto-current/master (f709b45ec461 crypto: ccp - Prevent information leakage on export) Merging ide/master (1993b176a822 Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/ide) Merging devicetree-current/devicetree/merge (f76502aa9140 of/dynamic: Fix test for
linux-next: Tree for Apr 18
Hi all, Changes since 20160415: The net-next tree gained conflicts against the net tree. The tip tree still had its build failure for which I reverted a commit and gained another for which I applied a build fix. The gpio tree gained a build failure so I used the version from next-20160415. The livepatching tree gained conflicts against Linus' and the powerpc trees. The akpm-current tree still had its build failure for which I applied a patch. Non-merge commits (relative to Linus' tree): 4344 3992 files changed, 163277 insertions(+), 92002 deletions(-) I have created today's linux-next tree at git://git.kernel.org/pub/scm/linux/kernel/git/next/linux-next.git (patches at http://www.kernel.org/pub/linux/kernel/next/ ). If you are tracking the linux-next tree using git, you should not use "git pull" to do so as that will try to merge the new linux-next release with the old one. You should use "git fetch" and checkout or reset to the new master. You can see which trees have been included by looking in the Next/Trees file in the source. There are also quilt-import.log and merge.log files in the Next directory. Between each merge, the tree was built with a ppc64_defconfig for powerpc and an allmodconfig (with CONFIG_BUILD_DOCSRC=n) for x86_64, a multi_v7_defconfig for arm and a native build of tools/perf. After the final fixups (if any), I do an x86_64 modules_install followed by builds for x86_64 allnoconfig, powerpc allnoconfig (32 and 64 bit), ppc44x_defconfig, allyesconfig (this fails its final link) and pseries_le_defconfig and i386, sparc and sparc64 defconfig. Below is a summary of the state of the merge. I am currently merging 232 trees (counting Linus' and 35 trees of patches pending for Linus' tree). Stats about the size of the tree over time can be seen at http://neuling.org/linux-next-size.html . Status of my local build tests will be at http://kisskb.ellerman.id.au/linux-next . 
If maintainers want to give advice about cross compilers/configs that work, we are always open to add more builds. Thanks to Randy Dunlap for doing many randconfig builds. And to Paul Gortmaker for triage and bug fixes. -- Cheers, Stephen Rothwell $ git checkout master $ git reset --hard stable Merging origin/master (9d090d01e3ef Merge tag 'dm-4.6-fix-2' of git://git.kernel.org/pub/scm/linux/kernel/git/device-mapper/linux-dm) Merging fixes/master (9735a22799b9 Linux 4.6-rc2) Merging kbuild-current/rc-fixes (3d1450d54a4f Makefile: Force gzip and xz on module install) Merging arc-current/for-curr (d01ebf06e305 ARCv2: Enable LOCKDEP) Merging arm-current/fixes (9c18fcf7ae0e ARM: 8551/2: DMA: Fix kzalloc flags in __dma_alloc) Merging m68k-current/for-linus (7b8ba82ad4ad m68k/defconfig: Update defconfigs for v4.6-rc2) Merging metag-fixes/fixes (0164a711c97b metag: Fix ioremap_wc/ioremap_cached build errors) Merging powerpc-fixes/fixes (71528d8bd7a8 powerpc: Correct used_vsr comment) Merging powerpc-merge-mpe/fixes (bc0195aad0da Linux 4.2-rc2) Merging sparc/master (5ec712934ce1 sparc: Write up preadv2/pwritev2 syscalls.) Merging net/master (ab2ed0171a50 macsec: fix crypto Kconfig dependency) Merging ipsec/master (d6af1a31cc72 vti: Add pmtu handling to vti_xmit.) 
Merging ipvs/master (bcf493428840 netfilter: ebtables: Fix extension lookup with identical name) Merging wireless-drivers/master (de478a61389c ath9k: ar5008_hw_cmn_spur_mitigate: add missing mask_m & mask_p initialisation) Merging mac80211/master (8f815cdde3e5 nl80211: check netlink protocol in socket release notification) Merging sound-current/for-linus (c44da62b55bb ALSA: hda - Fix inconsistent monitor_present state until repoll) Merging pci-current/for-linus (67e658794ca1 cxgb4: Set VPD size so we can read both VPD structures) Merging driver-core.current/driver-core-linus (dea5c24a1404 lib: lz4: cleanup unaligned access efficiency detection) Merging tty.current/tty-linus (bf1620068911 Linux 4.6-rc3) Merging usb.current/usb-linus (e86103a75705 usb: hcd: out of bounds access in for_each_companion) Merging usb-gadget-fixes/fixes (bf1620068911 Linux 4.6-rc3) Merging usb-serial-fixes/usb-linus (bf1620068911 Linux 4.6-rc3) Merging usb-chipidea-fixes/ci-for-usb-stable (d144dfea8af7 usb: chipidea: otg: change workqueue ci_otg as freezable) Merging staging.current/staging-linus (bf1620068911 Linux 4.6-rc3) Merging char-misc.current/char-misc-linus (053f78d35995 Merge tag 'lkdtm-4.6-rc3' of git://git.kernel.org/pub/scm/linux/kernel/git/kees/linux into char-misc-linus) Merging input-current/for-linus (eda5ecc0a6b8 Input: pmic8xxx-pwrkey - fix algorithm for converting trigger delay) Merging crypto-current/master (f709b45ec461 crypto: ccp - Prevent information leakage on export) Merging ide/master (1993b176a822 Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/ide) Merging devicetree-current/devicetree/merge (f76502aa9140 of/dynamic: Fix test for
Re: [RESEND PATCH 2/3] power: axp288_charger: Replace deprecatd API of extcon
Hi Sebastian, On 2016년 04월 15일 23:20, Sebastian Reichel wrote: > Hi, > > On Fri, Apr 15, 2016 at 09:43:35AM +0900, Chanwoo Choi wrote: >> This patch removes the deprecated notifier API of extcon framework and then >> use >> the new extcon API[2] with the unique id[1] to indicate the each external >> connector. Alter deprecated API as following: >> - extcon_register_interest() -> extcon_register_notifier() >> - extcon_unregister_interest() -> extcon_unregister_notifier() >> - extcon_get_cable_state() -> extcon_get_cable_state_() >> >> And, extcon alters the name of USB charger connector in patch[3] as >> following: >> - EXTCON_CHG_USB_SDP /* Standard Downstream Port */ >> - EXTCON_CHG_USB_DCP /* Dedicated Charging Port */ >> - EXTCON_CHG_USB_CDP /* Charging Downstream Port */ >> - EXTCON_CHG_USB_ACA /* Accessory Charger Adapter */ >> >> [1] Commit 2a9de9c0f08d61 >> - ("extcon: Use the unique id for external connector instead of string) >> [2] Commit 046050f6e623e4 >> - ("extcon: Update the prototype of extcon_register_notifier() with enum >> extcon >> [3] Commit 11eecf910bd81d >> - ("extcon: Modify the id and name of external connector") >> >> Signed-off-by: Chanwoo Choi>> --- >> drivers/power/axp288_charger.c | 77 >> +- >> 1 file changed, 53 insertions(+), 24 deletions(-) >> >> diff --git a/drivers/power/axp288_charger.c b/drivers/power/axp288_charger.c >> index e4d569f57acc..e5c2569befa6 100644 >> --- a/drivers/power/axp288_charger.c >> +++ b/drivers/power/axp288_charger.c >> @@ -129,10 +129,6 @@ >> [snip] >> /* Register charger interrupts */ >> for (i = 0; i < CHRG_INTR_END; i++) { >> @@ -905,11 +922,17 @@ static int axp288_charger_probe(struct platform_device >> *pdev) >> return 0; >> >> intr_reg_failed: >> -if (info->otg.cable.edev) >> -extcon_unregister_interest(>otg.cable); >> +if (info->otg.cable) >> +extcon_unregister_notifier(info->otg.cable, EXTCON_USB_HOST, >> +>otg.id_nb); >> power_supply_unregister(info->psy_usb); >> psy_reg_failed: >> 
-extcon_unregister_notifier(info->cable.edev, EXTCON_NONE, >> >cable.nb); >> +extcon_unregister_notifier(info->cable.edev, EXTCON_CHG_USB_SDP, >> +>cable.nb); >> +extcon_unregister_notifier(info->cable.edev, EXTCON_CHG_USB_SDP, >> +>cable.nb); > > EXTCON_CHG_USB_SDP -> EXTCON_CHG_USB_CDP? I was mistaken. I'll fix it. > >> +extcon_unregister_notifier(info->cable.edev, EXTCON_CHG_USB_DCP, >> +>cable.nb); >> return ret; >> } >> >> @@ -917,10 +940,16 @@ static int axp288_charger_remove(struct >> platform_device *pdev) >> { >> struct axp288_chrg_info *info = dev_get_drvdata(>dev); >> >> -if (info->otg.cable.edev) >> -extcon_unregister_interest(>otg.cable); >> +if (info->otg.cable) >> +extcon_unregister_notifier(info->otg.cable, EXTCON_USB_HOST, >> +>otg.id_nb); >> >> -extcon_unregister_notifier(info->cable.edev, EXTCON_NONE, >> >cable.nb); >> +extcon_unregister_notifier(info->cable.edev, EXTCON_CHG_USB_SDP, >> +>cable.nb); >> +extcon_unregister_notifier(info->cable.edev, EXTCON_CHG_USB_SDP, >> +>cable.nb); > > EXTCON_CHG_USB_SDP -> EXTCON_CHG_USB_CDP? ditto. > >> +extcon_unregister_notifier(info->cable.edev, EXTCON_CHG_USB_DCP, >> +>cable.nb); >> power_supply_unregister(info->psy_usb); >> >> return 0; > > Has this dependencies to your tree or are all dependencies already > in torvalds tree? This patch-set have the dependency on Linus torvalds tree[1]. [1] https://git.kernel.org/cgit/linux/kernel/git/torvalds/linux.git/ So, when picking this patch-set, someone make the immutable branch for this patchset to prevent the merge conflict between power_supply and extcon tree. Thanks, Chanwoo Choi
Re: [RESEND PATCH 2/3] power: axp288_charger: Replace deprecatd API of extcon
Hi Sebastian, On 2016년 04월 15일 23:20, Sebastian Reichel wrote: > Hi, > > On Fri, Apr 15, 2016 at 09:43:35AM +0900, Chanwoo Choi wrote: >> This patch removes the deprecated notifier API of extcon framework and then >> use >> the new extcon API[2] with the unique id[1] to indicate the each external >> connector. Alter deprecated API as following: >> - extcon_register_interest() -> extcon_register_notifier() >> - extcon_unregister_interest() -> extcon_unregister_notifier() >> - extcon_get_cable_state() -> extcon_get_cable_state_() >> >> And, extcon alters the name of USB charger connector in patch[3] as >> following: >> - EXTCON_CHG_USB_SDP /* Standard Downstream Port */ >> - EXTCON_CHG_USB_DCP /* Dedicated Charging Port */ >> - EXTCON_CHG_USB_CDP /* Charging Downstream Port */ >> - EXTCON_CHG_USB_ACA /* Accessory Charger Adapter */ >> >> [1] Commit 2a9de9c0f08d61 >> - ("extcon: Use the unique id for external connector instead of string) >> [2] Commit 046050f6e623e4 >> - ("extcon: Update the prototype of extcon_register_notifier() with enum >> extcon >> [3] Commit 11eecf910bd81d >> - ("extcon: Modify the id and name of external connector") >> >> Signed-off-by: Chanwoo Choi >> --- >> drivers/power/axp288_charger.c | 77 >> +- >> 1 file changed, 53 insertions(+), 24 deletions(-) >> >> diff --git a/drivers/power/axp288_charger.c b/drivers/power/axp288_charger.c >> index e4d569f57acc..e5c2569befa6 100644 >> --- a/drivers/power/axp288_charger.c >> +++ b/drivers/power/axp288_charger.c >> @@ -129,10 +129,6 @@ >> [snip] >> /* Register charger interrupts */ >> for (i = 0; i < CHRG_INTR_END; i++) { >> @@ -905,11 +922,17 @@ static int axp288_charger_probe(struct platform_device >> *pdev) >> return 0; >> >> intr_reg_failed: >> -if (info->otg.cable.edev) >> -extcon_unregister_interest(>otg.cable); >> +if (info->otg.cable) >> +extcon_unregister_notifier(info->otg.cable, EXTCON_USB_HOST, >> +>otg.id_nb); >> power_supply_unregister(info->psy_usb); >> psy_reg_failed: >> 
-extcon_unregister_notifier(info->cable.edev, EXTCON_NONE, >> >cable.nb); >> +extcon_unregister_notifier(info->cable.edev, EXTCON_CHG_USB_SDP, >> +>cable.nb); >> +extcon_unregister_notifier(info->cable.edev, EXTCON_CHG_USB_SDP, >> +>cable.nb); > > EXTCON_CHG_USB_SDP -> EXTCON_CHG_USB_CDP? I was mistaken. I'll fix it. > >> +extcon_unregister_notifier(info->cable.edev, EXTCON_CHG_USB_DCP, >> +>cable.nb); >> return ret; >> } >> >> @@ -917,10 +940,16 @@ static int axp288_charger_remove(struct >> platform_device *pdev) >> { >> struct axp288_chrg_info *info = dev_get_drvdata(>dev); >> >> -if (info->otg.cable.edev) >> -extcon_unregister_interest(>otg.cable); >> +if (info->otg.cable) >> +extcon_unregister_notifier(info->otg.cable, EXTCON_USB_HOST, >> +>otg.id_nb); >> >> -extcon_unregister_notifier(info->cable.edev, EXTCON_NONE, >> >cable.nb); >> +extcon_unregister_notifier(info->cable.edev, EXTCON_CHG_USB_SDP, >> +>cable.nb); >> +extcon_unregister_notifier(info->cable.edev, EXTCON_CHG_USB_SDP, >> +>cable.nb); > > EXTCON_CHG_USB_SDP -> EXTCON_CHG_USB_CDP? ditto. > >> +extcon_unregister_notifier(info->cable.edev, EXTCON_CHG_USB_DCP, >> +>cable.nb); >> power_supply_unregister(info->psy_usb); >> >> return 0; > > Has this dependencies to your tree or are all dependencies already > in torvalds tree? This patch-set have the dependency on Linus torvalds tree[1]. [1] https://git.kernel.org/cgit/linux/kernel/git/torvalds/linux.git/ So, when picking this patch-set, someone make the immutable branch for this patchset to prevent the merge conflict between power_supply and extcon tree. Thanks, Chanwoo Choi
Re: [PATCH v2 0/4] Introduce bulk mode for crypto engine framework
Hi Herbert, On 15 April 2016 at 21:48, Herbert Xu wrote: > On Tue, Mar 15, 2016 at 03:47:58PM +0800, Baolin Wang wrote: >> Now some cipher hardware engines prefer to handle bulk block by merging >> requests >> to increase the block size and thus increase the hardware engine processing >> speed. >> >> This patchset introduces request bulk mode to help the crypto hardware >> drivers >> improve in efficiency. > > Could you please explain why this merging can't be done in dm-crypt > instead? We've tried to do this in dm-crypt, but it failed. The dm-crypt maintainer explained to me that I should optimize the driver, not add strange hw-dependent crypto modes to dm-crypt, this is not the first crypto accelerator that is just not suited for this kind of use. He thought if it can process batch of chunks of data each with own IV, then it can work with dm-crypt, but he thought such optimized code should be inside crypto API, not in dmcrypt. I think his suggestion is reasonable, so we introduce the crypto engine framework to factor out the common patterns for driving the queue of operations. Then it will be more reasonable to do the bulk mode optimization in crypto engine framework. Thanks. > > Thanks, > -- > Email: Herbert Xu > Home Page: http://gondor.apana.org.au/~herbert/ > PGP Key: http://gondor.apana.org.au/~herbert/pubkey.txt -- Baolin.wang Best Regards
Re: [PATCH v2 0/4] Introduce bulk mode for crypto engine framework
Hi Herbert, On 15 April 2016 at 21:48, Herbert Xu wrote: > On Tue, Mar 15, 2016 at 03:47:58PM +0800, Baolin Wang wrote: >> Now some cipher hardware engines prefer to handle bulk block by merging >> requests >> to increase the block size and thus increase the hardware engine processing >> speed. >> >> This patchset introduces request bulk mode to help the crypto hardware >> drivers >> improve in efficiency. > > Could you please explain why this merging can't be done in dm-crypt > instead? We've tried to do this in dm-crypt, but it failed. The dm-crypt maintainer explained to me that I should optimize the driver, not add strange hw-dependent crypto modes to dm-crypt, this is not the first crypto accelerator that is just not suited for this kind of use. He thought if it can process batch of chunks of data each with own IV, then it can work with dm-crypt, but he thought such optimized code should be inside crypto API, not in dmcrypt. I think his suggestion is reasonable, so we introduce the crypto engine framework to factor out the common patterns for driving the queue of operations. Then it will be more reasonable to do the bulk mode optimization in crypto engine framework. Thanks. > > Thanks, > -- > Email: Herbert Xu > Home Page: http://gondor.apana.org.au/~herbert/ > PGP Key: http://gondor.apana.org.au/~herbert/pubkey.txt -- Baolin.wang Best Regards
[PATCH 3/2] cgroup_show_path: use a new helper to get current cgns css_set
Since we're getting current's cgroup namespace info, and are not modifying it, we can use rcu_read_lock() instead of cgroup_mutex. Signed-off-by: Serge Hallyn --- kernel/cgroup.c | 40 1 file changed, 36 insertions(+), 4 deletions(-) diff --git a/kernel/cgroup.c b/kernel/cgroup.c index 9a0d7b3..cd8269e 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -1215,6 +1215,41 @@ static void cgroup_destroy_root(struct cgroup_root *root) cgroup_free_root(root); } +/* + * look up cgroup associated with current task's cgroup namespace on the + * specified hierarchy + */ +static struct cgroup * +current_cgns_cgroup_from_root(struct cgroup_root *root) +{ + struct cgroup *res = NULL; + struct css_set *css; + + lockdep_assert_held(_set_lock); + + rcu_read_lock(); + + css = current->nsproxy->cgroup_ns->root_cset; + if (cset == _css_set) { + res = >cgrp; + } else { + struct cgrp_cset_link *link; + + list_for_each_entry(link, >cgrp_links, cgrp_link) { + struct cgroup *c = link->cgrp; + + if (c->root == root) { + res = c; + break; + } + } + } + rcu_read_unlock(); + + BUG_ON(!res); + return res; +} + /* look up cgroup associated with given css_set on the specified hierarchy */ static struct cgroup *cset_cgroup_from_root(struct css_set *cset, struct cgroup_root *root) @@ -1598,13 +1633,11 @@ static int cgroup_show_path(struct seq_file *sf, struct kernfs_node *kf_node, { int len = 0, ret = 0; char *buf = NULL; - struct cgroup_namespace *ns = current->nsproxy->cgroup_ns; struct cgroup_root *kf_cgroot = cgroup_root_from_kf(kf_root); struct cgroup *ns_cgroup; - mutex_lock(_mutex); spin_lock_bh(_set_lock); - ns_cgroup = cset_cgroup_from_root(ns->root_cset, kf_cgroot); + ns_cgroup = current_cgns_cgroup_from_root(kf_cgroot); len = kernfs_path_from_node(kf_node, ns_cgroup->kn, NULL, 0); if (len > 0) buf = kmalloc(len + 1, GFP_ATOMIC); @@ -1612,7 +1645,6 @@ static int cgroup_show_path(struct seq_file *sf, struct kernfs_node *kf_node, ret = kernfs_path_from_node(kf_node, ns_cgroup->kn, buf, len 
+ 1); spin_unlock_bh(_set_lock); - mutex_unlock(_mutex); if (len <= 0) return len; -- 2.7.4
[PATCH 3/2] cgroup_show_path: use a new helper to get current cgns css_set
Since we're getting current's cgroup namespace info, and are not modifying it, we can use rcu_read_lock() instead of cgroup_mutex. Signed-off-by: Serge Hallyn --- kernel/cgroup.c | 40 1 file changed, 36 insertions(+), 4 deletions(-) diff --git a/kernel/cgroup.c b/kernel/cgroup.c index 9a0d7b3..cd8269e 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -1215,6 +1215,41 @@ static void cgroup_destroy_root(struct cgroup_root *root) cgroup_free_root(root); } +/* + * look up cgroup associated with current task's cgroup namespace on the + * specified hierarchy + */ +static struct cgroup * +current_cgns_cgroup_from_root(struct cgroup_root *root) +{ + struct cgroup *res = NULL; + struct css_set *css; + + lockdep_assert_held(&css_set_lock); + + rcu_read_lock(); + + css = current->nsproxy->cgroup_ns->root_cset; + if (cset == &init_css_set) { + res = &root->cgrp; + } else { + struct cgrp_cset_link *link; + + list_for_each_entry(link, &cset->cgrp_links, cgrp_link) { + struct cgroup *c = link->cgrp; + + if (c->root == root) { + res = c; + break; + } + } + } + rcu_read_unlock(); + + BUG_ON(!res); + return res; +} + /* look up cgroup associated with given css_set on the specified hierarchy */ static struct cgroup *cset_cgroup_from_root(struct css_set *cset, struct cgroup_root *root) @@ -1598,13 +1633,11 @@ static int cgroup_show_path(struct seq_file *sf, struct kernfs_node *kf_node, { int len = 0, ret = 0; char *buf = NULL; - struct cgroup_namespace *ns = current->nsproxy->cgroup_ns; struct cgroup_root *kf_cgroot = cgroup_root_from_kf(kf_root); struct cgroup *ns_cgroup; - mutex_lock(&cgroup_mutex); spin_lock_bh(&css_set_lock); - ns_cgroup = cset_cgroup_from_root(ns->root_cset, kf_cgroot); + ns_cgroup = current_cgns_cgroup_from_root(kf_cgroot); len = kernfs_path_from_node(kf_node, ns_cgroup->kn, NULL, 0); if (len > 0) buf = kmalloc(len + 1, GFP_ATOMIC); @@ -1612,7 +1645,6 @@ static int cgroup_show_path(struct seq_file *sf, struct kernfs_node *kf_node, ret = kernfs_path_from_node(kf_node, ns_cgroup->kn, buf, 
len + 1); spin_unlock_bh(&css_set_lock); - mutex_unlock(&cgroup_mutex); if (len <= 0) return len; -- 2.7.4
Re: [PATCH] x86/entry/x32: Check top 32 bits of syscall number on the fast path
On 04/17/16 22:18, Andy Lutomirski wrote: > On Sun, Apr 17, 2016 at 9:50 PM, H. Peter Anvin wrote: >> On 04/17/16 17:47, Ben Hutchings wrote: >>> We've always masked off the top 32 bits when x32 is enabled, but >>> hopefully no-one relies on that. Now that the slow path is in C, we >>> check all the bits there, regardless of whether x32 is enabled. Let's >>> make the fast path consistent with it. >> >> We have always masked off the top 32 bits *period*. >> >> We have had some bugs where we haven't, because someone has tried to >> "optimize" the code and they have been quite serious. The system call >> number is an int, which means the upper 32 bits are undefined on call >> entry: we HAVE to mask them. > > I'm reasonably confident that normal kernels (non-x32) have not masked > those bits since before I started hacking on the entry code. > > So the type of the syscall nr is a bit confused. If there was an > installed base of programs that leaved garbage in the high bits, we > would have noticed *years* ago. On the other hand, the 32-bit ptrace > ABI and the seccomp ABI both think it's 32-bits. > > If we were designing the x86_64 ABI and everything around it from > scratch, I'd suggest that that either the high bits must be zero or > that the number actually be 64 bits (which are more or less the same > thing). That would let us use the high bits for something interesting > in the future. > > In practice, we can probably still declare that the thing is a 64-bit > number, given that most kernels in the wild currently fail syscalls > that have the high bits set. > For the record, I changed the range comparison from cmpl to cmpq so if someone re-introduced this bug *again* it would be a functionality problem as opposed to a security hole a mile wide. -hpa
Re: [PATCH] x86/entry/x32: Check top 32 bits of syscall number on the fast path
On 04/17/16 22:18, Andy Lutomirski wrote: > On Sun, Apr 17, 2016 at 9:50 PM, H. Peter Anvin wrote: >> On 04/17/16 17:47, Ben Hutchings wrote: >>> We've always masked off the top 32 bits when x32 is enabled, but >>> hopefully no-one relies on that. Now that the slow path is in C, we >>> check all the bits there, regardless of whether x32 is enabled. Let's >>> make the fast path consistent with it. >> >> We have always masked off the top 32 bits *period*. >> >> We have had some bugs where we haven't, because someone has tried to >> "optimize" the code and they have been quite serious. The system call >> number is an int, which means the upper 32 bits are undefined on call >> entry: we HAVE to mask them. > > I'm reasonably confident that normal kernels (non-x32) have not masked > those bits since before I started hacking on the entry code. > > So the type of the syscall nr is a bit confused. If there was an > installed base of programs that leaved garbage in the high bits, we > would have noticed *years* ago. On the other hand, the 32-bit ptrace > ABI and the seccomp ABI both think it's 32-bits. > > If we were designing the x86_64 ABI and everything around it from > scratch, I'd suggest that that either the high bits must be zero or > that the number actually be 64 bits (which are more or less the same > thing). That would let us use the high bits for something interesting > in the future. > > In practice, we can probably still declare that the thing is a 64-bit > number, given that most kernels in the wild currently fail syscalls > that have the high bits set. > For the record, I changed the range comparison from cmpl to cmpq so if someone re-introduced this bug *again* it would be a functionality problem as opposed to a security hole a mile wide. -hpa
Re: [PATCH] x86/entry/x32: Check top 32 bits of syscall number on the fast path
On 04/17/16 22:18, Andy Lutomirski wrote: > On Sun, Apr 17, 2016 at 9:50 PM, H. Peter Anvin wrote: >> On 04/17/16 17:47, Ben Hutchings wrote: >>> We've always masked off the top 32 bits when x32 is enabled, but >>> hopefully no-one relies on that. Now that the slow path is in C, we >>> check all the bits there, regardless of whether x32 is enabled. Let's >>> make the fast path consistent with it. >> >> We have always masked off the top 32 bits *period*. >> >> We have had some bugs where we haven't, because someone has tried to >> "optimize" the code and they have been quite serious. The system call >> number is an int, which means the upper 32 bits are undefined on call >> entry: we HAVE to mask them. > > I'm reasonably confident that normal kernels (non-x32) have not masked > those bits since before I started hacking on the entry code. > I'm reasonably confident they have, because we have had security bugs TWICE when someone has tried to "optimize" the code. The masking was generally done with a movl instruction, which confused people. > So the type of the syscall nr is a bit confused. If there was an > installed base of programs that leaved garbage in the high bits, we > would have noticed *years* ago. On the other hand, the 32-bit ptrace > ABI and the seccomp ABI both think it's 32-bits. Incorrect. We have seen these failures in real life. > If we were designing the x86_64 ABI and everything around it from > scratch, I'd suggest that that either the high bits must be zero or > that the number actually be 64 bits (which are more or less the same > thing). That would let us use the high bits for something interesting > in the future. Not really all that useful. What we have is a C ABI. > In practice, we can probably still declare that the thing is a 64-bit > number, given that most kernels in the wild currently fail syscalls > that have the high bits set. They don't, and we can prove it... -hpa
Re: [PATCH] x86/entry/x32: Check top 32 bits of syscall number on the fast path
On 04/17/16 22:18, Andy Lutomirski wrote: > On Sun, Apr 17, 2016 at 9:50 PM, H. Peter Anvin wrote: >> On 04/17/16 17:47, Ben Hutchings wrote: >>> We've always masked off the top 32 bits when x32 is enabled, but >>> hopefully no-one relies on that. Now that the slow path is in C, we >>> check all the bits there, regardless of whether x32 is enabled. Let's >>> make the fast path consistent with it. >> >> We have always masked off the top 32 bits *period*. >> >> We have had some bugs where we haven't, because someone has tried to >> "optimize" the code and they have been quite serious. The system call >> number is an int, which means the upper 32 bits are undefined on call >> entry: we HAVE to mask them. > > I'm reasonably confident that normal kernels (non-x32) have not masked > those bits since before I started hacking on the entry code. > I'm reasonably confident they have, because we have had security bugs TWICE when someone has tried to "optimize" the code. The masking was generally done with a movl instruction, which confused people. > So the type of the syscall nr is a bit confused. If there was an > installed base of programs that leaved garbage in the high bits, we > would have noticed *years* ago. On the other hand, the 32-bit ptrace > ABI and the seccomp ABI both think it's 32-bits. Incorrect. We have seen these failures in real life. > If we were designing the x86_64 ABI and everything around it from > scratch, I'd suggest that that either the high bits must be zero or > that the number actually be 64 bits (which are more or less the same > thing). That would let us use the high bits for something interesting > in the future. Not really all that useful. What we have is a C ABI. > In practice, we can probably still declare that the thing is a 64-bit > number, given that most kernels in the wild currently fail syscalls > that have the high bits set. They don't, and we can prove it... -hpa
Re: [PATCH] x86/entry/x32: Check top 32 bits of syscall number on the fast path
On Sun, Apr 17, 2016 at 9:50 PM, H. Peter Anvin wrote: > On 04/17/16 17:47, Ben Hutchings wrote: >> We've always masked off the top 32 bits when x32 is enabled, but >> hopefully no-one relies on that. Now that the slow path is in C, we >> check all the bits there, regardless of whether x32 is enabled. Let's >> make the fast path consistent with it. > > We have always masked off the top 32 bits *period*. > > We have had some bugs where we haven't, because someone has tried to > "optimize" the code and they have been quite serious. The system call > number is an int, which means the upper 32 bits are undefined on call > entry: we HAVE to mask them. I'm reasonably confident that normal kernels (non-x32) have not masked those bits since before I started hacking on the entry code. So the type of the syscall nr is a bit confused. If there was an installed base of programs that leaved garbage in the high bits, we would have noticed *years* ago. On the other hand, the 32-bit ptrace ABI and the seccomp ABI both think it's 32-bits. If we were designing the x86_64 ABI and everything around it from scratch, I'd suggest that that either the high bits must be zero or that the number actually be 64 bits (which are more or less the same thing). That would let us use the high bits for something interesting in the future. In practice, we can probably still declare that the thing is a 64-bit number, given that most kernels in the wild currently fail syscalls that have the high bits set. --Andy
Re: [PATCH] x86/entry/x32: Check top 32 bits of syscall number on the fast path
On Sun, Apr 17, 2016 at 9:50 PM, H. Peter Anvin wrote: > On 04/17/16 17:47, Ben Hutchings wrote: >> We've always masked off the top 32 bits when x32 is enabled, but >> hopefully no-one relies on that. Now that the slow path is in C, we >> check all the bits there, regardless of whether x32 is enabled. Let's >> make the fast path consistent with it. > > We have always masked off the top 32 bits *period*. > > We have had some bugs where we haven't, because someone has tried to > "optimize" the code and they have been quite serious. The system call > number is an int, which means the upper 32 bits are undefined on call > entry: we HAVE to mask them. I'm reasonably confident that normal kernels (non-x32) have not masked those bits since before I started hacking on the entry code. So the type of the syscall nr is a bit confused. If there was an installed base of programs that leaved garbage in the high bits, we would have noticed *years* ago. On the other hand, the 32-bit ptrace ABI and the seccomp ABI both think it's 32-bits. If we were designing the x86_64 ABI and everything around it from scratch, I'd suggest that that either the high bits must be zero or that the number actually be 64 bits (which are more or less the same thing). That would let us use the high bits for something interesting in the future. In practice, we can probably still declare that the thing is a 64-bit number, given that most kernels in the wild currently fail syscalls that have the high bits set. --Andy
arch/ia64/kernel/entry.S:621: Error: Operand 2 of `adds' should be a 14-bit integer (-8192-8191)
Hi Will, FYI, the error/warning still remains. tree: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git master head: c3b46c73264b03000d1e18b22f5caf63332547c9 commit: da48d094ce5d7c7dcdad9011648a81c42fd1c2ef Kconfig: remove HAVE_LATENCYTOP_SUPPORT date: 3 months ago config: ia64-allmodconfig (attached as .config) reproduce: wget https://git.kernel.org/cgit/linux/kernel/git/wfg/lkp-tests.git/plain/sbin/make.cross -O ~/bin/make.cross chmod +x ~/bin/make.cross git checkout da48d094ce5d7c7dcdad9011648a81c42fd1c2ef # save the attached .config to linux build tree make.cross ARCH=ia64 All errors (new ones prefixed by >>): arch/ia64/kernel/entry.S: Assembler messages: >> arch/ia64/kernel/entry.S:621: Error: Operand 2 of `adds' should be a 14-bit >> integer (-8192-8191) arch/ia64/kernel/entry.S:728: Error: Operand 2 of `adds' should be a 14-bit integer (-8192-8191) arch/ia64/kernel/entry.S:859: Error: Operand 2 of `adds' should be a 14-bit integer (-8192-8191) -- arch/ia64/kernel/fsys.S: Assembler messages: >> arch/ia64/kernel/fsys.S:67: Error: Operand 3 of `add' should be a general >> register r0-r3 arch/ia64/kernel/fsys.S:97: Error: Operand 3 of `add' should be a general register r0-r3 arch/ia64/kernel/fsys.S:193: Error: Operand 3 of `add' should be a general register r0-r3 arch/ia64/kernel/fsys.S:336: Error: Operand 3 of `add' should be a general register r0-r3 arch/ia64/kernel/fsys.S:338: Error: Operand 3 of `add' should be a general register r0-r3 -- arch/ia64/kernel/ivt.S: Assembler messages: >> arch/ia64/kernel/ivt.S:759: Error: Operand 3 of `add' should be a general >> register r0-r3 vim +621 arch/ia64/kernel/entry.S ^1da177e Linus Torvalds 2005-04-16 605 PT_REGS_UNWIND_INFO(0) ^1da177e Linus Torvalds 2005-04-16 606 { /* ^1da177e Linus Torvalds 2005-04-16 607 * Some versions of gas generate bad unwind info if the first instruction of a ^1da177e Linus Torvalds 2005-04-16 608 * procedure doesn't go into the first slot of a bundle. This is a workaround. 
^1da177e Linus Torvalds 2005-04-16 609 */ ^1da177e Linus Torvalds 2005-04-16 610 nop.m 0 ^1da177e Linus Torvalds 2005-04-16 611 nop.i 0 ^1da177e Linus Torvalds 2005-04-16 612 /* ^1da177e Linus Torvalds 2005-04-16 613 * We need to call schedule_tail() to complete the scheduling process. ^1da177e Linus Torvalds 2005-04-16 614 * Called by ia64_switch_to() after do_fork()->copy_thread(). r8 contains the ^1da177e Linus Torvalds 2005-04-16 615 * address of the previously executing task. ^1da177e Linus Torvalds 2005-04-16 616 */ ^1da177e Linus Torvalds 2005-04-16 617 br.call.sptk.many rp=ia64_invoke_schedule_tail ^1da177e Linus Torvalds 2005-04-16 618 } ^1da177e Linus Torvalds 2005-04-16 619 .ret8: 54d496c3 Al Viro2012-10-14 620 (pKStk)br.call.sptk.many rp=call_payload ^1da177e Linus Torvalds 2005-04-16 @621 adds r2=TI_FLAGS+IA64_TASK_SIZE,r13 ^1da177e Linus Torvalds 2005-04-16 622 ;; ^1da177e Linus Torvalds 2005-04-16 623 ld4 r2=[r2] ^1da177e Linus Torvalds 2005-04-16 624 ;; ^1da177e Linus Torvalds 2005-04-16 625 mov r8=0 ^1da177e Linus Torvalds 2005-04-16 626 and r2=_TIF_SYSCALL_TRACEAUDIT,r2 ^1da177e Linus Torvalds 2005-04-16 627 ;; ^1da177e Linus Torvalds 2005-04-16 628 cmp.ne p6,p0=r2,r0 ^1da177e Linus Torvalds 2005-04-16 629 (p6) br.cond.spnt .strace_check_retval :: The code at line 621 was first introduced by commit :: 1da177e4c3f41524e886b7f1b8a0c1fc7321cac2 Linux-2.6.12-rc2 :: TO: Linus Torvalds:: CC: Linus Torvalds --- 0-DAY kernel test infrastructureOpen Source Technology Center https://lists.01.org/pipermail/kbuild-all Intel Corporation .config.gz Description: Binary data
arch/ia64/kernel/entry.S:621: Error: Operand 2 of `adds' should be a 14-bit integer (-8192-8191)
Hi Will, FYI, the error/warning still remains. tree: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git master head: c3b46c73264b03000d1e18b22f5caf63332547c9 commit: da48d094ce5d7c7dcdad9011648a81c42fd1c2ef Kconfig: remove HAVE_LATENCYTOP_SUPPORT date: 3 months ago config: ia64-allmodconfig (attached as .config) reproduce: wget https://git.kernel.org/cgit/linux/kernel/git/wfg/lkp-tests.git/plain/sbin/make.cross -O ~/bin/make.cross chmod +x ~/bin/make.cross git checkout da48d094ce5d7c7dcdad9011648a81c42fd1c2ef # save the attached .config to linux build tree make.cross ARCH=ia64 All errors (new ones prefixed by >>): arch/ia64/kernel/entry.S: Assembler messages: >> arch/ia64/kernel/entry.S:621: Error: Operand 2 of `adds' should be a 14-bit >> integer (-8192-8191) arch/ia64/kernel/entry.S:728: Error: Operand 2 of `adds' should be a 14-bit integer (-8192-8191) arch/ia64/kernel/entry.S:859: Error: Operand 2 of `adds' should be a 14-bit integer (-8192-8191) -- arch/ia64/kernel/fsys.S: Assembler messages: >> arch/ia64/kernel/fsys.S:67: Error: Operand 3 of `add' should be a general >> register r0-r3 arch/ia64/kernel/fsys.S:97: Error: Operand 3 of `add' should be a general register r0-r3 arch/ia64/kernel/fsys.S:193: Error: Operand 3 of `add' should be a general register r0-r3 arch/ia64/kernel/fsys.S:336: Error: Operand 3 of `add' should be a general register r0-r3 arch/ia64/kernel/fsys.S:338: Error: Operand 3 of `add' should be a general register r0-r3 -- arch/ia64/kernel/ivt.S: Assembler messages: >> arch/ia64/kernel/ivt.S:759: Error: Operand 3 of `add' should be a general >> register r0-r3 vim +621 arch/ia64/kernel/entry.S ^1da177e Linus Torvalds 2005-04-16 605 PT_REGS_UNWIND_INFO(0) ^1da177e Linus Torvalds 2005-04-16 606 { /* ^1da177e Linus Torvalds 2005-04-16 607 * Some versions of gas generate bad unwind info if the first instruction of a ^1da177e Linus Torvalds 2005-04-16 608 * procedure doesn't go into the first slot of a bundle. This is a workaround. 
^1da177e Linus Torvalds 2005-04-16 609 */ ^1da177e Linus Torvalds 2005-04-16 610 nop.m 0 ^1da177e Linus Torvalds 2005-04-16 611 nop.i 0 ^1da177e Linus Torvalds 2005-04-16 612 /* ^1da177e Linus Torvalds 2005-04-16 613 * We need to call schedule_tail() to complete the scheduling process. ^1da177e Linus Torvalds 2005-04-16 614 * Called by ia64_switch_to() after do_fork()->copy_thread(). r8 contains the ^1da177e Linus Torvalds 2005-04-16 615 * address of the previously executing task. ^1da177e Linus Torvalds 2005-04-16 616 */ ^1da177e Linus Torvalds 2005-04-16 617 br.call.sptk.many rp=ia64_invoke_schedule_tail ^1da177e Linus Torvalds 2005-04-16 618 } ^1da177e Linus Torvalds 2005-04-16 619 .ret8: 54d496c3 Al Viro2012-10-14 620 (pKStk)br.call.sptk.many rp=call_payload ^1da177e Linus Torvalds 2005-04-16 @621 adds r2=TI_FLAGS+IA64_TASK_SIZE,r13 ^1da177e Linus Torvalds 2005-04-16 622 ;; ^1da177e Linus Torvalds 2005-04-16 623 ld4 r2=[r2] ^1da177e Linus Torvalds 2005-04-16 624 ;; ^1da177e Linus Torvalds 2005-04-16 625 mov r8=0 ^1da177e Linus Torvalds 2005-04-16 626 and r2=_TIF_SYSCALL_TRACEAUDIT,r2 ^1da177e Linus Torvalds 2005-04-16 627 ;; ^1da177e Linus Torvalds 2005-04-16 628 cmp.ne p6,p0=r2,r0 ^1da177e Linus Torvalds 2005-04-16 629 (p6) br.cond.spnt .strace_check_retval :: The code at line 621 was first introduced by commit :: 1da177e4c3f41524e886b7f1b8a0c1fc7321cac2 Linux-2.6.12-rc2 :: TO: Linus Torvalds :: CC: Linus Torvalds --- 0-DAY kernel test infrastructureOpen Source Technology Center https://lists.01.org/pipermail/kbuild-all Intel Corporation .config.gz Description: Binary data
Re: [PATCH v1] ARM: dts: omap3-n900: Specify peripherals LDO regulators initial mode
Hi, On Sun, Apr 17, 2016 at 05:29:23PM +0300, Ivaylo Dimitrov wrote: > Without that, regulators are left in the mode last set by the bootloader or > by the kernel the device was rebooted from. This leads to various problems, > like non-working peripherals. > > Signed-off-by: Ivaylo Dimitrov Reviewed-By: Sebastian Reichel -- Sebastian signature.asc Description: PGP signature
Re: [PATCH v1] ARM: dts: omap3-n900: Specify peripherals LDO regulators initial mode
Hi, On Sun, Apr 17, 2016 at 05:29:23PM +0300, Ivaylo Dimitrov wrote: > Without that, regulators are left in the mode last set by the bootloader or > by the kernel the device was rebooted from. This leads to various problems, > like non-working peripherals. > > Signed-off-by: Ivaylo Dimitrov Reviewed-By: Sebastian Reichel -- Sebastian signature.asc Description: PGP signature
Re: [PATCH V2] net: ethernet: mellanox: correct page conversion
On 2016-04-18 00:00, David Miller wrote: From: Sinan Kaya Date: Sat, 16 Apr 2016 18:23:32 -0400 Current code is assuming that the address returned by dma_alloc_coherent is a logical address. This is not true on ARM/ARM64 systems. This patch replaces dma_alloc_coherent with dma_map_page API. The address returned can later by virtually mapped from the CPU side with vmap API. Signed-off-by: Sinan Kaya You can't do this. The DMA map page API gives non-coherent mappings, and thus requires proper flushing. So a straight conversion like this is never legitimate. I would agree on proper dma api usage. However, the code is already assuming coherent architecture by mapping the cpu pages as page_kernel. Dma_map_page returns cached buffers and you don't need cache flushes on coherent architecture to make the data visible.
Re: [PATCH V2] net: ethernet: mellanox: correct page conversion
On 2016-04-18 00:00, David Miller wrote: From: Sinan Kaya Date: Sat, 16 Apr 2016 18:23:32 -0400 Current code is assuming that the address returned by dma_alloc_coherent is a logical address. This is not true on ARM/ARM64 systems. This patch replaces dma_alloc_coherent with dma_map_page API. The address returned can later by virtually mapped from the CPU side with vmap API. Signed-off-by: Sinan Kaya You can't do this. The DMA map page API gives non-coherent mappings, and thus requires proper flushing. So a straight conversion like this is never legitimate. I would agree on proper dma api usage. However, the code is already assuming coherent architecture by mapping the cpu pages as page_kernel. Dma_map_page returns cached buffers and you don't need cache flushes on coherent architecture to make the data visible.
Re: [PATCH 2/2] ARM: davinci: da850: use clk->set_parent for async3
On Sunday 17 April 2016 01:01 AM, David Lechner wrote: >> +static int da850_async3_set_parent(struct clk *clk, struct clk *parent) >> +{ >> +u32 val; >> + >> +val = readl(DA8XX_SYSCFG0_VIRT(DA8XX_CFGCHIP3_REG)); >> + >> +/* Set the USB 1.1 PHY clock mux based on the parent clock. */ > > I seem to have regressed here since the last revision, this is supposed > to read: > > /* Set the async3 clock domain mux based on the parent clock. */ > > Although now that I am looking at it again, it doesn't really add > anything useful and could be omitted altogether. Agree the comment is redundant. No need resend just for this though. I can drop it when applying. Thanks, Sekhar
Re: [PATCH 2/2] ARM: davinci: da850: use clk->set_parent for async3
On Sunday 17 April 2016 01:01 AM, David Lechner wrote: >> +static int da850_async3_set_parent(struct clk *clk, struct clk *parent) >> +{ >> +u32 val; >> + >> +val = readl(DA8XX_SYSCFG0_VIRT(DA8XX_CFGCHIP3_REG)); >> + >> +/* Set the USB 1.1 PHY clock mux based on the parent clock. */ > > I seem to have regressed here since the last revision, this is supposed > to read: > > /* Set the async3 clock domain mux based on the parent clock. */ > > Although now that I am looking at it again, it doesn't really add > anything useful and could be omitted altogether. Agree the comment is redundant. No need resend just for this though. I can drop it when applying. Thanks, Sekhar
Re: [PATCH v2 4/5] iio: health: afe4404: use regmap to retrieve struct device
On Sun, Apr 17, 2016 at 01:07:52PM -0500, Andrew F. Davis wrote: > On 04/16/2016 02:22 PM, Jonathan Cameron wrote: > > On 10/04/16 20:07, Alison Schofield wrote: > >> Driver includes struct regmap and struct device in its global data. > >> Remove the struct device and use regmap API to retrieve device info. > >> > > Why? This adds nothing but more code to get dev through some > container_of trickery when we could just keep a dev pointer in the data > structure. > > Andrew Thanks for the review and response. The why would be for simplification and uniformity across IIO. I think I see your point in general, but not sure I get your specific concerns with these afe4403/04 drivers. The drivers only use the device struct in probe and then again at device remove time. At probe, the change no longer stores it in the global data. At remove the regmap_get_device() func is a simple dereference to retrieve the device struct. That's the simplification: we don't carry that ptr in global data waiting for the opportunity to use it at device remove. We just find it when we need it at device remove. (Perhaps these devices are getting removed frequently?) Regards, alisons
Re: [PATCH v2 4/5] iio: health: afe4404: use regmap to retrieve struct device
On Sun, Apr 17, 2016 at 01:07:52PM -0500, Andrew F. Davis wrote: > On 04/16/2016 02:22 PM, Jonathan Cameron wrote: > > On 10/04/16 20:07, Alison Schofield wrote: > >> Driver includes struct regmap and struct device in its global data. > >> Remove the struct device and use regmap API to retrieve device info. > >> > > Why? This adds nothing but more code to get dev through some > container_of trickery when we could just keep a dev pointer in the data > structure. > > Andrew Thanks for the review and response. The why would be for simplification and uniformity across IIO. I think I see your point in general, but not sure I get your specific concerns with these afe4403/04 drivers. The drivers only use the device struct in probe and then again at device remove time. At probe, the change no longer stores it in the global data. At remove the regmap_get_device() func is a simple dereference to retrieve the device struct. That's the simplification: we don't carry that ptr in global data waiting for the opportunity to use it at device remove. We just find it when we need it at device remove. (Perhaps these devices are getting removed frequently?) Regards, alisons
Re: [PATCH] x86/entry/x32: Check top 32 bits of syscall number on the fast path
On 04/17/16 17:47, Ben Hutchings wrote: > We've always masked off the top 32 bits when x32 is enabled, but > hopefully no-one relies on that. Now that the slow path is in C, we > check all the bits there, regardless of whether x32 is enabled. Let's > make the fast path consistent with it. We have always masked off the top 32 bits *period*. We have had some bugs where we haven't, because someone has tried to "optimize" the code and they have been quite serious. The system call number is an int, which means the upper 32 bits are undefined on call entry: we HAVE to mask them. -hpa
Re: [PATCH] x86/entry/x32: Check top 32 bits of syscall number on the fast path
On 04/17/16 17:47, Ben Hutchings wrote: > We've always masked off the top 32 bits when x32 is enabled, but > hopefully no-one relies on that. Now that the slow path is in C, we > check all the bits there, regardless of whether x32 is enabled. Let's > make the fast path consistent with it. We have always masked off the top 32 bits *period*. We have had some bugs where we haven't, because someone has tried to "optimize" the code and they have been quite serious. The system call number is an int, which means the upper 32 bits are undefined on call entry: we HAVE to mask them. -hpa
RE: [patch] intel_telemetry_pltdrv: silence an unintialized variable warning
Looks good...thanks Dan. > -Original Message- > From: Dan Carpenter [mailto:dan.carpen...@oracle.com] > Sent: Friday, April 15, 2016 8:16 PM > To: Chakravarty, Souvik K > Cc: Darren Hart ; platform-driver- > x...@vger.kernel.org; linux-kernel@vger.kernel.org; kernel- > janit...@vger.kernel.org > Subject: [patch] intel_telemetry_pltdrv: silence an unintialized variable > warning > > Presumably "pss_period" and "ioss_period" can't both be zero, but this > function is never called so we can't infer that using static analysis alone. > > Let's silence the warning by setting "ret" to zero. > > Signed-off-by: Dan Carpenter > > diff --git a/drivers/platform/x86/intel_telemetry_pltdrv.c > b/drivers/platform/x86/intel_telemetry_pltdrv.c > index 397119f..781bd10 100644 > --- a/drivers/platform/x86/intel_telemetry_pltdrv.c > +++ b/drivers/platform/x86/intel_telemetry_pltdrv.c > @@ -659,7 +659,7 @@ static int telemetry_plt_update_events(struct > telemetry_evtconfig pss_evtconfig, static int > telemetry_plt_set_sampling_period(u8 pss_period, u8 ioss_period) { > u32 telem_ctrl = 0; > - int ret; > + int ret = 0; > > mutex_lock(&(telm_conf->telem_lock)); > if (ioss_period) {
RE: [patch] intel_telemetry_pltdrv: silence an unintialized variable warning
Looks good...thanks Dan. > -Original Message- > From: Dan Carpenter [mailto:dan.carpen...@oracle.com] > Sent: Friday, April 15, 2016 8:16 PM > To: Chakravarty, Souvik K > Cc: Darren Hart ; platform-driver- > x...@vger.kernel.org; linux-kernel@vger.kernel.org; kernel- > janit...@vger.kernel.org > Subject: [patch] intel_telemetry_pltdrv: silence an unintialized variable > warning > > Presumably "pss_period" and "ioss_period" can't both be zero, but this > function is never called so we can't infer that using static analysis alone. > > Let's silence the warning by setting "ret" to zero. > > Signed-off-by: Dan Carpenter > > diff --git a/drivers/platform/x86/intel_telemetry_pltdrv.c > b/drivers/platform/x86/intel_telemetry_pltdrv.c > index 397119f..781bd10 100644 > --- a/drivers/platform/x86/intel_telemetry_pltdrv.c > +++ b/drivers/platform/x86/intel_telemetry_pltdrv.c > @@ -659,7 +659,7 @@ static int telemetry_plt_update_events(struct > telemetry_evtconfig pss_evtconfig, static int > telemetry_plt_set_sampling_period(u8 pss_period, u8 ioss_period) { > u32 telem_ctrl = 0; > - int ret; > + int ret = 0; > > mutex_lock(&(telm_conf->telem_lock)); > if (ioss_period) {
linux-next: manual merge of the livepatching tree with the powerpc tree
Hi Jiri, Today's linux-next merge of the livepatching tree got a conflict in: arch/powerpc/kernel/process.c between commit: 7f92bc569455 ("powerpc: sparse: Include headers for __weak symbols") from the powerpc tree and commit: 5d31a96e6c01 ("powerpc/livepatch: Add livepatch stack to struct thread_info") from the livepatching tree. I fixed it up (see below) and can carry the fix as necessary. This is now fixed as far as linux-next is concerned, but any non trivial conflicts should be mentioned to your upstream maintainer when your tree is submitted for merging. You may also want to consider cooperating with the maintainer of the conflicting tree to minimise any particularly complex conflicts. -- Cheers, Stephen Rothwell diff --cc arch/powerpc/kernel/process.c index 4695088e7dd2,a290ee374aa0.. --- a/arch/powerpc/kernel/process.c +++ b/arch/powerpc/kernel/process.c @@@ -56,7 -55,8 +56,9 @@@ #include #endif #include +#include + #include + #include #include
[PATCHSET v4 0/8] Make background writeback not suck
Hi, Since the dawn of time, our background buffered writeback has sucked. When we do background buffered writeback, it should have little impact on foreground activity. That's the definition of background activity... But for as long as I can remember, heavy buffered writers have not behaved like that. For instance, if I do something like this: $ dd if=/dev/zero of=foo bs=1M count=10k on my laptop, and then try and start chrome, it basically won't start before the buffered writeback is done. Or, for server oriented workloads, where installation of a big RPM (or similar) adversely impacts database reads or sync writes. When that happens, I get people yelling at me. I have posted plenty of results previously, I'll keep it shorter this time. Here's a run on my laptop, using read-to-pipe-async for reading a 5g file, and rewriting it. 4.6-rc3: $ t/read-to-pipe-async -f ~/5g > 5g-new Latency percentiles (usec) (READERS) 50.0000th: 2 75.0000th: 3 90.0000th: 5 95.0000th: 7 99.0000th: 43 99.5000th: 77 99.9000th: 9008 99.9900th: 91008 99.9990th: 286208 99.9999th: 347648 Over=1251, min=0, max=358081 Latency percentiles (usec) (WRITERS) 50.0000th: 4 75.0000th: 8 90.0000th: 13 95.0000th: 15 99.0000th: 32 99.5000th: 43 99.9000th: 81 99.9900th: 2372 99.9990th: 104320 99.9999th: 349696 Over=63, min=1, max=358321 Read rate (KB/sec) : 91859 Write rate (KB/sec): 91859 4.6-rc3 + wb-buf-throttle Latency percentiles (usec) (READERS) 50.0000th: 2 75.0000th: 3 90.0000th: 5 95.0000th: 8 99.0000th: 48 99.5000th: 79 99.9000th: 5304 99.9900th: 22496 99.9990th: 29408 99.9999th: 33728 Over=860, min=0, max=37599 Latency percentiles (usec) (WRITERS) 50.0000th: 4 75.0000th: 9 90.0000th: 14 95.0000th: 16 99.0000th: 34 99.5000th: 45 99.9000th: 87 99.9900th: 1342 99.9990th: 13648 99.9999th: 21280 Over=29, min=1, max=30457 Read rate (KB/sec) : 95832 Write rate (KB/sec): 95832 Better throughput and tighter latencies, for both reads and writes. That's hard not to like. The above was the why. The how is basically throttling background writeback. 
We still want to issue big writes from the vm side of things, so we get nice and big extents on the file system end. But we don't need to flood the device with THOUSANDS of requests for background writeback. For most devices, we don't need a whole lot to get decent throughput. This adds some simple blk-wb code that limits how much buffered writeback we keep in flight on the device end. It's all about managing the queues on the hardware side. The big change in this version is that it should be pretty much auto-tuning - you no longer have to set a given percentage of writeback bandwidth. I've implemented something similar to CoDel to manage the writeback queue. See the last patch for a full description, but the tldr is that we monitor min latencies over a window of time, and scale up/down the queue based on that. This needs a minimum of tunables, and it stays out of the way, if your device is fast enough. There's a single tunable now, wb_last_usec, that simply sets this latency target. Most people won't have to touch this, it'll work pretty well just being in the ballpark. I welcome testing. If you are sick of Linux bogging down when buffered writes are happening, then this is for you, laptop or server. The patchset is fully stable, I have not observed problems. It passes full xfstest runs, and a variety of benchmarks as well. It works equally well on blk-mq/scsi-mq, and "classic" setups. You can also find this in a branch in the block git repo: git://git.kernel.dk/linux-block.git wb-buf-throttle Note that I rebase this branch when I collapse patches. The wb-buf-throttle-v4 will remain the same as this version. I've folded the device write cache changes into my 4.7 branches, so they are not a part of this posting. Get the full wb-buf-throttle branch, or apply the patches here on top of my for-next. 
A full patch against Linus' current tree can also be downloaded here: http://brick.kernel.dk/snaps/wb-buf-throttle-v4.patch Changes since v3 - Re-do the mm/ writeback parts. Add REQ_BG for background writes, and don't overload the wbc 'reason' for writeback decisions. - Add tracking for when apps are sleeping waiting for a page to complete. - Change wbc_to_write() to wbc_to_write_cmd(). - Use atomic_t for the balance_dirty_pages() sleep count. - Add a basic scalable block stats tracking framework. - Rewrite blk-wb core as described above, to dynamically adapt. This is a big change, see the last patch for a full description of it. - Add tracing to blk-wb, instead of using debug printk's. - Rebased to 4.6-rc3 (ish) Changes since v2 -
[PATCH 1/8] block: add WRITE_BG
This adds a new request flag, REQ_BG, that callers can use to tell the block layer that this is background (non-urgent) IO. Signed-off-by: Jens Axboe--- include/linux/blk_types.h | 4 +++- include/linux/fs.h| 4 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h index 86a38ea1823f..223012451c7a 100644 --- a/include/linux/blk_types.h +++ b/include/linux/blk_types.h @@ -161,6 +161,7 @@ enum rq_flag_bits { __REQ_INTEGRITY,/* I/O includes block integrity payload */ __REQ_FUA, /* forced unit access */ __REQ_FLUSH,/* request for cache flush */ + __REQ_BG, /* background activity */ /* bio only flags */ __REQ_RAHEAD, /* read ahead, can fail anytime */ @@ -208,7 +209,7 @@ enum rq_flag_bits { #define REQ_COMMON_MASK \ (REQ_WRITE | REQ_FAILFAST_MASK | REQ_SYNC | REQ_META | REQ_PRIO | \ REQ_DISCARD | REQ_WRITE_SAME | REQ_NOIDLE | REQ_FLUSH | REQ_FUA | \ -REQ_SECURE | REQ_INTEGRITY) +REQ_SECURE | REQ_INTEGRITY | REQ_BG) #define REQ_CLONE_MASK REQ_COMMON_MASK #define BIO_NO_ADVANCE_ITER_MASK (REQ_DISCARD|REQ_WRITE_SAME) @@ -235,6 +236,7 @@ enum rq_flag_bits { #define REQ_COPY_USER (1ULL << __REQ_COPY_USER) #define REQ_FLUSH (1ULL << __REQ_FLUSH) #define REQ_FLUSH_SEQ (1ULL << __REQ_FLUSH_SEQ) +#define REQ_BG (1ULL << __REQ_BG) #define REQ_IO_STAT(1ULL << __REQ_IO_STAT) #define REQ_MIXED_MERGE(1ULL << __REQ_MIXED_MERGE) #define REQ_SECURE (1ULL << __REQ_SECURE) diff --git a/include/linux/fs.h b/include/linux/fs.h index 70e61b58baaf..bb8f951cc619 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -192,6 +192,9 @@ typedef void (dax_iodone_t)(struct buffer_head *bh_map, int uptodate); * WRITE_FLUSH_FUA Combination of WRITE_FLUSH and FUA. The IO is preceded * by a cache flush and data is guaranteed to be on * non-volatile media on completion. + * WRITE_BGBackground write. 
This is for background activity like + * the periodic flush and background threshold writeback + * * */ #define RW_MASKREQ_WRITE @@ -207,6 +210,7 @@ typedef void (dax_iodone_t)(struct buffer_head *bh_map, int uptodate); #define WRITE_FLUSH(WRITE | REQ_SYNC | REQ_NOIDLE | REQ_FLUSH) #define WRITE_FUA (WRITE | REQ_SYNC | REQ_NOIDLE | REQ_FUA) #define WRITE_FLUSH_FUA(WRITE | REQ_SYNC | REQ_NOIDLE | REQ_FLUSH | REQ_FUA) +#define WRITE_BG (WRITE | REQ_NOIDLE | REQ_BG) /* * Attribute flags. These should be or-ed together to figure out what -- 2.8.0.rc4.6.g7e4ba36
linux-next: manual merge of the livepatching tree with the powerpc tree
Hi Jiri, Today's linux-next merge of the livepatching tree got a conflict in: arch/powerpc/kernel/process.c between commit: 7f92bc569455 ("powerpc: sparse: Include headers for __weak symbols") from the powerpc tree and commit: 5d31a96e6c01 ("powerpc/livepatch: Add livepatch stack to struct thread_info") from the livepatching tree. I fixed it up (see below) and can carry the fix as necessary. This is now fixed as far as linux-next is concerned, but any non trivial conflicts should be mentioned to your upstream maintainer when your tree is submitted for merging. You may also want to consider cooperating with the maintainer of the conflicting tree to minimise any particularly complex conflicts. -- Cheers, Stephen Rothwell diff --cc arch/powerpc/kernel/process.c index 4695088e7dd2,a290ee374aa0.. --- a/arch/powerpc/kernel/process.c +++ b/arch/powerpc/kernel/process.c @@@ -56,7 -55,8 +56,9 @@@ #include #endif #include +#include + #include + #include #include
[PATCHSET v4 0/8] Make background writeback not suck
Hi, Since the dawn of time, our background buffered writeback has sucked. When we do background buffered writeback, it should have little impact on foreground activity. That's the definition of background activity... But for as long as I can remember, heavy buffered writers have not behaved like that. For instance, if I do something like this: $ dd if=/dev/zero of=foo bs=1M count=10k on my laptop, and then try and start chrome, it basically won't start before the buffered writeback is done. Or, for server oriented workloads, where installation of a big RPM (or similar) adversely impacts database reads or sync writes. When that happens, I get people yelling at me. I have posted plenty of results previously, I'll keep it shorter this time. Here's a run on my laptop, using read-to-pipe-async for reading a 5g file, and rewriting it. 4.6-rc3: $ t/read-to-pipe-async -f ~/5g > 5g-new Latency percentiles (usec) (READERS) 50.th: 2 75.th: 3 90.th: 5 95.th: 7 99.th: 43 99.5000th: 77 99.9000th: 9008 99.9900th: 91008 99.9990th: 286208 99.th: 347648 Over=1251, min=0, max=358081 Latency percentiles (usec) (WRITERS) 50.th: 4 75.th: 8 90.th: 13 95.th: 15 99.th: 32 99.5000th: 43 99.9000th: 81 99.9900th: 2372 99.9990th: 104320 99.th: 349696 Over=63, min=1, max=358321 Read rate (KB/sec) : 91859 Write rate (KB/sec): 91859 4.6-rc3 + wb-buf-throttle Latency percentiles (usec) (READERS) 50.th: 2 75.th: 3 90.th: 5 95.th: 8 99.th: 48 99.5000th: 79 99.9000th: 5304 99.9900th: 22496 99.9990th: 29408 99.th: 33728 Over=860, min=0, max=37599 Latency percentiles (usec) (WRITERS) 50.th: 4 75.th: 9 90.th: 14 95.th: 16 99.th: 34 99.5000th: 45 99.9000th: 87 99.9900th: 1342 99.9990th: 13648 99.th: 21280 Over=29, min=1, max=30457 Read rate (KB/sec) : 95832 Write rate (KB/sec): 95832 Better throughput and tighter latencies, for both reads and writes. That's hard not to like. The above was the why. The how is basically throttling background writeback. 
We still want to issue big writes from the vm side of things, so we get nice and big extents on the file system end. But we don't need to flood the device with THOUSANDS of requests for background writeback. For most devices, we don't need a whole lot to get decent throughput. This adds some simple blk-wb code that limits how much buffered writeback we keep in flight on the device end. It's all about managing the queues on the hardware side. The big change in this version is that it should be pretty much auto-tuning - you no longer have to set a given percentage of writeback bandwidth. I've implemented something similar to CoDel to manage the writeback queue. See the last patch for a full description, but the tldr is that we monitor min latencies over a window of time, and scale up/down the queue based on that. This needs a minimum of tunables, and it stays out of the way, if your device is fast enough. There's a single tunable now, wb_last_usec, that simply sets this latency target. Most people won't have to touch this, it'll work pretty well just being in the ballpark. I welcome testing. If you are sick of Linux bogging down when buffered writes are happening, then this is for you, laptop or server. The patchset is fully stable, I have not observed problems. It passes full xfstest runs, and a variety of benchmarks as well. It works equally well on blk-mq/scsi-mq, and "classic" setups. You can also find this in a branch in the block git repo: git://git.kernel.dk/linux-block.git wb-buf-throttle Note that I rebase this branch when I collapse patches. The wb-buf-throttle-v4 will remain the same as this version. I've folded the device write cache changes into my 4.7 branches, so they are not a part of this posting. Get the full wb-buf-throttle branch, or apply the patches here on top of my for-next. 
A full patch against Linus' current tree can also be downloaded here: http://brick.kernel.dk/snaps/wb-buf-throttle-v4.patch Changes since v3 - Re-do the mm/ writeback parts. Add REQ_BG for background writes, and don't overload the wbc 'reason' for writeback decisions. - Add tracking for when apps are sleeping waiting for a page to complete. - Change wbc_to_write() to wbc_to_write_cmd(). - Use atomic_t for the balance_dirty_pages() sleep count. - Add a basic scalable block stats tracking framework. - Rewrite blk-wb core as described above, to dynamically adapt. This is a big change, see the last patch for a full description of it. - Add tracing to blk-wb, instead of using debug printk's. - Rebased to 4.6-rc3 (ish) Changes since v2 -
[PATCH 1/8] block: add WRITE_BG
This adds a new request flag, REQ_BG, that callers can use to tell the block layer that this is background (non-urgent) IO. Signed-off-by: Jens Axboe --- include/linux/blk_types.h | 4 +++- include/linux/fs.h| 4 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h index 86a38ea1823f..223012451c7a 100644 --- a/include/linux/blk_types.h +++ b/include/linux/blk_types.h @@ -161,6 +161,7 @@ enum rq_flag_bits { __REQ_INTEGRITY,/* I/O includes block integrity payload */ __REQ_FUA, /* forced unit access */ __REQ_FLUSH,/* request for cache flush */ + __REQ_BG, /* background activity */ /* bio only flags */ __REQ_RAHEAD, /* read ahead, can fail anytime */ @@ -208,7 +209,7 @@ enum rq_flag_bits { #define REQ_COMMON_MASK \ (REQ_WRITE | REQ_FAILFAST_MASK | REQ_SYNC | REQ_META | REQ_PRIO | \ REQ_DISCARD | REQ_WRITE_SAME | REQ_NOIDLE | REQ_FLUSH | REQ_FUA | \ -REQ_SECURE | REQ_INTEGRITY) +REQ_SECURE | REQ_INTEGRITY | REQ_BG) #define REQ_CLONE_MASK REQ_COMMON_MASK #define BIO_NO_ADVANCE_ITER_MASK (REQ_DISCARD|REQ_WRITE_SAME) @@ -235,6 +236,7 @@ enum rq_flag_bits { #define REQ_COPY_USER (1ULL << __REQ_COPY_USER) #define REQ_FLUSH (1ULL << __REQ_FLUSH) #define REQ_FLUSH_SEQ (1ULL << __REQ_FLUSH_SEQ) +#define REQ_BG (1ULL << __REQ_BG) #define REQ_IO_STAT(1ULL << __REQ_IO_STAT) #define REQ_MIXED_MERGE(1ULL << __REQ_MIXED_MERGE) #define REQ_SECURE (1ULL << __REQ_SECURE) diff --git a/include/linux/fs.h b/include/linux/fs.h index 70e61b58baaf..bb8f951cc619 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -192,6 +192,9 @@ typedef void (dax_iodone_t)(struct buffer_head *bh_map, int uptodate); * WRITE_FLUSH_FUA Combination of WRITE_FLUSH and FUA. The IO is preceded * by a cache flush and data is guaranteed to be on * non-volatile media on completion. + * WRITE_BGBackground write. 
This is for background activity like + * the periodic flush and background threshold writeback + * * */ #define RW_MASKREQ_WRITE @@ -207,6 +210,7 @@ typedef void (dax_iodone_t)(struct buffer_head *bh_map, int uptodate); #define WRITE_FLUSH(WRITE | REQ_SYNC | REQ_NOIDLE | REQ_FLUSH) #define WRITE_FUA (WRITE | REQ_SYNC | REQ_NOIDLE | REQ_FUA) #define WRITE_FLUSH_FUA(WRITE | REQ_SYNC | REQ_NOIDLE | REQ_FLUSH | REQ_FUA) +#define WRITE_BG (WRITE | REQ_NOIDLE | REQ_BG) /* * Attribute flags. These should be or-ed together to figure out what -- 2.8.0.rc4.6.g7e4ba36
[PATCH 6/8] block: add code to track actual device queue depth
For blk-mq, ->nr_requests does track queue depth, at least at init time. But for the older queue paths, it's simply a soft setting. On top of that, it's generally larger than the hardware setting on purpose, to allow backup of requests for merging. Fill a hole in struct request_queue with a 'queue_depth' member, that drivers can set to more closely inform the block layer of the real queue depth. Signed-off-by: Jens Axboe --- block/blk-settings.c | 12 drivers/scsi/scsi.c| 3 +++ include/linux/blkdev.h | 11 +++ 3 files changed, 26 insertions(+) diff --git a/block/blk-settings.c b/block/blk-settings.c index f679ae122843..f7e122e717e8 100644 --- a/block/blk-settings.c +++ b/block/blk-settings.c @@ -832,6 +832,18 @@ void blk_queue_flush_queueable(struct request_queue *q, bool queueable) EXPORT_SYMBOL_GPL(blk_queue_flush_queueable); /** + * blk_set_queue_depth - tell the block layer about the device queue depth + * @q: the request queue for the device + * @depth: queue depth + * + */ +void blk_set_queue_depth(struct request_queue *q, unsigned int depth) +{ + q->queue_depth = depth; +} +EXPORT_SYMBOL(blk_set_queue_depth); + +/** * blk_queue_write_cache - configure queue's write cache * @q: the request queue for the device * @wc:write back cache on or off diff --git a/drivers/scsi/scsi.c b/drivers/scsi/scsi.c index 1deb6adc411f..75455d4dab68 100644 --- a/drivers/scsi/scsi.c +++ b/drivers/scsi/scsi.c @@ -621,6 +621,9 @@ int scsi_change_queue_depth(struct scsi_device *sdev, int depth) wmb(); } + if (sdev->request_queue) + blk_set_queue_depth(sdev->request_queue, depth); + return sdev->queue_depth; } EXPORT_SYMBOL(scsi_change_queue_depth); diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index fc1894996b12..eee94bd6de52 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -315,6 +315,8 @@ struct request_queue { struct blk_mq_ctx __percpu *queue_ctx; unsigned intnr_queues; + unsigned intqueue_depth; + /* hw dispatch queues */ struct 
blk_mq_hw_ctx**queue_hw_ctx; unsigned intnr_hw_queues; @@ -681,6 +683,14 @@ static inline bool blk_write_same_mergeable(struct bio *a, struct bio *b) return false; } +static inline unsigned int blk_queue_depth(struct request_queue *q) +{ + if (q->queue_depth) + return q->queue_depth; + + return q->nr_requests; +} + /* * q->prep_rq_fn return values */ @@ -984,6 +994,7 @@ extern void blk_limits_io_min(struct queue_limits *limits, unsigned int min); extern void blk_queue_io_min(struct request_queue *q, unsigned int min); extern void blk_limits_io_opt(struct queue_limits *limits, unsigned int opt); extern void blk_queue_io_opt(struct request_queue *q, unsigned int opt); +extern void blk_set_queue_depth(struct request_queue *q, unsigned int depth); extern void blk_set_default_limits(struct queue_limits *lim); extern void blk_set_stacking_limits(struct queue_limits *lim); extern int blk_stack_limits(struct queue_limits *t, struct queue_limits *b, -- 2.8.0.rc4.6.g7e4ba36
[PATCH 5/8] writeback: increment page wait count when waiting
If we end up waiting on a page that is dirty or marked writeback, then increment the corresponding bdi_writeback counter. Signed-off-by: Jens Axboe--- mm/filemap.c | 42 +++--- 1 file changed, 39 insertions(+), 3 deletions(-) diff --git a/mm/filemap.c b/mm/filemap.c index f2479af09da9..a8854a083b71 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -764,37 +764,73 @@ wait_queue_head_t *page_waitqueue(struct page *page) } EXPORT_SYMBOL(page_waitqueue); +static bool inc_dirty_wait(struct page *page) +{ + if (!page->mapping || !PageDirty(page) || !PageWriteback(page)) + return false; + else { + struct bdi_writeback *wb = inode_to_wb(page->mapping->host); + + atomic_inc(>dirty_sleeping); + return true; + } +} + +static void dec_dirty_wait(struct page *page) +{ + struct bdi_writeback *wb = inode_to_wb(page->mapping->host); + + atomic_dec(>dirty_sleeping); +} + void wait_on_page_bit(struct page *page, int bit_nr) { DEFINE_WAIT_BIT(wait, >flags, bit_nr); - if (test_bit(bit_nr, >flags)) + if (test_bit(bit_nr, >flags)) { + bool did_inc = inc_dirty_wait(page); __wait_on_bit(page_waitqueue(page), , bit_wait_io, TASK_UNINTERRUPTIBLE); + if (did_inc) + dec_dirty_wait(page); + } } EXPORT_SYMBOL(wait_on_page_bit); int wait_on_page_bit_killable(struct page *page, int bit_nr) { DEFINE_WAIT_BIT(wait, >flags, bit_nr); + bool did_inc; + int ret; if (!test_bit(bit_nr, >flags)) return 0; - return __wait_on_bit(page_waitqueue(page), , + did_inc = inc_dirty_wait(page); + ret = __wait_on_bit(page_waitqueue(page), , bit_wait_io, TASK_KILLABLE); + if (did_inc) + dec_dirty_wait(page); + return ret; } int wait_on_page_bit_killable_timeout(struct page *page, int bit_nr, unsigned long timeout) { DEFINE_WAIT_BIT(wait, >flags, bit_nr); + bool did_inc; + int ret; wait.key.timeout = jiffies + timeout; if (!test_bit(bit_nr, >flags)) return 0; - return __wait_on_bit(page_waitqueue(page), , + + did_inc = inc_dirty_wait(page); + ret = __wait_on_bit(page_waitqueue(page), , bit_wait_io_timeout, 
TASK_KILLABLE); + if (did_inc) + dec_dirty_wait(page); + return ret; } EXPORT_SYMBOL_GPL(wait_on_page_bit_killable_timeout); -- 2.8.0.rc4.6.g7e4ba36
[PATCH 6/8] block: add code to track actual device queue depth
For blk-mq, ->nr_requests does track queue depth, at least at init time. But for the older queue paths, it's simply a soft setting. On top of that, it's generally larger than the hardware setting on purpose, to allow backup of requests for merging. Fill a hole in struct request_queue with a 'queue_depth' member, that drivers can set to more closely inform the block layer of the real queue depth. Signed-off-by: Jens Axboe --- block/blk-settings.c | 12 drivers/scsi/scsi.c| 3 +++ include/linux/blkdev.h | 11 +++ 3 files changed, 26 insertions(+) diff --git a/block/blk-settings.c b/block/blk-settings.c index f679ae122843..f7e122e717e8 100644 --- a/block/blk-settings.c +++ b/block/blk-settings.c @@ -832,6 +832,18 @@ void blk_queue_flush_queueable(struct request_queue *q, bool queueable) EXPORT_SYMBOL_GPL(blk_queue_flush_queueable); /** + * blk_set_queue_depth - tell the block layer about the device queue depth + * @q: the request queue for the device + * @depth: queue depth + * + */ +void blk_set_queue_depth(struct request_queue *q, unsigned int depth) +{ + q->queue_depth = depth; +} +EXPORT_SYMBOL(blk_set_queue_depth); + +/** * blk_queue_write_cache - configure queue's write cache * @q: the request queue for the device * @wc:write back cache on or off diff --git a/drivers/scsi/scsi.c b/drivers/scsi/scsi.c index 1deb6adc411f..75455d4dab68 100644 --- a/drivers/scsi/scsi.c +++ b/drivers/scsi/scsi.c @@ -621,6 +621,9 @@ int scsi_change_queue_depth(struct scsi_device *sdev, int depth) wmb(); } + if (sdev->request_queue) + blk_set_queue_depth(sdev->request_queue, depth); + return sdev->queue_depth; } EXPORT_SYMBOL(scsi_change_queue_depth); diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index fc1894996b12..eee94bd6de52 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -315,6 +315,8 @@ struct request_queue { struct blk_mq_ctx __percpu *queue_ctx; unsigned intnr_queues; + unsigned intqueue_depth; + /* hw dispatch queues */ struct 
blk_mq_hw_ctx**queue_hw_ctx; unsigned intnr_hw_queues; @@ -681,6 +683,14 @@ static inline bool blk_write_same_mergeable(struct bio *a, struct bio *b) return false; } +static inline unsigned int blk_queue_depth(struct request_queue *q) +{ + if (q->queue_depth) + return q->queue_depth; + + return q->nr_requests; +} + /* * q->prep_rq_fn return values */ @@ -984,6 +994,7 @@ extern void blk_limits_io_min(struct queue_limits *limits, unsigned int min); extern void blk_queue_io_min(struct request_queue *q, unsigned int min); extern void blk_limits_io_opt(struct queue_limits *limits, unsigned int opt); extern void blk_queue_io_opt(struct request_queue *q, unsigned int opt); +extern void blk_set_queue_depth(struct request_queue *q, unsigned int depth); extern void blk_set_default_limits(struct queue_limits *lim); extern void blk_set_stacking_limits(struct queue_limits *lim); extern int blk_stack_limits(struct queue_limits *t, struct queue_limits *b, -- 2.8.0.rc4.6.g7e4ba36
[PATCH 5/8] writeback: increment page wait count when waiting
If we end up waiting on a page that is dirty or marked writeback, then increment the corresponding bdi_writeback counter. Signed-off-by: Jens Axboe --- mm/filemap.c | 42 +++--- 1 file changed, 39 insertions(+), 3 deletions(-) diff --git a/mm/filemap.c b/mm/filemap.c index f2479af09da9..a8854a083b71 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -764,37 +764,73 @@ wait_queue_head_t *page_waitqueue(struct page *page) } EXPORT_SYMBOL(page_waitqueue); +static bool inc_dirty_wait(struct page *page) +{ + if (!page->mapping || !PageDirty(page) || !PageWriteback(page)) + return false; + else { + struct bdi_writeback *wb = inode_to_wb(page->mapping->host); + + atomic_inc(>dirty_sleeping); + return true; + } +} + +static void dec_dirty_wait(struct page *page) +{ + struct bdi_writeback *wb = inode_to_wb(page->mapping->host); + + atomic_dec(>dirty_sleeping); +} + void wait_on_page_bit(struct page *page, int bit_nr) { DEFINE_WAIT_BIT(wait, >flags, bit_nr); - if (test_bit(bit_nr, >flags)) + if (test_bit(bit_nr, >flags)) { + bool did_inc = inc_dirty_wait(page); __wait_on_bit(page_waitqueue(page), , bit_wait_io, TASK_UNINTERRUPTIBLE); + if (did_inc) + dec_dirty_wait(page); + } } EXPORT_SYMBOL(wait_on_page_bit); int wait_on_page_bit_killable(struct page *page, int bit_nr) { DEFINE_WAIT_BIT(wait, >flags, bit_nr); + bool did_inc; + int ret; if (!test_bit(bit_nr, >flags)) return 0; - return __wait_on_bit(page_waitqueue(page), , + did_inc = inc_dirty_wait(page); + ret = __wait_on_bit(page_waitqueue(page), , bit_wait_io, TASK_KILLABLE); + if (did_inc) + dec_dirty_wait(page); + return ret; } int wait_on_page_bit_killable_timeout(struct page *page, int bit_nr, unsigned long timeout) { DEFINE_WAIT_BIT(wait, >flags, bit_nr); + bool did_inc; + int ret; wait.key.timeout = jiffies + timeout; if (!test_bit(bit_nr, >flags)) return 0; - return __wait_on_bit(page_waitqueue(page), , + + did_inc = inc_dirty_wait(page); + ret = __wait_on_bit(page_waitqueue(page), , bit_wait_io_timeout, 
TASK_KILLABLE); + if (did_inc) + dec_dirty_wait(page); + return ret; } EXPORT_SYMBOL_GPL(wait_on_page_bit_killable_timeout); -- 2.8.0.rc4.6.g7e4ba36
[PATCH 2/8] writeback: add wbc_to_write_cmd()
Add wbc_to_write_cmd(), which returns the write type to use, based on a struct writeback_control. No functional changes in this patch, but it prepares us for factoring other wbc fields for write type. Signed-off-by: Jens Axboe--- fs/block_dev.c| 2 +- fs/buffer.c | 2 +- fs/f2fs/data.c| 2 +- fs/f2fs/node.c| 2 +- fs/gfs2/meta_io.c | 3 +-- fs/mpage.c| 9 - fs/xfs/xfs_aops.c | 2 +- include/linux/writeback.h | 8 8 files changed, 18 insertions(+), 12 deletions(-) diff --git a/fs/block_dev.c b/fs/block_dev.c index 20a2c02b77c4..8662da6aa07c 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -432,7 +432,7 @@ int bdev_write_page(struct block_device *bdev, sector_t sector, struct page *page, struct writeback_control *wbc) { int result; - int rw = (wbc->sync_mode == WB_SYNC_ALL) ? WRITE_SYNC : WRITE; + int rw = wbc_to_write_cmd(wbc); const struct block_device_operations *ops = bdev->bd_disk->fops; if (!ops->rw_page || bdev_get_integrity(bdev)) diff --git a/fs/buffer.c b/fs/buffer.c index af0d9a82a8ed..46763c58e786 100644 --- a/fs/buffer.c +++ b/fs/buffer.c @@ -1697,7 +1697,7 @@ static int __block_write_full_page(struct inode *inode, struct page *page, struct buffer_head *bh, *head; unsigned int blocksize, bbits; int nr_underway = 0; - int write_op = (wbc->sync_mode == WB_SYNC_ALL ? WRITE_SYNC : WRITE); + int write_op = wbc_to_write_cmd(wbc); head = create_page_buffers(page, inode, (1 << BH_Dirty)|(1 << BH_Uptodate)); diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 5dafb9cef12e..e4e81ce663c5 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -1153,7 +1153,7 @@ static int f2fs_write_data_page(struct page *page, struct f2fs_io_info fio = { .sbi = sbi, .type = DATA, - .rw = (wbc->sync_mode == WB_SYNC_ALL) ? 
WRITE_SYNC : WRITE, + .rw = wbc_to_write_cmd(wbc), .page = page, .encrypted_page = NULL, }; diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index 1a33de9d84b1..3b377258dc09 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -1397,7 +1397,7 @@ static int f2fs_write_node_page(struct page *page, struct f2fs_io_info fio = { .sbi = sbi, .type = NODE, - .rw = (wbc->sync_mode == WB_SYNC_ALL) ? WRITE_SYNC : WRITE, + .rw = wbc_to_write_cmd(wbc), .page = page, .encrypted_page = NULL, }; diff --git a/fs/gfs2/meta_io.c b/fs/gfs2/meta_io.c index 0448524c11bc..3fdfa3848f18 100644 --- a/fs/gfs2/meta_io.c +++ b/fs/gfs2/meta_io.c @@ -37,8 +37,7 @@ static int gfs2_aspace_writepage(struct page *page, struct writeback_control *wb { struct buffer_head *bh, *head; int nr_underway = 0; - int write_op = REQ_META | REQ_PRIO | - (wbc->sync_mode == WB_SYNC_ALL ? WRITE_SYNC : WRITE); + int write_op = REQ_META | REQ_PRIO | wbc_to_write_cmd(wbc); BUG_ON(!PageLocked(page)); BUG_ON(!page_has_buffers(page)); diff --git a/fs/mpage.c b/fs/mpage.c index eedc644b78d7..bcbdb61b24f1 100644 --- a/fs/mpage.c +++ b/fs/mpage.c @@ -486,7 +486,6 @@ static int __mpage_writepage(struct page *page, struct writeback_control *wbc, struct buffer_head map_bh; loff_t i_size = i_size_read(inode); int ret = 0; - int wr = (wbc->sync_mode == WB_SYNC_ALL ? WRITE_SYNC : WRITE); if (page_has_buffers(page)) { struct buffer_head *head = page_buffers(page); @@ -595,7 +594,7 @@ page_is_mapped: * This page will go to BIO. Do we need to send this BIO off first? 
*/ if (bio && mpd->last_block_in_bio != blocks[0] - 1) - bio = mpage_bio_submit(wr, bio); + bio = mpage_bio_submit(wbc_to_write_cmd(wbc), bio); alloc_new: if (bio == NULL) { @@ -622,7 +621,7 @@ alloc_new: wbc_account_io(wbc, page, PAGE_SIZE); length = first_unmapped << blkbits; if (bio_add_page(bio, page, length, 0) < length) { - bio = mpage_bio_submit(wr, bio); + bio = mpage_bio_submit(wbc_to_write_cmd(wbc), bio); goto alloc_new; } @@ -632,7 +631,7 @@ alloc_new: set_page_writeback(page); unlock_page(page); if (boundary || (first_unmapped != blocks_per_page)) { - bio = mpage_bio_submit(wr, bio); + bio = mpage_bio_submit(wbc_to_write_cmd(wbc), bio); if (boundary_block) { write_boundary_block(boundary_bdev, boundary_block, 1 << blkbits); @@ -644,7 +643,7 @@ alloc_new: confused: if (bio) - bio = mpage_bio_submit(wr, bio); +
[PATCH 3/8] writeback: use WRITE_BG for kupdate and background writeback
If we're doing background type writes, then use the appropriate write command for that. Signed-off-by: Jens Axboe--- include/linux/writeback.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/include/linux/writeback.h b/include/linux/writeback.h index aa66fa05ff0d..6e4a35acaa3e 100644 --- a/include/linux/writeback.h +++ b/include/linux/writeback.h @@ -104,6 +104,8 @@ static inline int wbc_to_write_cmd(struct writeback_control *wbc) { if (wbc->sync_mode == WB_SYNC_ALL) return WRITE_SYNC; + else if (wbc->for_kupdate || wbc->for_background) + return WRITE_BG; return WRITE; } -- 2.8.0.rc4.6.g7e4ba36
[PATCH 4/8] writeback: track if we're sleeping on progress in balance_dirty_pages()
Note in the bdi_writeback structure if a task is currently being limited in balance_dirty_pages(), waiting for writeback to proceed. Signed-off-by: Jens Axboe--- include/linux/backing-dev-defs.h | 2 ++ mm/backing-dev.c | 1 + mm/page-writeback.c | 2 ++ 3 files changed, 5 insertions(+) diff --git a/include/linux/backing-dev-defs.h b/include/linux/backing-dev-defs.h index 3f103076d0bf..1212c374b928 100644 --- a/include/linux/backing-dev-defs.h +++ b/include/linux/backing-dev-defs.h @@ -116,6 +116,8 @@ struct bdi_writeback { struct list_head work_list; struct delayed_work dwork; /* work item used for writeback */ + atomic_t dirty_sleeping;/* waiting on dirty limit exceeded */ + struct list_head bdi_node; /* anchored at bdi->wb_list */ #ifdef CONFIG_CGROUP_WRITEBACK diff --git a/mm/backing-dev.c b/mm/backing-dev.c index 0c6317b7db38..41db7dff11d0 100644 --- a/mm/backing-dev.c +++ b/mm/backing-dev.c @@ -310,6 +310,7 @@ static int wb_init(struct bdi_writeback *wb, struct backing_dev_info *bdi, spin_lock_init(>work_lock); INIT_LIST_HEAD(>work_list); INIT_DELAYED_WORK(>dwork, wb_workfn); + atomic_set(>dirty_sleeping, 0); wb->congested = wb_congested_get_create(bdi, blkcg_id, gfp); if (!wb->congested) diff --git a/mm/page-writeback.c b/mm/page-writeback.c index 999792d35ccc..028a3d4d7129 100644 --- a/mm/page-writeback.c +++ b/mm/page-writeback.c @@ -1746,7 +1746,9 @@ pause: pause, start_time); __set_current_state(TASK_KILLABLE); + atomic_inc(>dirty_sleeping); io_schedule_timeout(pause); + atomic_dec(>dirty_sleeping); current->dirty_paused_when = now + pause; current->nr_dirtied = 0; -- 2.8.0.rc4.6.g7e4ba36
[PATCH 7/8] block: add scalable completion tracking of requests
For legacy block, we simply track them in the request queue. For blk-mq, we track them on a per-sw queue basis, which we can then sum up through the hardware queues and finally to a per device state. The stats are tracked in, roughly, 0.1s interval windows. Add sysfs files to display the stats. Signed-off-by: Jens Axboe--- block/Makefile| 2 +- block/blk-core.c | 4 + block/blk-mq-sysfs.c | 47 block/blk-mq.c| 14 block/blk-mq.h| 3 + block/blk-stat.c | 184 ++ block/blk-stat.h | 17 + block/blk-sysfs.c | 26 +++ include/linux/blk_types.h | 8 ++ include/linux/blkdev.h| 4 + 10 files changed, 308 insertions(+), 1 deletion(-) create mode 100644 block/blk-stat.c create mode 100644 block/blk-stat.h diff --git a/block/Makefile b/block/Makefile index 9eda2322b2d4..3446e0472df0 100644 --- a/block/Makefile +++ b/block/Makefile @@ -5,7 +5,7 @@ obj-$(CONFIG_BLOCK) := bio.o elevator.o blk-core.o blk-tag.o blk-sysfs.o \ blk-flush.o blk-settings.o blk-ioc.o blk-map.o \ blk-exec.o blk-merge.o blk-softirq.o blk-timeout.o \ - blk-lib.o blk-mq.o blk-mq-tag.o \ + blk-lib.o blk-mq.o blk-mq-tag.o blk-stat.o \ blk-mq-sysfs.o blk-mq-cpu.o blk-mq-cpumap.o ioctl.o \ genhd.o scsi_ioctl.o partition-generic.o ioprio.o \ badblocks.o partitions/ diff --git a/block/blk-core.c b/block/blk-core.c index 74c16fd8995d..40b57bf4852c 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -2514,6 +2514,8 @@ void blk_start_request(struct request *req) { blk_dequeue_request(req); + req->issue_time = ktime_to_ns(ktime_get()); + /* * We are now handing the request to the hardware, initialize * resid_len to full count and add the timeout handler. 
@@ -2581,6 +2583,8 @@ bool blk_update_request(struct request *req, int error, unsigned int nr_bytes) trace_block_rq_complete(req->q, req, nr_bytes); + blk_stat_add(>q->rq_stats[rq_data_dir(req)], req); + if (!req->bio) return false; diff --git a/block/blk-mq-sysfs.c b/block/blk-mq-sysfs.c index 4ea4dd8a1eed..2f68015f8616 100644 --- a/block/blk-mq-sysfs.c +++ b/block/blk-mq-sysfs.c @@ -247,6 +247,47 @@ static ssize_t blk_mq_hw_sysfs_cpus_show(struct blk_mq_hw_ctx *hctx, char *page) return ret; } +static void blk_mq_stat_clear(struct blk_mq_hw_ctx *hctx) +{ + struct blk_mq_ctx *ctx; + unsigned int i; + + hctx_for_each_ctx(hctx, ctx, i) { + blk_stat_init(>stat[0]); + blk_stat_init(>stat[1]); + } +} + +static ssize_t blk_mq_hw_sysfs_stat_store(struct blk_mq_hw_ctx *hctx, + const char *page, size_t count) +{ + blk_mq_stat_clear(hctx); + return count; +} + +static ssize_t print_stat(char *page, struct blk_rq_stat *stat, const char *pre) +{ + return sprintf(page, "%s samples=%llu, mean=%lld, min=%lld, max=%lld\n", + pre, (long long) stat->nr_samples, + (long long) stat->mean, (long long) stat->min, + (long long) stat->max); +} + +static ssize_t blk_mq_hw_sysfs_stat_show(struct blk_mq_hw_ctx *hctx, char *page) +{ + struct blk_rq_stat stat[2]; + ssize_t ret; + + blk_stat_init([0]); + blk_stat_init([1]); + + blk_hctx_stat_get(hctx, stat); + + ret = print_stat(page, [0], "read :"); + ret += print_stat(page + ret, [1], "write:"); + return ret; +} + static struct blk_mq_ctx_sysfs_entry blk_mq_sysfs_dispatched = { .attr = {.name = "dispatched", .mode = S_IRUGO }, .show = blk_mq_sysfs_dispatched_show, @@ -304,6 +345,11 @@ static struct blk_mq_hw_ctx_sysfs_entry blk_mq_hw_sysfs_poll = { .attr = {.name = "io_poll", .mode = S_IRUGO }, .show = blk_mq_hw_sysfs_poll_show, }; +static struct blk_mq_hw_ctx_sysfs_entry blk_mq_hw_sysfs_stat = { + .attr = {.name = "stats", .mode = S_IRUGO | S_IWUSR }, + .show = blk_mq_hw_sysfs_stat_show, + .store = blk_mq_hw_sysfs_stat_store, +}; static 
struct attribute *default_hw_ctx_attrs[] = { _mq_hw_sysfs_queued.attr, @@ -314,6 +360,7 @@ static struct attribute *default_hw_ctx_attrs[] = { _mq_hw_sysfs_cpus.attr, _mq_hw_sysfs_active.attr, _mq_hw_sysfs_poll.attr, + _mq_hw_sysfs_stat.attr, NULL, }; diff --git a/block/blk-mq.c b/block/blk-mq.c index 1699baf39b78..71b4a13fbf94 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -29,6 +29,7 @@ #include "blk.h" #include "blk-mq.h" #include "blk-mq-tag.h" +#include "blk-stat.h" static DEFINE_MUTEX(all_q_mutex); static LIST_HEAD(all_q_list); @@ -356,10 +357,19 @@ static void
[PATCH 7/8] block: add scalable completion tracking of requests
For legacy block, we simply track them in the request queue. For blk-mq, we track them on a per-sw queue basis, which we can then sum up through the hardware queues and finally to a per device state. The stats are tracked in, roughly, 0.1s interval windows. Add sysfs files to display the stats. Signed-off-by: Jens Axboe --- block/Makefile| 2 +- block/blk-core.c | 4 + block/blk-mq-sysfs.c | 47 block/blk-mq.c| 14 block/blk-mq.h| 3 + block/blk-stat.c | 184 ++ block/blk-stat.h | 17 + block/blk-sysfs.c | 26 +++ include/linux/blk_types.h | 8 ++ include/linux/blkdev.h| 4 + 10 files changed, 308 insertions(+), 1 deletion(-) create mode 100644 block/blk-stat.c create mode 100644 block/blk-stat.h diff --git a/block/Makefile b/block/Makefile index 9eda2322b2d4..3446e0472df0 100644 --- a/block/Makefile +++ b/block/Makefile @@ -5,7 +5,7 @@ obj-$(CONFIG_BLOCK) := bio.o elevator.o blk-core.o blk-tag.o blk-sysfs.o \ blk-flush.o blk-settings.o blk-ioc.o blk-map.o \ blk-exec.o blk-merge.o blk-softirq.o blk-timeout.o \ - blk-lib.o blk-mq.o blk-mq-tag.o \ + blk-lib.o blk-mq.o blk-mq-tag.o blk-stat.o \ blk-mq-sysfs.o blk-mq-cpu.o blk-mq-cpumap.o ioctl.o \ genhd.o scsi_ioctl.o partition-generic.o ioprio.o \ badblocks.o partitions/ diff --git a/block/blk-core.c b/block/blk-core.c index 74c16fd8995d..40b57bf4852c 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -2514,6 +2514,8 @@ void blk_start_request(struct request *req) { blk_dequeue_request(req); + req->issue_time = ktime_to_ns(ktime_get()); + /* * We are now handing the request to the hardware, initialize * resid_len to full count and add the timeout handler. 
@@ -2581,6 +2583,8 @@ bool blk_update_request(struct request *req, int error, unsigned int nr_bytes) trace_block_rq_complete(req->q, req, nr_bytes); + blk_stat_add(>q->rq_stats[rq_data_dir(req)], req); + if (!req->bio) return false; diff --git a/block/blk-mq-sysfs.c b/block/blk-mq-sysfs.c index 4ea4dd8a1eed..2f68015f8616 100644 --- a/block/blk-mq-sysfs.c +++ b/block/blk-mq-sysfs.c @@ -247,6 +247,47 @@ static ssize_t blk_mq_hw_sysfs_cpus_show(struct blk_mq_hw_ctx *hctx, char *page) return ret; } +static void blk_mq_stat_clear(struct blk_mq_hw_ctx *hctx) +{ + struct blk_mq_ctx *ctx; + unsigned int i; + + hctx_for_each_ctx(hctx, ctx, i) { + blk_stat_init(>stat[0]); + blk_stat_init(>stat[1]); + } +} + +static ssize_t blk_mq_hw_sysfs_stat_store(struct blk_mq_hw_ctx *hctx, + const char *page, size_t count) +{ + blk_mq_stat_clear(hctx); + return count; +} + +static ssize_t print_stat(char *page, struct blk_rq_stat *stat, const char *pre) +{ + return sprintf(page, "%s samples=%llu, mean=%lld, min=%lld, max=%lld\n", + pre, (long long) stat->nr_samples, + (long long) stat->mean, (long long) stat->min, + (long long) stat->max); +} + +static ssize_t blk_mq_hw_sysfs_stat_show(struct blk_mq_hw_ctx *hctx, char *page) +{ + struct blk_rq_stat stat[2]; + ssize_t ret; + + blk_stat_init([0]); + blk_stat_init([1]); + + blk_hctx_stat_get(hctx, stat); + + ret = print_stat(page, [0], "read :"); + ret += print_stat(page + ret, [1], "write:"); + return ret; +} + static struct blk_mq_ctx_sysfs_entry blk_mq_sysfs_dispatched = { .attr = {.name = "dispatched", .mode = S_IRUGO }, .show = blk_mq_sysfs_dispatched_show, @@ -304,6 +345,11 @@ static struct blk_mq_hw_ctx_sysfs_entry blk_mq_hw_sysfs_poll = { .attr = {.name = "io_poll", .mode = S_IRUGO }, .show = blk_mq_hw_sysfs_poll_show, }; +static struct blk_mq_hw_ctx_sysfs_entry blk_mq_hw_sysfs_stat = { + .attr = {.name = "stats", .mode = S_IRUGO | S_IWUSR }, + .show = blk_mq_hw_sysfs_stat_show, + .store = blk_mq_hw_sysfs_stat_store, +}; static 
struct attribute *default_hw_ctx_attrs[] = { _mq_hw_sysfs_queued.attr, @@ -314,6 +360,7 @@ static struct attribute *default_hw_ctx_attrs[] = { _mq_hw_sysfs_cpus.attr, _mq_hw_sysfs_active.attr, _mq_hw_sysfs_poll.attr, + _mq_hw_sysfs_stat.attr, NULL, }; diff --git a/block/blk-mq.c b/block/blk-mq.c index 1699baf39b78..71b4a13fbf94 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -29,6 +29,7 @@ #include "blk.h" #include "blk-mq.h" #include "blk-mq-tag.h" +#include "blk-stat.h" static DEFINE_MUTEX(all_q_mutex); static LIST_HEAD(all_q_list); @@ -356,10 +357,19 @@ static void
[PATCH 2/8] writeback: add wbc_to_write_cmd()
Add wbc_to_write_cmd(), which returns the write type to use, based on a struct writeback_control. No functional changes in this patch, but it prepares us for factoring other wbc fields for write type. Signed-off-by: Jens Axboe --- fs/block_dev.c| 2 +- fs/buffer.c | 2 +- fs/f2fs/data.c| 2 +- fs/f2fs/node.c| 2 +- fs/gfs2/meta_io.c | 3 +-- fs/mpage.c| 9 - fs/xfs/xfs_aops.c | 2 +- include/linux/writeback.h | 8 8 files changed, 18 insertions(+), 12 deletions(-) diff --git a/fs/block_dev.c b/fs/block_dev.c index 20a2c02b77c4..8662da6aa07c 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -432,7 +432,7 @@ int bdev_write_page(struct block_device *bdev, sector_t sector, struct page *page, struct writeback_control *wbc) { int result; - int rw = (wbc->sync_mode == WB_SYNC_ALL) ? WRITE_SYNC : WRITE; + int rw = wbc_to_write_cmd(wbc); const struct block_device_operations *ops = bdev->bd_disk->fops; if (!ops->rw_page || bdev_get_integrity(bdev)) diff --git a/fs/buffer.c b/fs/buffer.c index af0d9a82a8ed..46763c58e786 100644 --- a/fs/buffer.c +++ b/fs/buffer.c @@ -1697,7 +1697,7 @@ static int __block_write_full_page(struct inode *inode, struct page *page, struct buffer_head *bh, *head; unsigned int blocksize, bbits; int nr_underway = 0; - int write_op = (wbc->sync_mode == WB_SYNC_ALL ? WRITE_SYNC : WRITE); + int write_op = wbc_to_write_cmd(wbc); head = create_page_buffers(page, inode, (1 << BH_Dirty)|(1 << BH_Uptodate)); diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 5dafb9cef12e..e4e81ce663c5 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -1153,7 +1153,7 @@ static int f2fs_write_data_page(struct page *page, struct f2fs_io_info fio = { .sbi = sbi, .type = DATA, - .rw = (wbc->sync_mode == WB_SYNC_ALL) ? 
WRITE_SYNC : WRITE, + .rw = wbc_to_write_cmd(wbc), .page = page, .encrypted_page = NULL, }; diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index 1a33de9d84b1..3b377258dc09 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -1397,7 +1397,7 @@ static int f2fs_write_node_page(struct page *page, struct f2fs_io_info fio = { .sbi = sbi, .type = NODE, - .rw = (wbc->sync_mode == WB_SYNC_ALL) ? WRITE_SYNC : WRITE, + .rw = wbc_to_write_cmd(wbc), .page = page, .encrypted_page = NULL, }; diff --git a/fs/gfs2/meta_io.c b/fs/gfs2/meta_io.c index 0448524c11bc..3fdfa3848f18 100644 --- a/fs/gfs2/meta_io.c +++ b/fs/gfs2/meta_io.c @@ -37,8 +37,7 @@ static int gfs2_aspace_writepage(struct page *page, struct writeback_control *wb { struct buffer_head *bh, *head; int nr_underway = 0; - int write_op = REQ_META | REQ_PRIO | - (wbc->sync_mode == WB_SYNC_ALL ? WRITE_SYNC : WRITE); + int write_op = REQ_META | REQ_PRIO | wbc_to_write_cmd(wbc); BUG_ON(!PageLocked(page)); BUG_ON(!page_has_buffers(page)); diff --git a/fs/mpage.c b/fs/mpage.c index eedc644b78d7..bcbdb61b24f1 100644 --- a/fs/mpage.c +++ b/fs/mpage.c @@ -486,7 +486,6 @@ static int __mpage_writepage(struct page *page, struct writeback_control *wbc, struct buffer_head map_bh; loff_t i_size = i_size_read(inode); int ret = 0; - int wr = (wbc->sync_mode == WB_SYNC_ALL ? WRITE_SYNC : WRITE); if (page_has_buffers(page)) { struct buffer_head *head = page_buffers(page); @@ -595,7 +594,7 @@ page_is_mapped: * This page will go to BIO. Do we need to send this BIO off first? 
*/ if (bio && mpd->last_block_in_bio != blocks[0] - 1) - bio = mpage_bio_submit(wr, bio); + bio = mpage_bio_submit(wbc_to_write_cmd(wbc), bio); alloc_new: if (bio == NULL) { @@ -622,7 +621,7 @@ alloc_new: wbc_account_io(wbc, page, PAGE_SIZE); length = first_unmapped << blkbits; if (bio_add_page(bio, page, length, 0) < length) { - bio = mpage_bio_submit(wr, bio); + bio = mpage_bio_submit(wbc_to_write_cmd(wbc), bio); goto alloc_new; } @@ -632,7 +631,7 @@ alloc_new: set_page_writeback(page); unlock_page(page); if (boundary || (first_unmapped != blocks_per_page)) { - bio = mpage_bio_submit(wr, bio); + bio = mpage_bio_submit(wbc_to_write_cmd(wbc), bio); if (boundary_block) { write_boundary_block(boundary_bdev, boundary_block, 1 << blkbits); @@ -644,7 +643,7 @@ alloc_new: confused: if (bio) - bio = mpage_bio_submit(wr, bio); + bio =
[PATCH 3/8] writeback: use WRITE_BG for kupdate and background writeback
If we're doing background type writes, then use the appropriate write command for that. Signed-off-by: Jens Axboe --- include/linux/writeback.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/include/linux/writeback.h b/include/linux/writeback.h index aa66fa05ff0d..6e4a35acaa3e 100644 --- a/include/linux/writeback.h +++ b/include/linux/writeback.h @@ -104,6 +104,8 @@ static inline int wbc_to_write_cmd(struct writeback_control *wbc) { if (wbc->sync_mode == WB_SYNC_ALL) return WRITE_SYNC; + else if (wbc->for_kupdate || wbc->for_background) + return WRITE_BG; return WRITE; } -- 2.8.0.rc4.6.g7e4ba36
[PATCH 4/8] writeback: track if we're sleeping on progress in balance_dirty_pages()
Note in the bdi_writeback structure if a task is currently being limited in balance_dirty_pages(), waiting for writeback to proceed. Signed-off-by: Jens Axboe --- include/linux/backing-dev-defs.h | 2 ++ mm/backing-dev.c | 1 + mm/page-writeback.c | 2 ++ 3 files changed, 5 insertions(+) diff --git a/include/linux/backing-dev-defs.h b/include/linux/backing-dev-defs.h index 3f103076d0bf..1212c374b928 100644 --- a/include/linux/backing-dev-defs.h +++ b/include/linux/backing-dev-defs.h @@ -116,6 +116,8 @@ struct bdi_writeback { struct list_head work_list; struct delayed_work dwork; /* work item used for writeback */ + atomic_t dirty_sleeping; /* waiting on dirty limit exceeded */ + struct list_head bdi_node; /* anchored at bdi->wb_list */ #ifdef CONFIG_CGROUP_WRITEBACK diff --git a/mm/backing-dev.c b/mm/backing-dev.c index 0c6317b7db38..41db7dff11d0 100644 --- a/mm/backing-dev.c +++ b/mm/backing-dev.c @@ -310,6 +310,7 @@ static int wb_init(struct bdi_writeback *wb, struct backing_dev_info *bdi, spin_lock_init(&wb->work_lock); INIT_LIST_HEAD(&wb->work_list); INIT_DELAYED_WORK(&wb->dwork, wb_workfn); + atomic_set(&wb->dirty_sleeping, 0); wb->congested = wb_congested_get_create(bdi, blkcg_id, gfp); if (!wb->congested) diff --git a/mm/page-writeback.c b/mm/page-writeback.c index 999792d35ccc..028a3d4d7129 100644 --- a/mm/page-writeback.c +++ b/mm/page-writeback.c @@ -1746,7 +1746,9 @@ pause: pause, start_time); __set_current_state(TASK_KILLABLE); + atomic_inc(&wb->dirty_sleeping); io_schedule_timeout(pause); + atomic_dec(&wb->dirty_sleeping); current->dirty_paused_when = now + pause; current->nr_dirtied = 0; -- 2.8.0.rc4.6.g7e4ba36
[PATCH 8/8] writeback: throttle buffered writeback
Test patch that throttles buffered writeback to make it a lot more smooth, and has way less impact on other system activity. Background writeback should be, by definition, background activity. The fact that we flush huge bundles of it at the time means that it potentially has heavy impacts on foreground workloads, which isn't ideal. We can't easily limit the sizes of writes that we do, since that would impact file system layout in the presence of delayed allocation. So just throttle back buffered writeback, unless someone is waiting for it. The algorithm for when to throttle takes its inspiration in the CoDel networking scheduling algorithm. Like CoDel, blk-wb monitors the minimum latencies of requests over a window of time. In that window of time, if the minimum latency of any request exceeds a given target, then a scale count is incremented and the queue depth is shrunk. The next monitoring window is shrunk accordingly. Unlike CoDel, if we hit a window that exhibits good behavior, then we simply increment the scale count and re-calculate the limits for that scale value. This prevents us from oscillating between a close-to-ideal value and max all the time, instead remaining in the windows where we get good behavior. The patch registers two sysfs entries. The first one, 'wb_lat_usec', sets the latency target for the window. It defaults to 2 msec for non-rotational storage, and 75 msec for rotational storage. Setting this value to '0' disables blk-wb. The second entry, 'wb_stats', is a debug entry, that simply shows the current internal state of the throttling machine: $ cat /sys/block/nvme0n1/queue/wb_stats background=16, normal=32, max=64, inflight=0, wait=0, bdp_wait=0 'background' denotes how many requests we will allow in-flight for idle background buffered writeback, 'normal' for higher priority writeback, and 'max' for when it's urgent we clean pages. 
'inflight' shows how many requests are currently in-flight for buffered writeback, 'wait' shows if anyone is currently waiting for access, and 'bdp_wait' shows if someone is currently throttled on this device in balance_dirty_pages(). blk-wb also registers a few trace events, that can be used to monitor the state changes: block_wb_lat: Latency 2446318 block_wb_stat: read lat: mean=2446318, min=2446318, max=2446318, samples=1, write lat: mean=518866, min=15522, max=5330353, samples=57 block_wb_step: step down: step=1, background=8, normal=16, max=32 'block_wb_lat' logs a violation in sync issue latency, 'block_wb_stat' logs a window violation of latencies and dumps the stats that lead to that, and finally, 'block_wb_step' logs a step up/down and the new limits associated with that state. Signed-off-by: Jens Axboe --- block/Makefile | 2 +- block/blk-core.c | 15 ++ block/blk-mq.c | 31 ++- block/blk-settings.c | 4 + block/blk-sysfs.c| 57 + block/blk-wb.c | 495 +++ block/blk-wb.h | 42 include/linux/blk_types.h| 2 + include/linux/blkdev.h | 3 + include/trace/events/block.h | 98 + 10 files changed, 746 insertions(+), 3 deletions(-) create mode 100644 block/blk-wb.c create mode 100644 block/blk-wb.h diff --git a/block/Makefile b/block/Makefile index 3446e0472df0..7e4be7a56a59 100644 --- a/block/Makefile +++ b/block/Makefile @@ -5,7 +5,7 @@ obj-$(CONFIG_BLOCK) := bio.o elevator.o blk-core.o blk-tag.o blk-sysfs.o \ blk-flush.o blk-settings.o blk-ioc.o blk-map.o \ blk-exec.o blk-merge.o blk-softirq.o blk-timeout.o \ - blk-lib.o blk-mq.o blk-mq-tag.o blk-stat.o \ + blk-lib.o blk-mq.o blk-mq-tag.o blk-stat.o blk-wb.o \ blk-mq-sysfs.o blk-mq-cpu.o blk-mq-cpumap.o ioctl.o \ genhd.o scsi_ioctl.o partition-generic.o ioprio.o \ badblocks.o partitions/ diff --git a/block/blk-core.c b/block/blk-core.c index 40b57bf4852c..d941f69dfb4b 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -39,6 +39,7 @@ #include "blk.h" #include "blk-mq.h" +#include "blk-wb.h" 
EXPORT_TRACEPOINT_SYMBOL_GPL(block_bio_remap); EXPORT_TRACEPOINT_SYMBOL_GPL(block_rq_remap); @@ -880,6 +881,7 @@ blk_init_allocated_queue(struct request_queue *q, request_fn_proc *rfn, fail: blk_free_flush_queue(q->fq); + blk_wb_exit(q); return NULL; } EXPORT_SYMBOL(blk_init_allocated_queue); @@ -1395,6 +1397,7 @@ void blk_requeue_request(struct request_queue *q, struct request *rq) blk_delete_timer(rq); blk_clear_rq_complete(rq); trace_block_rq_requeue(q, rq); + blk_wb_requeue(q->rq_wb, rq); if (rq->cmd_flags & REQ_QUEUED) blk_queue_end_tag(q, rq); @@ -1485,6 +1488,8 @@ void __blk_put_request(struct request_queue *q, struct request *req)
[PATCH 8/8] writeback: throttle buffered writeback
Test patch that throttles buffered writeback to make it a lot more smooth, and has way less impact on other system activity. Background writeback should be, by definition, background activity. The fact that we flush huge bundles of it at the time means that it potentially has heavy impacts on foreground workloads, which isn't ideal. We can't easily limit the sizes of writes that we do, since that would impact file system layout in the presence of delayed allocation. So just throttle back buffered writeback, unless someone is waiting for it. The algorithm for when to throttle takes its inspiration in the CoDel networking scheduling algorithm. Like CoDel, blk-wb monitors the minimum latencies of requests over a window of time. In that window of time, if the minimum latency of any request exceeds a given target, then a scale count is incremented and the queue depth is shrunk. The next monitoring window is shrunk accordingly. Unlike CoDel, if we hit a window that exhibits good behavior, then we simply increment the scale count and re-calculate the limits for that scale value. This prevents us from oscillating between a close-to-ideal value and max all the time, instead remaining in the windows where we get good behavior. The patch registers two sysfs entries. The first one, 'wb_lat_usec', sets the latency target for the window. It defaults to 2 msec for non-rotational storage, and 75 msec for rotational storage. Setting this value to '0' disables blk-wb. The second entry, 'wb_stats', is a debug entry, that simply shows the current internal state of the throttling machine: $ cat /sys/block/nvme0n1/queue/wb_stats background=16, normal=32, max=64, inflight=0, wait=0, bdp_wait=0 'background' denotes how many requests we will allow in-flight for idle background buffered writeback, 'normal' for higher priority writeback, and 'max' for when it's urgent we clean pages. 
'inflight' shows how many requests are currently in-flight for buffered writeback, 'wait' shows if anyone is currently waiting for access, and 'bdp_wait' shows if someone is currently throttled on this device in balance_dirty_pages(). blk-wb also registers a few trace events, that can be used to monitor the state changes: block_wb_lat: Latency 2446318 block_wb_stat: read lat: mean=2446318, min=2446318, max=2446318, samples=1, write lat: mean=518866, min=15522, max=5330353, samples=57 block_wb_step: step down: step=1, background=8, normal=16, max=32 'block_wb_lat' logs a violation in sync issue latency, 'block_wb_stat' logs a window violation of latencies and dumps the stats that lead to that, and finally, 'block_wb_step' logs a step up/down and the new limits associated with that state. Signed-off-by: Jens Axboe --- block/Makefile | 2 +- block/blk-core.c | 15 ++ block/blk-mq.c | 31 ++- block/blk-settings.c | 4 + block/blk-sysfs.c| 57 + block/blk-wb.c | 495 +++ block/blk-wb.h | 42 include/linux/blk_types.h| 2 + include/linux/blkdev.h | 3 + include/trace/events/block.h | 98 + 10 files changed, 746 insertions(+), 3 deletions(-) create mode 100644 block/blk-wb.c create mode 100644 block/blk-wb.h diff --git a/block/Makefile b/block/Makefile index 3446e0472df0..7e4be7a56a59 100644 --- a/block/Makefile +++ b/block/Makefile @@ -5,7 +5,7 @@ obj-$(CONFIG_BLOCK) := bio.o elevator.o blk-core.o blk-tag.o blk-sysfs.o \ blk-flush.o blk-settings.o blk-ioc.o blk-map.o \ blk-exec.o blk-merge.o blk-softirq.o blk-timeout.o \ - blk-lib.o blk-mq.o blk-mq-tag.o blk-stat.o \ + blk-lib.o blk-mq.o blk-mq-tag.o blk-stat.o blk-wb.o \ blk-mq-sysfs.o blk-mq-cpu.o blk-mq-cpumap.o ioctl.o \ genhd.o scsi_ioctl.o partition-generic.o ioprio.o \ badblocks.o partitions/ diff --git a/block/blk-core.c b/block/blk-core.c index 40b57bf4852c..d941f69dfb4b 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -39,6 +39,7 @@ #include "blk.h" #include "blk-mq.h" +#include "blk-wb.h" 
EXPORT_TRACEPOINT_SYMBOL_GPL(block_bio_remap); EXPORT_TRACEPOINT_SYMBOL_GPL(block_rq_remap); @@ -880,6 +881,7 @@ blk_init_allocated_queue(struct request_queue *q, request_fn_proc *rfn, fail: blk_free_flush_queue(q->fq); + blk_wb_exit(q); return NULL; } EXPORT_SYMBOL(blk_init_allocated_queue); @@ -1395,6 +1397,7 @@ void blk_requeue_request(struct request_queue *q, struct request *rq) blk_delete_timer(rq); blk_clear_rq_complete(rq); trace_block_rq_requeue(q, rq); + blk_wb_requeue(q->rq_wb, rq); if (rq->cmd_flags & REQ_QUEUED) blk_queue_end_tag(q, rq); @@ -1485,6 +1488,8 @@ void __blk_put_request(struct request_queue *q, struct request *req) /* this
linux-next: manual merge of the livepatching tree with Linus' tree
Hi Jiri, Today's linux-next merge of the livepatching tree got a conflict in: arch/powerpc/Kconfig between commit: 7f2bd0063342 ("powerpc/mm: enable page parallel initialisation") from Linus' tree and commit: 85baa095497f ("powerpc/livepatch: Add live patching support on ppc64le") from the livepatching tree. I fixed it up (see below) and can carry the fix as necessary. This is now fixed as far as linux-next is concerned, but any non trivial conflicts should be mentioned to your upstream maintainer when your tree is submitted for merging. You may also want to consider cooperating with the maintainer of the conflicting tree to minimise any particularly complex conflicts. Maybe the selects under config PPC should be sorted ... -- Cheers, Stephen Rothwell diff --cc arch/powerpc/Kconfig index fbebde0771c8,944a79a2768f.. --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@@ -159,7 -159,7 +159,8 @@@ config PP select ARCH_HAS_DEVMEM_IS_ALLOWED select HAVE_ARCH_SECCOMP_FILTER select ARCH_HAS_UBSAN_SANITIZE_ALL + select ARCH_SUPPORTS_DEFERRED_STRUCT_PAGE_INIT + select HAVE_LIVEPATCH if HAVE_DYNAMIC_FTRACE_WITH_REGS config GENERIC_CSUM def_bool CPU_LITTLE_ENDIAN
linux-next: manual merge of the livepatching tree with Linus' tree
Hi Jiri, Today's linux-next merge of the livepatching tree got a conflict in: arch/powerpc/Kconfig between commit: 7f2bd0063342 ("powerpc/mm: enable page parallel initialisation") from Linus' tree and commit: 85baa095497f ("powerpc/livepatch: Add live patching support on ppc64le") from the livepatching tree. I fixed it up (see below) and can carry the fix as necessary. This is now fixed as far as linux-next is concerned, but any non trivial conflicts should be mentioned to your upstream maintainer when your tree is submitted for merging. You may also want to consider cooperating with the maintainer of the conflicting tree to minimise any particularly complex conflicts. Maybe the selects under config PPC should be sorted ... -- Cheers, Stephen Rothwell diff --cc arch/powerpc/Kconfig index fbebde0771c8,944a79a2768f.. --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@@ -159,7 -159,7 +159,8 @@@ config PP select ARCH_HAS_DEVMEM_IS_ALLOWED select HAVE_ARCH_SECCOMP_FILTER select ARCH_HAS_UBSAN_SANITIZE_ALL + select ARCH_SUPPORTS_DEFERRED_STRUCT_PAGE_INIT + select HAVE_LIVEPATCH if HAVE_DYNAMIC_FTRACE_WITH_REGS config GENERIC_CSUM def_bool CPU_LITTLE_ENDIAN
Re: [PATCH] spi: cadence: mark pm functions __maybe_unused
On Sun, Apr 17, 2016 at 2:09 AM, Arnd Bergmann wrote: > The newly added runtime PM support for the cadence spi driver > causes harmless warnings when PM is disabled: > > drivers/spi/spi-cadence.c:681:12: warning: 'cnds_runtime_suspend' defined but > not used > drivers/spi/spi-cadence.c:652:12: warning: 'cnds_runtime_resume' defined but > not used > > This adds __maybe_unused annotations to the respective functions > to shut up the warnings, while leaving the code in place for > compile testing and avoiding ugly #ifdefs. Thanks for the patch. Feel free to add my ack. Acked-by: Shubhrajyoti Datta > > Signed-off-by: Arnd Bergmann > Fixes: d36ccd9f7ea4 ("spi: cadence: Runtime pm adaptation") > ---
Re: [PATCH] spi: cadence: mark pm functions __maybe_unused
On Sun, Apr 17, 2016 at 2:09 AM, Arnd Bergmann wrote: > The newly added runtime PM support for the cadence spi driver > causes harmless warnings when PM is disabled: > > drivers/spi/spi-cadence.c:681:12: warning: 'cnds_runtime_suspend' defined but > not used > drivers/spi/spi-cadence.c:652:12: warning: 'cnds_runtime_resume' defined but > not used > > This adds __maybe_unused annotations to the respective functions > to shut up the warnings, while leaving the code in place for > compile testing and avoiding ugly #ifdefs. Thanks for the patch. Feel free to add my ack. Acked-by: Shubhrajyoti Datta > > Signed-off-by: Arnd Bergmann > Fixes: d36ccd9f7ea4 ("spi: cadence: Runtime pm adaptation") > ---
vgacon.c:undefined reference to `screen_info'
Hi Chen, It's probably a bug fix that unveils the link errors. tree: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git master head: 9d090d01e3efdcdb8f7f07e86fcac91e01ae30cd commit: f69405ce6c0fc9f4a039011007371b31f80b470d openrisc: include: asm: Kbuild: add default "vga.h" date: 2 years, 5 months ago config: openrisc-alldefconfig (attached as .config) reproduce: wget https://git.kernel.org/cgit/linux/kernel/git/wfg/lkp-tests.git/plain/sbin/make.cross -O ~/bin/make.cross chmod +x ~/bin/make.cross git checkout f69405ce6c0fc9f4a039011007371b31f80b470d # save the attached .config to linux build tree make.cross ARCH=openrisc All errors (new ones prefixed by >>): drivers/built-in.o: In function `vgacon_save_screen': >> vgacon.c:(.text+0x20e0): undefined reference to `screen_info' vgacon.c:(.text+0x20e8): undefined reference to `screen_info' drivers/built-in.o: In function `vgacon_init': vgacon.c:(.text+0x284c): undefined reference to `screen_info' vgacon.c:(.text+0x2850): undefined reference to `screen_info' drivers/built-in.o: In function `vgacon_startup': vgacon.c:(.text+0x28d8): undefined reference to `screen_info' drivers/built-in.o:vgacon.c:(.text+0x28f0): more undefined references to `screen_info' follow --- 0-DAY kernel test infrastructureOpen Source Technology Center https://lists.01.org/pipermail/kbuild-all Intel Corporation .config.gz Description: Binary data
vgacon.c:undefined reference to `screen_info'
Hi Chen, It's probably a bug fix that unveils the link errors. tree: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git master head: 9d090d01e3efdcdb8f7f07e86fcac91e01ae30cd commit: f69405ce6c0fc9f4a039011007371b31f80b470d openrisc: include: asm: Kbuild: add default "vga.h" date: 2 years, 5 months ago config: openrisc-alldefconfig (attached as .config) reproduce: wget https://git.kernel.org/cgit/linux/kernel/git/wfg/lkp-tests.git/plain/sbin/make.cross -O ~/bin/make.cross chmod +x ~/bin/make.cross git checkout f69405ce6c0fc9f4a039011007371b31f80b470d # save the attached .config to linux build tree make.cross ARCH=openrisc All errors (new ones prefixed by >>): drivers/built-in.o: In function `vgacon_save_screen': >> vgacon.c:(.text+0x20e0): undefined reference to `screen_info' vgacon.c:(.text+0x20e8): undefined reference to `screen_info' drivers/built-in.o: In function `vgacon_init': vgacon.c:(.text+0x284c): undefined reference to `screen_info' vgacon.c:(.text+0x2850): undefined reference to `screen_info' drivers/built-in.o: In function `vgacon_startup': vgacon.c:(.text+0x28d8): undefined reference to `screen_info' drivers/built-in.o:vgacon.c:(.text+0x28f0): more undefined references to `screen_info' follow --- 0-DAY kernel test infrastructureOpen Source Technology Center https://lists.01.org/pipermail/kbuild-all Intel Corporation .config.gz Description: Binary data
Re: [PATCH 2/2] mountinfo: implement show_path for kernfs and cgroup
On Sun, Apr 17, 2016 at 03:04:32PM -0500, serge.hal...@ubuntu.com wrote: > From: Serge Hallyn> > When showing a cgroupfs entry in mountinfo, show the > path of the mount root dentry relative to the reader's > cgroup namespace root. > > Signed-off-by: Serge Hallyn > --- > fs/kernfs/mount.c | 14 ++ > include/linux/kernfs.h | 2 ++ > kernel/cgroup.c| 35 +++ > 3 files changed, 51 insertions(+) > > diff --git a/fs/kernfs/mount.c b/fs/kernfs/mount.c > index f73541f..3b78724 100644 > --- a/fs/kernfs/mount.c > +++ b/fs/kernfs/mount.c > @@ -15,6 +15,7 @@ > #include > #include > #include > +#include > > #include "kernfs-internal.h" > > @@ -40,6 +41,18 @@ static int kernfs_sop_show_options(struct seq_file *sf, > struct dentry *dentry) > return 0; > } > > +static int kernfs_sop_show_path(struct seq_file *sf, struct dentry *dentry) > +{ > + struct kernfs_node *node = dentry->d_fsdata; > + struct kernfs_root *root = kernfs_root(node); > + struct kernfs_syscall_ops *scops = root->syscall_ops; > + > + if (scops && scops->show_path) > + return scops->show_path(sf, node, root); > + > + return seq_dentry(sf, dentry, " \t\n\\"); > +} > + > const struct super_operations kernfs_sops = { > .statfs = simple_statfs, > .drop_inode = generic_delete_inode, > @@ -47,6 +60,7 @@ const struct super_operations kernfs_sops = { > > .remount_fs = kernfs_sop_remount_fs, > .show_options = kernfs_sop_show_options, > + .show_path = kernfs_sop_show_path, > }; > > /** > diff --git a/include/linux/kernfs.h b/include/linux/kernfs.h > index c06c442..30f089e 100644 > --- a/include/linux/kernfs.h > +++ b/include/linux/kernfs.h > @@ -152,6 +152,8 @@ struct kernfs_syscall_ops { > int (*rmdir)(struct kernfs_node *kn); > int (*rename)(struct kernfs_node *kn, struct kernfs_node *new_parent, > const char *new_name); > + int (*show_path)(struct seq_file *sf, struct kernfs_node *kn, > + struct kernfs_root *root); > }; > > struct kernfs_root { > diff --git a/kernel/cgroup.c b/kernel/cgroup.c > index 671dc05..9a0d7b3 
100644 > --- a/kernel/cgroup.c > +++ b/kernel/cgroup.c > @@ -1593,6 +1593,40 @@ static int rebind_subsystems(struct cgroup_root > *dst_root, u16 ss_mask) > return 0; > } > > +static int cgroup_show_path(struct seq_file *sf, struct kernfs_node *kf_node, > + struct kernfs_root *kf_root) > +{ > + int len = 0, ret = 0; > + char *buf = NULL; > + struct cgroup_namespace *ns = current->nsproxy->cgroup_ns; > + struct cgroup_root *kf_cgroot = cgroup_root_from_kf(kf_root); > + struct cgroup *ns_cgroup; > + > + mutex_lock(&cgroup_mutex); Hm, I can't grab the cgroup mutex here because I already have the namespace_sem. But that's required by cset_cgroup_from_root(). Can I just call that under rcu_read_lock() instead? (Not without changing the lockdep_assert_held()). Is there another way to get the info needed here? > + spin_lock_bh(&css_set_lock); > + ns_cgroup = cset_cgroup_from_root(ns->root_cset, kf_cgroot); > + len = kernfs_path_from_node(kf_node, ns_cgroup->kn, NULL, 0); > + if (len > 0) > + buf = kmalloc(len + 1, GFP_ATOMIC); > + if (buf) > + ret = kernfs_path_from_node(kf_node, ns_cgroup->kn, buf, len + > 1); > + > + spin_unlock_bh(&css_set_lock); > + mutex_unlock(&cgroup_mutex); > + > + if (len <= 0) > + return len; > + if (!buf) > + return -ENOMEM; > + if (ret == len) { > + seq_escape(sf, buf, " \t\n\\"); > + ret = 0; > + } else if (ret >= 0) > + ret = -EINVAL; > + kfree(buf); > + return ret; > +} > + > static int cgroup_show_options(struct seq_file *seq, > struct kernfs_root *kf_root) > { > @@ -5430,6 +5464,7 @@ static struct kernfs_syscall_ops cgroup_kf_syscall_ops > = { > .mkdir = cgroup_mkdir, > .rmdir = cgroup_rmdir, > .rename = cgroup_rename, > + .show_path = cgroup_show_path, > }; > > static void __init cgroup_init_subsys(struct cgroup_subsys *ss, bool early) > -- > 2.7.4 > > ___ > Containers mailing list > contain...@lists.linux-foundation.org > https://lists.linuxfoundation.org/mailman/listinfo/containers
Re: [PATCH 2/2] mountinfo: implement show_path for kernfs and cgroup
On Sun, Apr 17, 2016 at 03:04:32PM -0500, serge.hal...@ubuntu.com wrote: > From: Serge Hallyn > > When showing a cgroupfs entry in mountinfo, show the > path of the mount root dentry relative to the reader's > cgroup namespace root. > > Signed-off-by: Serge Hallyn > --- > fs/kernfs/mount.c | 14 ++ > include/linux/kernfs.h | 2 ++ > kernel/cgroup.c| 35 +++ > 3 files changed, 51 insertions(+) > > diff --git a/fs/kernfs/mount.c b/fs/kernfs/mount.c > index f73541f..3b78724 100644 > --- a/fs/kernfs/mount.c > +++ b/fs/kernfs/mount.c > @@ -15,6 +15,7 @@ > #include > #include > #include > +#include > > #include "kernfs-internal.h" > > @@ -40,6 +41,18 @@ static int kernfs_sop_show_options(struct seq_file *sf, > struct dentry *dentry) > return 0; > } > > +static int kernfs_sop_show_path(struct seq_file *sf, struct dentry *dentry) > +{ > + struct kernfs_node *node = dentry->d_fsdata; > + struct kernfs_root *root = kernfs_root(node); > + struct kernfs_syscall_ops *scops = root->syscall_ops; > + > + if (scops && scops->show_path) > + return scops->show_path(sf, node, root); > + > + return seq_dentry(sf, dentry, " \t\n\\"); > +} > + > const struct super_operations kernfs_sops = { > .statfs = simple_statfs, > .drop_inode = generic_delete_inode, > @@ -47,6 +60,7 @@ const struct super_operations kernfs_sops = { > > .remount_fs = kernfs_sop_remount_fs, > .show_options = kernfs_sop_show_options, > + .show_path = kernfs_sop_show_path, > }; > > /** > diff --git a/include/linux/kernfs.h b/include/linux/kernfs.h > index c06c442..30f089e 100644 > --- a/include/linux/kernfs.h > +++ b/include/linux/kernfs.h > @@ -152,6 +152,8 @@ struct kernfs_syscall_ops { > int (*rmdir)(struct kernfs_node *kn); > int (*rename)(struct kernfs_node *kn, struct kernfs_node *new_parent, > const char *new_name); > + int (*show_path)(struct seq_file *sf, struct kernfs_node *kn, > + struct kernfs_root *root); > }; > > struct kernfs_root { > diff --git a/kernel/cgroup.c b/kernel/cgroup.c > index 671dc05..9a0d7b3 
100644 > --- a/kernel/cgroup.c > +++ b/kernel/cgroup.c > @@ -1593,6 +1593,40 @@ static int rebind_subsystems(struct cgroup_root > *dst_root, u16 ss_mask) > return 0; > } > > +static int cgroup_show_path(struct seq_file *sf, struct kernfs_node *kf_node, > + struct kernfs_root *kf_root) > +{ > + int len = 0, ret = 0; > + char *buf = NULL; > + struct cgroup_namespace *ns = current->nsproxy->cgroup_ns; > + struct cgroup_root *kf_cgroot = cgroup_root_from_kf(kf_root); > + struct cgroup *ns_cgroup; > + > + mutex_lock(&cgroup_mutex); Hm, I can't grab the cgroup mutex here because I already have the namespace_sem. But that's required by cset_cgroup_from_root(). Can I just call that under rcu_read_lock() instead? (Not without changing the lockdep_assert_held()). Is there another way to get the info needed here? > + spin_lock_bh(&css_set_lock); > + ns_cgroup = cset_cgroup_from_root(ns->root_cset, kf_cgroot); > + len = kernfs_path_from_node(kf_node, ns_cgroup->kn, NULL, 0); > + if (len > 0) > + buf = kmalloc(len + 1, GFP_ATOMIC); > + if (buf) > + ret = kernfs_path_from_node(kf_node, ns_cgroup->kn, buf, len + > 1); > + > + spin_unlock_bh(&css_set_lock); > + mutex_unlock(&cgroup_mutex); > + > + if (len <= 0) > + return len; > + if (!buf) > + return -ENOMEM; > + if (ret == len) { > + seq_escape(sf, buf, " \t\n\\"); > + ret = 0; > + } else if (ret >= 0) > + ret = -EINVAL; > + kfree(buf); > + return ret; > +} > + > static int cgroup_show_options(struct seq_file *seq, > struct kernfs_root *kf_root) > { > @@ -5430,6 +5464,7 @@ static struct kernfs_syscall_ops cgroup_kf_syscall_ops > = { > .mkdir = cgroup_mkdir, > .rmdir = cgroup_rmdir, > .rename = cgroup_rename, > + .show_path = cgroup_show_path, > }; > > static void __init cgroup_init_subsys(struct cgroup_subsys *ss, bool early) > -- > 2.7.4 > > ___ > Containers mailing list > contain...@lists.linux-foundation.org > https://lists.linuxfoundation.org/mailman/listinfo/containers
linux-next: build failure after merge of the gpio tree
Hi Linus, After merging the gpio tree, today's linux-next build (x86_64 allmodconfig) failed like this: drivers/gpio/gpio-f7188x.c: In function 'f7188x_gpio_set_single_ended': drivers/gpio/gpio-f7188x.c:331:26: error: implicit declaration of function 'gpio_data_mode' [-Werror=implicit-function-declaration] superio_outb(sio->addr, gpio_data_mode(bank->regbase), data); ^ Caused by commit f90c6bdb690b ("gpio: f7188x: use the new open drain callback") I have used the gpio tree from next-20160415 for today. -- Cheers, Stephen Rothwell
linux-next: build failure after merge of the gpio tree
Hi Linus, After merging the gpio tree, today's linux-next build (x86_64 allmodconfig) failed like this: drivers/gpio/gpio-f7188x.c: In function 'f7188x_gpio_set_single_ended': drivers/gpio/gpio-f7188x.c:331:26: error: implicit declaration of function 'gpio_data_mode' [-Werror=implicit-function-declaration] superio_outb(sio->addr, gpio_data_mode(bank->regbase), data); ^ Caused by commit f90c6bdb690b ("gpio: f7188x: use the new open drain callback") I have used the gpio tree from next-20160415 for today. -- Cheers, Stephen Rothwell
Re: [PATCH] ixgbe: use msleep for long delays
From: Arnd Bergmann Date: Sat, 16 Apr 2016 22:35:08 +0200 > The newly added x550em_a support causes a link failure on ARM because of > an overly long time passed into udelay(): > > ERROR: "__bad_udelay" [drivers/net/ethernet/intel/ixgbe/ixgbe.ko] undefined! > > There are multiple variants of the ixgbe_acquire_swfw_sync_*() function, > and the other ones all use msleep(), so we can safely assume that all > callers are allowed to sleep, which makes msleep() a better replacement > than mdelay(). > > Signed-off-by: Arnd Bergmann > Fixes: 49425dfc7451 ("ixgbe: Add support for x550em_a 10G MAC type") I'm assuming Jeff will pick this up.
Re: [PATCH] bpf: avoid warning for wrong pointer cast
From: Arnd Bergmann Date: Sat, 16 Apr 2016 22:29:33 +0200 > Two new functions in bpf contain a cast from a 'u64' to a > pointer. This works on 64-bit architectures but causes a warning > on all 32-bit architectures: > > kernel/trace/bpf_trace.c: In function 'bpf_perf_event_output_tp': > kernel/trace/bpf_trace.c:350:13: error: cast to pointer from integer of > different size [-Werror=int-to-pointer-cast] > u64 ctx = *(long *)r1; > > This changes the cast to first convert the u64 argument into a uintptr_t, > which is guaranteed to be the same size as a pointer. > > Signed-off-by: Arnd Bergmann > Fixes: 9940d67c93b5 ("bpf: support bpf_get_stackid() and > bpf_perf_event_output() in tracepoint programs") Applied.
Re: [PATCH] ixgbe: use msleep for long delays
From: Arnd Bergmann Date: Sat, 16 Apr 2016 22:35:08 +0200 > The newly added x550em_a support causes a link failure on ARM because of > an overly long time passed into udelay(): > > ERROR: "__bad_udelay" [drivers/net/ethernet/intel/ixgbe/ixgbe.ko] undefined! > > There are multiple variants of the ixgbe_acquire_swfw_sync_*() function, > and the other ones all use msleep(), so we can safely assume that all > callers are allowed to sleep, which makes msleep() a better replacement > than mdelay(). > > Signed-off-by: Arnd Bergmann > Fixes: 49425dfc7451 ("ixgbe: Add support for x550em_a 10G MAC type") I'm assuming Jeff will pick this up.
Re: [PATCH] bpf: avoid warning for wrong pointer cast
From: Arnd Bergmann Date: Sat, 16 Apr 2016 22:29:33 +0200 > Two new functions in bpf contain a cast from a 'u64' to a > pointer. This works on 64-bit architectures but causes a warning > on all 32-bit architectures: > > kernel/trace/bpf_trace.c: In function 'bpf_perf_event_output_tp': > kernel/trace/bpf_trace.c:350:13: error: cast to pointer from integer of > different size [-Werror=int-to-pointer-cast] > u64 ctx = *(long *)r1; > > This changes the cast to first convert the u64 argument into a uintptr_t, > which is guaranteed to be the same size as a pointer. > > Signed-off-by: Arnd Bergmann > Fixes: 9940d67c93b5 ("bpf: support bpf_get_stackid() and > bpf_perf_event_output() in tracepoint programs") Applied.
include/linux/unaligned/access_ok.h:7:19: error: redefinition of 'get_unaligned_le16'
tree: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git master head: 9d090d01e3efdcdb8f7f07e86fcac91e01ae30cd commit: 3194c6870158e305dac2af52f83681e9cb67280f NFC: nfcmrvl: add firmware download support date: 6 months ago config: ia64-allmodconfig (attached as .config) reproduce: wget https://git.kernel.org/cgit/linux/kernel/git/wfg/lkp-tests.git/plain/sbin/make.cross -O ~/bin/make.cross chmod +x ~/bin/make.cross git checkout 3194c6870158e305dac2af52f83681e9cb67280f # save the attached .config to linux build tree make.cross ARCH=ia64 All errors (new ones prefixed by >>): In file included from drivers/nfc/nfcmrvl/fw_dnld.c:20:0: >> include/linux/unaligned/access_ok.h:7:19: error: redefinition of >> 'get_unaligned_le16' static inline u16 get_unaligned_le16(const void *p) ^ In file included from arch/ia64/include/asm/unaligned.h:4:0, from arch/ia64/include/asm/io.h:22, from arch/ia64/include/asm/smp.h:20, from include/linux/smp.h:59, from include/linux/topology.h:33, from include/linux/gfp.h:8, from include/linux/kmod.h:22, from include/linux/module.h:13, from drivers/nfc/nfcmrvl/fw_dnld.c:19: include/linux/unaligned/le_struct.h:6:19: note: previous definition of 'get_unaligned_le16' was here static inline u16 get_unaligned_le16(const void *p) ^ In file included from drivers/nfc/nfcmrvl/fw_dnld.c:20:0: >> include/linux/unaligned/access_ok.h:12:19: error: redefinition of >> 'get_unaligned_le32' static inline u32 get_unaligned_le32(const void *p) ^ In file included from arch/ia64/include/asm/unaligned.h:4:0, from arch/ia64/include/asm/io.h:22, from arch/ia64/include/asm/smp.h:20, from include/linux/smp.h:59, from include/linux/topology.h:33, from include/linux/gfp.h:8, from include/linux/kmod.h:22, from include/linux/module.h:13, from drivers/nfc/nfcmrvl/fw_dnld.c:19: include/linux/unaligned/le_struct.h:11:19: note: previous definition of 'get_unaligned_le32' was here static inline u32 get_unaligned_le32(const void *p) ^ In file included from 
drivers/nfc/nfcmrvl/fw_dnld.c:20:0: >> include/linux/unaligned/access_ok.h:17:19: error: redefinition of >> 'get_unaligned_le64' static inline u64 get_unaligned_le64(const void *p) ^ In file included from arch/ia64/include/asm/unaligned.h:4:0, from arch/ia64/include/asm/io.h:22, from arch/ia64/include/asm/smp.h:20, from include/linux/smp.h:59, from include/linux/topology.h:33, from include/linux/gfp.h:8, from include/linux/kmod.h:22, from include/linux/module.h:13, from drivers/nfc/nfcmrvl/fw_dnld.c:19: include/linux/unaligned/le_struct.h:16:19: note: previous definition of 'get_unaligned_le64' was here static inline u64 get_unaligned_le64(const void *p) ^ In file included from drivers/nfc/nfcmrvl/fw_dnld.c:20:0: >> include/linux/unaligned/access_ok.h:22:19: error: redefinition of >> 'get_unaligned_be16' static inline u16 get_unaligned_be16(const void *p) ^ In file included from arch/ia64/include/asm/unaligned.h:5:0, from arch/ia64/include/asm/io.h:22, from arch/ia64/include/asm/smp.h:20, from include/linux/smp.h:59, from include/linux/topology.h:33, from include/linux/gfp.h:8, from include/linux/kmod.h:22, from include/linux/module.h:13, from drivers/nfc/nfcmrvl/fw_dnld.c:19: include/linux/unaligned/be_byteshift.h:40:19: note: previous definition of 'get_unaligned_be16' was here static inline u16 get_unaligned_be16(const void *p) ^ In file included from drivers/nfc/nfcmrvl/fw_dnld.c:20:0: >> include/linux/unaligned/access_ok.h:27:19: error: redefinition of >> 'get_unaligned_be32' static inline u32 get_unaligned_be32(const void *p) ^ In file included from arch/ia64/include/asm/unaligned.h:5:0, from arch/ia64/include/asm/io.h:22, from arch/ia64/include/asm/smp.h:20, from include/linux/smp.h:59, from include/linux/topology.h:33, from include/linux/gfp.h:8, from include/linux/kmod.h:22, from include/linux/module.h:13, from
include/linux/unaligned/access_ok.h:7:19: error: redefinition of 'get_unaligned_le16'
tree: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git master head: 9d090d01e3efdcdb8f7f07e86fcac91e01ae30cd commit: 3194c6870158e305dac2af52f83681e9cb67280f NFC: nfcmrvl: add firmware download support date: 6 months ago config: ia64-allmodconfig (attached as .config) reproduce: wget https://git.kernel.org/cgit/linux/kernel/git/wfg/lkp-tests.git/plain/sbin/make.cross -O ~/bin/make.cross chmod +x ~/bin/make.cross git checkout 3194c6870158e305dac2af52f83681e9cb67280f # save the attached .config to linux build tree make.cross ARCH=ia64 All errors (new ones prefixed by >>): In file included from drivers/nfc/nfcmrvl/fw_dnld.c:20:0: >> include/linux/unaligned/access_ok.h:7:19: error: redefinition of >> 'get_unaligned_le16' static inline u16 get_unaligned_le16(const void *p) ^ In file included from arch/ia64/include/asm/unaligned.h:4:0, from arch/ia64/include/asm/io.h:22, from arch/ia64/include/asm/smp.h:20, from include/linux/smp.h:59, from include/linux/topology.h:33, from include/linux/gfp.h:8, from include/linux/kmod.h:22, from include/linux/module.h:13, from drivers/nfc/nfcmrvl/fw_dnld.c:19: include/linux/unaligned/le_struct.h:6:19: note: previous definition of 'get_unaligned_le16' was here static inline u16 get_unaligned_le16(const void *p) ^ In file included from drivers/nfc/nfcmrvl/fw_dnld.c:20:0: >> include/linux/unaligned/access_ok.h:12:19: error: redefinition of >> 'get_unaligned_le32' static inline u32 get_unaligned_le32(const void *p) ^ In file included from arch/ia64/include/asm/unaligned.h:4:0, from arch/ia64/include/asm/io.h:22, from arch/ia64/include/asm/smp.h:20, from include/linux/smp.h:59, from include/linux/topology.h:33, from include/linux/gfp.h:8, from include/linux/kmod.h:22, from include/linux/module.h:13, from drivers/nfc/nfcmrvl/fw_dnld.c:19: include/linux/unaligned/le_struct.h:11:19: note: previous definition of 'get_unaligned_le32' was here static inline u32 get_unaligned_le32(const void *p) ^ In file included from 
drivers/nfc/nfcmrvl/fw_dnld.c:20:0: >> include/linux/unaligned/access_ok.h:17:19: error: redefinition of >> 'get_unaligned_le64' static inline u64 get_unaligned_le64(const void *p) ^ In file included from arch/ia64/include/asm/unaligned.h:4:0, from arch/ia64/include/asm/io.h:22, from arch/ia64/include/asm/smp.h:20, from include/linux/smp.h:59, from include/linux/topology.h:33, from include/linux/gfp.h:8, from include/linux/kmod.h:22, from include/linux/module.h:13, from drivers/nfc/nfcmrvl/fw_dnld.c:19: include/linux/unaligned/le_struct.h:16:19: note: previous definition of 'get_unaligned_le64' was here static inline u64 get_unaligned_le64(const void *p) ^ In file included from drivers/nfc/nfcmrvl/fw_dnld.c:20:0: >> include/linux/unaligned/access_ok.h:22:19: error: redefinition of >> 'get_unaligned_be16' static inline u16 get_unaligned_be16(const void *p) ^ In file included from arch/ia64/include/asm/unaligned.h:5:0, from arch/ia64/include/asm/io.h:22, from arch/ia64/include/asm/smp.h:20, from include/linux/smp.h:59, from include/linux/topology.h:33, from include/linux/gfp.h:8, from include/linux/kmod.h:22, from include/linux/module.h:13, from drivers/nfc/nfcmrvl/fw_dnld.c:19: include/linux/unaligned/be_byteshift.h:40:19: note: previous definition of 'get_unaligned_be16' was here static inline u16 get_unaligned_be16(const void *p) ^ In file included from drivers/nfc/nfcmrvl/fw_dnld.c:20:0: >> include/linux/unaligned/access_ok.h:27:19: error: redefinition of >> 'get_unaligned_be32' static inline u32 get_unaligned_be32(const void *p) ^ In file included from arch/ia64/include/asm/unaligned.h:5:0, from arch/ia64/include/asm/io.h:22, from arch/ia64/include/asm/smp.h:20, from include/linux/smp.h:59, from include/linux/topology.h:33, from include/linux/gfp.h:8, from include/linux/kmod.h:22, from include/linux/module.h:13, from
Re: [PATCH V2] net: ethernet: mellanox: correct page conversion
From: Sinan Kaya Date: Sat, 16 Apr 2016 18:23:32 -0400 > Current code is assuming that the address returned by dma_alloc_coherent > is a logical address. This is not true on ARM/ARM64 systems. This patch > replaces dma_alloc_coherent with dma_map_page API. The address returned > can later by virtually mapped from the CPU side with vmap API. > > Signed-off-by: Sinan Kaya You can't do this. The DMA map page API gives non-coherent mappings, and thus requires proper flushing. So a straight conversion like this is never legitimate.
Re: [PATCH V2] net: ethernet: mellanox: correct page conversion
From: Sinan Kaya Date: Sat, 16 Apr 2016 18:23:32 -0400 > Current code is assuming that the address returned by dma_alloc_coherent > is a logical address. This is not true on ARM/ARM64 systems. This patch > replaces dma_alloc_coherent with dma_map_page API. The address returned > can later by virtually mapped from the CPU side with vmap API. > > Signed-off-by: Sinan Kaya You can't do this. The DMA map page API gives non-coherent mappings, and thus requires proper flushing. So a straight conversion like this is never legitimate.
Re: [PATCH 3/7] [Media] vcodec: mediatek: Add Mediatek V4L2 Video Decoder Driver
Hi Hans, On Fri, 2016-04-15 at 16:27 +0200, Hans Verkuil wrote: > On 04/13/2016 02:01 PM, Tiffany Lin wrote: > > Add v4l2 layer decoder driver for MT8173 > > > > Signed-off-by: Tiffany Lin> > --- > > drivers/media/platform/mtk-vcodec/Makefile | 10 +- > > drivers/media/platform/mtk-vcodec/mtk_vcodec_dec.c | 1429 > > > > drivers/media/platform/mtk-vcodec/mtk_vcodec_dec.h | 81 ++ > > .../media/platform/mtk-vcodec/mtk_vcodec_dec_drv.c | 469 +++ > > .../media/platform/mtk-vcodec/mtk_vcodec_dec_pm.c | 153 +++ > > .../media/platform/mtk-vcodec/mtk_vcodec_dec_pm.h | 28 + > > drivers/media/platform/mtk-vcodec/mtk_vcodec_drv.h | 98 +- > > drivers/media/platform/mtk-vcodec/vdec_drv_base.h | 56 + > > drivers/media/platform/mtk-vcodec/vdec_drv_if.c| 113 ++ > > drivers/media/platform/mtk-vcodec/vdec_drv_if.h| 93 ++ > > drivers/media/platform/mtk-vcodec/vdec_ipi_msg.h | 86 ++ > > 11 files changed, 2596 insertions(+), 20 deletions(-) > > create mode 100644 drivers/media/platform/mtk-vcodec/mtk_vcodec_dec.c > > create mode 100644 drivers/media/platform/mtk-vcodec/mtk_vcodec_dec.h > > create mode 100644 drivers/media/platform/mtk-vcodec/mtk_vcodec_dec_drv.c > > create mode 100644 drivers/media/platform/mtk-vcodec/mtk_vcodec_dec_pm.c > > create mode 100644 drivers/media/platform/mtk-vcodec/mtk_vcodec_dec_pm.h > > create mode 100644 drivers/media/platform/mtk-vcodec/vdec_drv_base.h > > create mode 100644 drivers/media/platform/mtk-vcodec/vdec_drv_if.c > > create mode 100644 drivers/media/platform/mtk-vcodec/vdec_drv_if.h > > create mode 100644 drivers/media/platform/mtk-vcodec/vdec_ipi_msg.h > > > > diff --git a/drivers/media/platform/mtk-vcodec/Makefile > > b/drivers/media/platform/mtk-vcodec/Makefile > > index dc5cb00..4c8ed2f 100644 > > --- a/drivers/media/platform/mtk-vcodec/Makefile > > +++ b/drivers/media/platform/mtk-vcodec/Makefile > > @@ -1,7 +1,13 @@ > > > > > > -obj-$(CONFIG_VIDEO_MEDIATEK_VCODEC) += mtk-vcodec-enc.o mtk-vcodec-common.o > > - > > 
+obj-$(CONFIG_VIDEO_MEDIATEK_VCODEC) += mtk-vcodec-dec.o \ > > + mtk-vcodec-enc.o \ > > + mtk-vcodec-common.o > > + > > +mtk-vcodec-dec-y := mtk_vcodec_dec_drv.o \ > > + vdec_drv_if.o \ > > + mtk_vcodec_dec.o \ > > + mtk_vcodec_dec_pm.o \ > > > > > > mtk-vcodec-enc-y := venc/venc_vp8_if.o \ > > diff --git a/drivers/media/platform/mtk-vcodec/mtk_vcodec_dec.c > > b/drivers/media/platform/mtk-vcodec/mtk_vcodec_dec.c > > new file mode 100644 > > index 000..0499413 > > --- /dev/null > > +++ b/drivers/media/platform/mtk-vcodec/mtk_vcodec_dec.c > > @@ -0,0 +1,1429 @@ > > +/* > > +* Copyright (c) 2016 MediaTek Inc. > > +* Author: PC Chen > > +* Tiffany Lin > > +* > > +* This program is free software; you can redistribute it and/or modify > > +* it under the terms of the GNU General Public License version 2 as > > +* published by the Free Software Foundation. > > +* > > +* This program is distributed in the hope that it will be useful, > > +* but WITHOUT ANY WARRANTY; without even the implied warranty of > > +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the > > +* GNU General Public License for more details. 
> > +*/ > > + > > +#include > > +#include > > +#include > > + > > +#include "mtk_vcodec_drv.h" > > +#include "mtk_vcodec_dec.h" > > +#include "mtk_vcodec_intr.h" > > +#include "mtk_vcodec_util.h" > > +#include "vdec_drv_if.h" > > +#include "mtk_vcodec_dec_pm.h" > > + > > +static struct mtk_video_fmt mtk_video_formats[] = { > > + { > > + .fourcc = V4L2_PIX_FMT_H264, > > + .type = MTK_FMT_DEC, > > + .num_planes = 1, > > + }, > > + { > > + .fourcc = V4L2_PIX_FMT_VP8, > > + .type = MTK_FMT_DEC, > > + .num_planes = 1, > > + }, > > + { > > + .fourcc = V4L2_PIX_FMT_VP9, > > + .type = MTK_FMT_DEC, > > + .num_planes = 1, > > + }, > > + { > > + .fourcc = V4L2_PIX_FMT_MT21, > > + .type = MTK_FMT_FRAME, > > + .num_planes = 2, > > + }, > > +}; > > +#define OUT_FMT_IDX0 > > +#define CAP_FMT_IDX3 > > + > > +#define VCODEC_CAPABILITY_4K_DISABLED 0x10 > > +#define VCODEC_DEC_4K_CODED_WIDTH 4096U > > +#define VCODEC_DEC_4K_CODED_HEIGHT 2304U > > + > > +#define MTK_VDEC_MIN_W 64U > > +#define MTK_VDEC_MIN_H 64U > > +#define MTK_VDEC_MAX_W 2048U > > +#define MTK_VDEC_MAX_H 1088U > > +#define DFT_CFG_WIDTH MTK_VDEC_MIN_W > > +#define DFT_CFG_HEIGHT MTK_VDEC_MIN_H > > + > > +static const struct mtk_codec_framesizes mtk_vdec_framesizes[] = { > > + { > > + .fourcc = V4L2_PIX_FMT_H264, > > + .stepwise = { MTK_VDEC_MIN_W, MTK_VDEC_MAX_W, 16, > > +
Re: [PATCH 3/7] [Media] vcodec: mediatek: Add Mediatek V4L2 Video Decoder Driver
Hi Hans, On Fri, 2016-04-15 at 16:27 +0200, Hans Verkuil wrote: > On 04/13/2016 02:01 PM, Tiffany Lin wrote: > > Add v4l2 layer decoder driver for MT8173 > > > > Signed-off-by: Tiffany Lin > > --- > > drivers/media/platform/mtk-vcodec/Makefile | 10 +- > > drivers/media/platform/mtk-vcodec/mtk_vcodec_dec.c | 1429 > > > > drivers/media/platform/mtk-vcodec/mtk_vcodec_dec.h | 81 ++ > > .../media/platform/mtk-vcodec/mtk_vcodec_dec_drv.c | 469 +++ > > .../media/platform/mtk-vcodec/mtk_vcodec_dec_pm.c | 153 +++ > > .../media/platform/mtk-vcodec/mtk_vcodec_dec_pm.h | 28 + > > drivers/media/platform/mtk-vcodec/mtk_vcodec_drv.h | 98 +- > > drivers/media/platform/mtk-vcodec/vdec_drv_base.h | 56 + > > drivers/media/platform/mtk-vcodec/vdec_drv_if.c| 113 ++ > > drivers/media/platform/mtk-vcodec/vdec_drv_if.h| 93 ++ > > drivers/media/platform/mtk-vcodec/vdec_ipi_msg.h | 86 ++ > > 11 files changed, 2596 insertions(+), 20 deletions(-) > > create mode 100644 drivers/media/platform/mtk-vcodec/mtk_vcodec_dec.c > > create mode 100644 drivers/media/platform/mtk-vcodec/mtk_vcodec_dec.h > > create mode 100644 drivers/media/platform/mtk-vcodec/mtk_vcodec_dec_drv.c > > create mode 100644 drivers/media/platform/mtk-vcodec/mtk_vcodec_dec_pm.c > > create mode 100644 drivers/media/platform/mtk-vcodec/mtk_vcodec_dec_pm.h > > create mode 100644 drivers/media/platform/mtk-vcodec/vdec_drv_base.h > > create mode 100644 drivers/media/platform/mtk-vcodec/vdec_drv_if.c > > create mode 100644 drivers/media/platform/mtk-vcodec/vdec_drv_if.h > > create mode 100644 drivers/media/platform/mtk-vcodec/vdec_ipi_msg.h > > > > diff --git a/drivers/media/platform/mtk-vcodec/Makefile > > b/drivers/media/platform/mtk-vcodec/Makefile > > index dc5cb00..4c8ed2f 100644 > > --- a/drivers/media/platform/mtk-vcodec/Makefile > > +++ b/drivers/media/platform/mtk-vcodec/Makefile > > @@ -1,7 +1,13 @@ > > > > > > -obj-$(CONFIG_VIDEO_MEDIATEK_VCODEC) += mtk-vcodec-enc.o mtk-vcodec-common.o > > - > > 
+obj-$(CONFIG_VIDEO_MEDIATEK_VCODEC) += mtk-vcodec-dec.o \ > > + mtk-vcodec-enc.o \ > > + mtk-vcodec-common.o > > + > > +mtk-vcodec-dec-y := mtk_vcodec_dec_drv.o \ > > + vdec_drv_if.o \ > > + mtk_vcodec_dec.o \ > > + mtk_vcodec_dec_pm.o \ > > > > > > mtk-vcodec-enc-y := venc/venc_vp8_if.o \ > > diff --git a/drivers/media/platform/mtk-vcodec/mtk_vcodec_dec.c > > b/drivers/media/platform/mtk-vcodec/mtk_vcodec_dec.c > > new file mode 100644 > > index 000..0499413 > > --- /dev/null > > +++ b/drivers/media/platform/mtk-vcodec/mtk_vcodec_dec.c > > @@ -0,0 +1,1429 @@ > > +/* > > +* Copyright (c) 2016 MediaTek Inc. > > +* Author: PC Chen > > +* Tiffany Lin > > +* > > +* This program is free software; you can redistribute it and/or modify > > +* it under the terms of the GNU General Public License version 2 as > > +* published by the Free Software Foundation. > > +* > > +* This program is distributed in the hope that it will be useful, > > +* but WITHOUT ANY WARRANTY; without even the implied warranty of > > +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the > > +* GNU General Public License for more details. 
> > +*/ > > + > > +#include > > +#include > > +#include > > + > > +#include "mtk_vcodec_drv.h" > > +#include "mtk_vcodec_dec.h" > > +#include "mtk_vcodec_intr.h" > > +#include "mtk_vcodec_util.h" > > +#include "vdec_drv_if.h" > > +#include "mtk_vcodec_dec_pm.h" > > + > > +static struct mtk_video_fmt mtk_video_formats[] = { > > + { > > + .fourcc = V4L2_PIX_FMT_H264, > > + .type = MTK_FMT_DEC, > > + .num_planes = 1, > > + }, > > + { > > + .fourcc = V4L2_PIX_FMT_VP8, > > + .type = MTK_FMT_DEC, > > + .num_planes = 1, > > + }, > > + { > > + .fourcc = V4L2_PIX_FMT_VP9, > > + .type = MTK_FMT_DEC, > > + .num_planes = 1, > > + }, > > + { > > + .fourcc = V4L2_PIX_FMT_MT21, > > + .type = MTK_FMT_FRAME, > > + .num_planes = 2, > > + }, > > +}; > > +#define OUT_FMT_IDX0 > > +#define CAP_FMT_IDX3 > > + > > +#define VCODEC_CAPABILITY_4K_DISABLED 0x10 > > +#define VCODEC_DEC_4K_CODED_WIDTH 4096U > > +#define VCODEC_DEC_4K_CODED_HEIGHT 2304U > > + > > +#define MTK_VDEC_MIN_W 64U > > +#define MTK_VDEC_MIN_H 64U > > +#define MTK_VDEC_MAX_W 2048U > > +#define MTK_VDEC_MAX_H 1088U > > +#define DFT_CFG_WIDTH MTK_VDEC_MIN_W > > +#define DFT_CFG_HEIGHT MTK_VDEC_MIN_H > > + > > +static const struct mtk_codec_framesizes mtk_vdec_framesizes[] = { > > + { > > + .fourcc = V4L2_PIX_FMT_H264, > > + .stepwise = { MTK_VDEC_MIN_W, MTK_VDEC_MAX_W, 16, > > + MTK_VDEC_MIN_H, MTK_VDEC_MAX_H, 16 }, > > + }, > > + { > > + .fourcc =
Re: [PATCH] mmc/sdio: utilize runtime PM to speed up SDIO card's resume process
Any comments are welcome. Thanks, Zhonghui On 4/13/2016 2:42 PM, Fu, Zhonghui wrote: > Leave some work of SDIO card's resume process into it's runtime resume > process to shorten system resume latency. > > Signed-off-by: Zhonghui Fu > --- > drivers/mmc/core/sdio.c | 21 ++--- > include/linux/mmc/host.h |9 ++--- > 2 files changed, 16 insertions(+), 14 deletions(-) > > diff --git a/drivers/mmc/core/sdio.c b/drivers/mmc/core/sdio.c > index bd44ba8..fe5d3c5 100644 > --- a/drivers/mmc/core/sdio.c > +++ b/drivers/mmc/core/sdio.c > @@ -902,6 +902,10 @@ static int mmc_sdio_suspend(struct mmc_host *host) > > if (!mmc_card_keep_power(host)) { > mmc_power_off(host); > + if (host->caps & MMC_CAP_POWER_OFF_CARD) { > + pm_runtime_disable(>card->dev); > + pm_runtime_set_suspended(>card->dev); > + } > } else if (host->retune_period) { > mmc_retune_timer_stop(host); > mmc_retune_needed(host); > @@ -924,18 +928,16 @@ static int mmc_sdio_resume(struct mmc_host *host) > > /* Restore power if needed */ > if (!mmc_card_keep_power(host)) { > - mmc_power_up(host, host->card->ocr); > /* > - * Tell runtime PM core we just powered up the card, > - * since it still believes the card is powered off. 
>* Note that currently runtime PM is only enabled >* for SDIO cards that are MMC_CAP_POWER_OFF_CARD >*/ > if (host->caps & MMC_CAP_POWER_OFF_CARD) { > - pm_runtime_disable(>card->dev); > - pm_runtime_set_active(>card->dev); > pm_runtime_enable(>card->dev); > + goto out; > } > + > + mmc_power_up(host, host->card->ocr); > } > > /* No need to reinitialize powered-resumed nonremovable cards */ > @@ -953,13 +955,10 @@ static int mmc_sdio_resume(struct mmc_host *host) > err = sdio_enable_4bit_bus(host->card); > } > > - if (!err && host->sdio_irqs) { > - if (!(host->caps2 & MMC_CAP2_SDIO_IRQ_NOTHREAD)) > - wake_up_process(host->sdio_irq_thread); > - else if (host->caps & MMC_CAP_SDIO_IRQ) > - host->ops->enable_sdio_irq(host, 1); > - } > + if (!err && host->sdio_irqs) > + mmc_signal_sdio_irq(host); > > +out: > mmc_release_host(host); > > host->pm_flags &= ~MMC_PM_KEEP_POWER; > diff --git a/include/linux/mmc/host.h b/include/linux/mmc/host.h > index 8dd4d29..8faaa5b 100644 > --- a/include/linux/mmc/host.h > +++ b/include/linux/mmc/host.h > @@ -400,10 +400,13 @@ void mmc_request_done(struct mmc_host *, struct > mmc_request *); > > static inline void mmc_signal_sdio_irq(struct mmc_host *host) > { > - host->ops->enable_sdio_irq(host, 0); > - host->sdio_irq_pending = true; > - if (host->sdio_irq_thread) > + if (!(host->caps2 & MMC_CAP2_SDIO_IRQ_NOTHREAD)) { > + host->ops->enable_sdio_irq(host, 0); > + host->sdio_irq_pending = true; > wake_up_process(host->sdio_irq_thread); > + } else if (host->caps & MMC_CAP_SDIO_IRQ) { > + host->ops->enable_sdio_irq(host, 1); > + } > } > > void sdio_run_irqs(struct mmc_host *host); > -- 1.7.1 >
Re: [PATCH] mmc/sdio: utilize runtime PM to speed up SDIO card's resume process
Any comments are welcome. Thanks, Zhonghui On 4/13/2016 2:42 PM, Fu, Zhonghui wrote: > Leave some work of SDIO card's resume process into it's runtime resume > process to shorten system resume latency. > > Signed-off-by: Zhonghui Fu > --- > drivers/mmc/core/sdio.c | 21 ++--- > include/linux/mmc/host.h |9 ++--- > 2 files changed, 16 insertions(+), 14 deletions(-) > > diff --git a/drivers/mmc/core/sdio.c b/drivers/mmc/core/sdio.c > index bd44ba8..fe5d3c5 100644 > --- a/drivers/mmc/core/sdio.c > +++ b/drivers/mmc/core/sdio.c > @@ -902,6 +902,10 @@ static int mmc_sdio_suspend(struct mmc_host *host) > > if (!mmc_card_keep_power(host)) { > mmc_power_off(host); > + if (host->caps & MMC_CAP_POWER_OFF_CARD) { > + pm_runtime_disable(>card->dev); > + pm_runtime_set_suspended(>card->dev); > + } > } else if (host->retune_period) { > mmc_retune_timer_stop(host); > mmc_retune_needed(host); > @@ -924,18 +928,16 @@ static int mmc_sdio_resume(struct mmc_host *host) > > /* Restore power if needed */ > if (!mmc_card_keep_power(host)) { > - mmc_power_up(host, host->card->ocr); > /* > - * Tell runtime PM core we just powered up the card, > - * since it still believes the card is powered off. 
>* Note that currently runtime PM is only enabled >* for SDIO cards that are MMC_CAP_POWER_OFF_CARD >*/ > if (host->caps & MMC_CAP_POWER_OFF_CARD) { > - pm_runtime_disable(>card->dev); > - pm_runtime_set_active(>card->dev); > pm_runtime_enable(>card->dev); > + goto out; > } > + > + mmc_power_up(host, host->card->ocr); > } > > /* No need to reinitialize powered-resumed nonremovable cards */ > @@ -953,13 +955,10 @@ static int mmc_sdio_resume(struct mmc_host *host) > err = sdio_enable_4bit_bus(host->card); > } > > - if (!err && host->sdio_irqs) { > - if (!(host->caps2 & MMC_CAP2_SDIO_IRQ_NOTHREAD)) > - wake_up_process(host->sdio_irq_thread); > - else if (host->caps & MMC_CAP_SDIO_IRQ) > - host->ops->enable_sdio_irq(host, 1); > - } > + if (!err && host->sdio_irqs) > + mmc_signal_sdio_irq(host); > > +out: > mmc_release_host(host); > > host->pm_flags &= ~MMC_PM_KEEP_POWER; > diff --git a/include/linux/mmc/host.h b/include/linux/mmc/host.h > index 8dd4d29..8faaa5b 100644 > --- a/include/linux/mmc/host.h > +++ b/include/linux/mmc/host.h > @@ -400,10 +400,13 @@ void mmc_request_done(struct mmc_host *, struct > mmc_request *); > > static inline void mmc_signal_sdio_irq(struct mmc_host *host) > { > - host->ops->enable_sdio_irq(host, 0); > - host->sdio_irq_pending = true; > - if (host->sdio_irq_thread) > + if (!(host->caps2 & MMC_CAP2_SDIO_IRQ_NOTHREAD)) { > + host->ops->enable_sdio_irq(host, 0); > + host->sdio_irq_pending = true; > wake_up_process(host->sdio_irq_thread); > + } else if (host->caps & MMC_CAP_SDIO_IRQ) { > + host->ops->enable_sdio_irq(host, 1); > + } > } > > void sdio_run_irqs(struct mmc_host *host); > -- 1.7.1 >