Re: [RFC v3 4/5] drm/mediatek: add support for Mediatek SoC MT2701

2016-06-12 Thread CK Hu
Hi, YT:

Some comments inline.

On Thu, 2016-06-09 at 00:03 +0800, YT Shen wrote:
> This patch adds support for the Mediatek MT2701 DISP subsystem.
> There is only one OVL engine in MT2701.
> 
> Signed-off-by: YT Shen 
> ---
>  drivers/gpu/drm/mediatek/mtk_disp_ovl.c |6 
>  drivers/gpu/drm/mediatek/mtk_disp_rdma.c|6 
>  drivers/gpu/drm/mediatek/mtk_drm_ddp.c  |   42 
> +++
>  drivers/gpu/drm/mediatek/mtk_drm_ddp_comp.c |8 +
>  drivers/gpu/drm/mediatek/mtk_drm_ddp_comp.h |2 ++
>  drivers/gpu/drm/mediatek/mtk_drm_drv.c  |   31 
>  6 files changed, 95 insertions(+)
> 
> diff --git a/drivers/gpu/drm/mediatek/mtk_disp_ovl.c 
> b/drivers/gpu/drm/mediatek/mtk_disp_ovl.c
> index eb5c05e..1da0a71 100644
> --- a/drivers/gpu/drm/mediatek/mtk_disp_ovl.c
> +++ b/drivers/gpu/drm/mediatek/mtk_disp_ovl.c
> @@ -286,11 +286,17 @@ static int mtk_disp_ovl_remove(struct platform_device 
> *pdev)
>   return 0;
>  }
>  
> +static const struct mtk_ddp_comp_driver_data mt2701_ovl_driver_data = {
> + .ovl = {0x0040, 1 << 12, 0}
> +};
> +
>  static const struct mtk_ddp_comp_driver_data mt8173_ovl_driver_data = {
>   .ovl = {0x0f40, 0, 1 << 12}
>  };
>  
>  static const struct of_device_id mtk_disp_ovl_driver_dt_match[] = {
> + { .compatible = "mediatek,mt2701-disp-ovl",
> +   .data = &mt2701_ovl_driver_data},
>   { .compatible = "mediatek,mt8173-disp-ovl",
> .data = &mt8173_ovl_driver_data},
>   {},
> diff --git a/drivers/gpu/drm/mediatek/mtk_disp_rdma.c 
> b/drivers/gpu/drm/mediatek/mtk_disp_rdma.c
> index fb0db50..506a353 100644
> --- a/drivers/gpu/drm/mediatek/mtk_disp_rdma.c
> +++ b/drivers/gpu/drm/mediatek/mtk_disp_rdma.c
> @@ -225,11 +225,17 @@ static int mtk_disp_rdma_remove(struct platform_device 
> *pdev)
>   return 0;
>  }
>  
> +static const struct mtk_ddp_comp_driver_data mt2701_rdma_driver_data = {
> + .rdma_fifo_pseudo_size = SZ_4K,
> +};
> +
>  static const struct mtk_ddp_comp_driver_data mt8173_rdma_driver_data = {
>   .rdma_fifo_pseudo_size = SZ_8K,
>  };
>  
>  static const struct of_device_id mtk_disp_rdma_driver_dt_match[] = {
> + { .compatible = "mediatek,mt2701-disp-rdma",
> +   .data = &mt2701_rdma_driver_data},
>   { .compatible = "mediatek,mt8173-disp-rdma",
> .data = &mt8173_rdma_driver_data},
>   {},
> diff --git a/drivers/gpu/drm/mediatek/mtk_drm_ddp.c 
> b/drivers/gpu/drm/mediatek/mtk_drm_ddp.c
> index fa53806..7ab6986 100644
> --- a/drivers/gpu/drm/mediatek/mtk_drm_ddp.c
> +++ b/drivers/gpu/drm/mediatek/mtk_drm_ddp.c
> @@ -32,6 +32,10 @@
>  #define DISP_REG_CONFIG_DISP_RDMA1_MOUT_EN   0x0c8
>  #define DISP_REG_CONFIG_MMSYS_CG_CON00x100
>  
> +#define DISP_REG_CONFIG_DISP_OVL_MOUT_EN 0x030
> +#define DISP_REG_CONFIG_OUT_SEL  0x04c
> +#define DISP_REG_CONFIG_DSI_SEL  0x050

Align the numeric values.

> +
>  #define DISP_REG_MUTEX_EN(n) (0x20 + 0x20 * (n))
>  #define DISP_REG_MUTEX(n)(0x24 + 0x20 * (n))
>  #define DISP_REG_MUTEX_RST(n)(0x28 + 0x20 * (n))
> @@ -54,6 +58,13 @@
>  #define MT8173_MUTEX_MOD_DISP_PWM1   BIT(24)
>  #define MT8173_MUTEX_MOD_DISP_OD BIT(25)
>  
> +#define MT2701_MUTEX_MOD_DISP_OVLBIT(3)
> +#define MT2701_MUTEX_MOD_DISP_WDMA   BIT(6)
> +#define MT2701_MUTEX_MOD_DISP_COLOR  BIT(7)
> +#define MT2701_MUTEX_MOD_DISP_BLSBIT(9)
> +#define MT2701_MUTEX_MOD_DISP_RDMA0  BIT(10)
> +#define MT2701_MUTEX_MOD_DISP_RDMA1  BIT(12)
> +
>  #define MUTEX_SOF_SINGLE_MODE0
>  #define MUTEX_SOF_DSI0   1
>  #define MUTEX_SOF_DSI1   2
> @@ -69,6 +80,10 @@
>  #define DPI0_SEL_IN_RDMA10x1
>  #define COLOR1_SEL_IN_OVL1   0x1
>  
> +#define OVL_MOUT_EN_RDMA 0x1
> +#define BLS_TO_DSI_RDMA1_TO_DPI1 0x8
> +#define DSI_SEL_IN_BLS   0x0
> +
>  struct mtk_disp_mutex {
>   int id;
>   bool claimed;
> @@ -82,6 +97,15 @@ struct mtk_ddp {
>   const unsigned int  *mutex_mod;
>  };
>  
> +static const unsigned int mt2701_mutex_mod[DDP_COMPONENT_ID_MAX] = {
> + [DDP_COMPONENT_BLS] = MT2701_MUTEX_MOD_DISP_BLS,
> + [DDP_COMPONENT_COLOR0] = MT2701_MUTEX_MOD_DISP_COLOR,
> + [DDP_COMPONENT_OVL0] = MT2701_MUTEX_MOD_DISP_OVL,
> + [DDP_COMPONENT_RDMA0] = MT2701_MUTEX_MOD_DISP_RDMA0,
> + [DDP_COMPONENT_RDMA1] = MT2701_MUTEX_MOD_DISP_RDMA1,
> + [DDP_COMPONENT_WDMA0] = MT2701_MUTEX_MOD_DISP_WDMA,
> +};
> +
>  static const unsigned int mt8173_mutex_mod[DDP_COMPONENT_ID_MAX] = {
>   [DDP_COMPONENT_AAL] = MT8173_MUTEX_MOD_DISP_AAL,
>   [DDP_COMPONENT_COLOR0] = MT8173_MUTEX_MOD_DISP_COLOR0,
> @@ -109,6 +133,9 @@ static unsigned int mtk_ddp_mout_en(enum mtk_ddp_comp_id 
> cur,
>   if (cur == DDP_COMPONENT_OVL0 && next == DDP_COMPONENT_COLOR0) {
>   *addr = 

Re: [RFC v3 4/5] drm/mediatek: add support for Mediatek SoC MT2701

2016-06-12 Thread CK Hu
Hi, YT:

Some comments inline.

On Thu, 2016-06-09 at 00:03 +0800, YT Shen wrote:
> This patch adds support for the Mediatek MT2701 DISP subsystem.
> There is only one OVL engine in MT2701.
> 
> Signed-off-by: YT Shen 
> ---
>  drivers/gpu/drm/mediatek/mtk_disp_ovl.c |6 
>  drivers/gpu/drm/mediatek/mtk_disp_rdma.c|6 
>  drivers/gpu/drm/mediatek/mtk_drm_ddp.c  |   42 
> +++
>  drivers/gpu/drm/mediatek/mtk_drm_ddp_comp.c |8 +
>  drivers/gpu/drm/mediatek/mtk_drm_ddp_comp.h |2 ++
>  drivers/gpu/drm/mediatek/mtk_drm_drv.c  |   31 
>  6 files changed, 95 insertions(+)
> 
> diff --git a/drivers/gpu/drm/mediatek/mtk_disp_ovl.c 
> b/drivers/gpu/drm/mediatek/mtk_disp_ovl.c
> index eb5c05e..1da0a71 100644
> --- a/drivers/gpu/drm/mediatek/mtk_disp_ovl.c
> +++ b/drivers/gpu/drm/mediatek/mtk_disp_ovl.c
> @@ -286,11 +286,17 @@ static int mtk_disp_ovl_remove(struct platform_device 
> *pdev)
>   return 0;
>  }
>  
> +static const struct mtk_ddp_comp_driver_data mt2701_ovl_driver_data = {
> + .ovl = {0x0040, 1 << 12, 0}
> +};
> +
>  static const struct mtk_ddp_comp_driver_data mt8173_ovl_driver_data = {
>   .ovl = {0x0f40, 0, 1 << 12}
>  };
>  
>  static const struct of_device_id mtk_disp_ovl_driver_dt_match[] = {
> + { .compatible = "mediatek,mt2701-disp-ovl",
> +   .data = &mt2701_ovl_driver_data},
>   { .compatible = "mediatek,mt8173-disp-ovl",
> .data = &mt8173_ovl_driver_data},
>   {},
> diff --git a/drivers/gpu/drm/mediatek/mtk_disp_rdma.c 
> b/drivers/gpu/drm/mediatek/mtk_disp_rdma.c
> index fb0db50..506a353 100644
> --- a/drivers/gpu/drm/mediatek/mtk_disp_rdma.c
> +++ b/drivers/gpu/drm/mediatek/mtk_disp_rdma.c
> @@ -225,11 +225,17 @@ static int mtk_disp_rdma_remove(struct platform_device 
> *pdev)
>   return 0;
>  }
>  
> +static const struct mtk_ddp_comp_driver_data mt2701_rdma_driver_data = {
> + .rdma_fifo_pseudo_size = SZ_4K,
> +};
> +
>  static const struct mtk_ddp_comp_driver_data mt8173_rdma_driver_data = {
>   .rdma_fifo_pseudo_size = SZ_8K,
>  };
>  
>  static const struct of_device_id mtk_disp_rdma_driver_dt_match[] = {
> + { .compatible = "mediatek,mt2701-disp-rdma",
> +   .data = &mt2701_rdma_driver_data},
>   { .compatible = "mediatek,mt8173-disp-rdma",
> .data = &mt8173_rdma_driver_data},
>   {},
> diff --git a/drivers/gpu/drm/mediatek/mtk_drm_ddp.c 
> b/drivers/gpu/drm/mediatek/mtk_drm_ddp.c
> index fa53806..7ab6986 100644
> --- a/drivers/gpu/drm/mediatek/mtk_drm_ddp.c
> +++ b/drivers/gpu/drm/mediatek/mtk_drm_ddp.c
> @@ -32,6 +32,10 @@
>  #define DISP_REG_CONFIG_DISP_RDMA1_MOUT_EN   0x0c8
>  #define DISP_REG_CONFIG_MMSYS_CG_CON00x100
>  
> +#define DISP_REG_CONFIG_DISP_OVL_MOUT_EN 0x030
> +#define DISP_REG_CONFIG_OUT_SEL  0x04c
> +#define DISP_REG_CONFIG_DSI_SEL  0x050

Align the numeric values.

> +
>  #define DISP_REG_MUTEX_EN(n) (0x20 + 0x20 * (n))
>  #define DISP_REG_MUTEX(n)(0x24 + 0x20 * (n))
>  #define DISP_REG_MUTEX_RST(n)(0x28 + 0x20 * (n))
> @@ -54,6 +58,13 @@
>  #define MT8173_MUTEX_MOD_DISP_PWM1   BIT(24)
>  #define MT8173_MUTEX_MOD_DISP_OD BIT(25)
>  
> +#define MT2701_MUTEX_MOD_DISP_OVLBIT(3)
> +#define MT2701_MUTEX_MOD_DISP_WDMA   BIT(6)
> +#define MT2701_MUTEX_MOD_DISP_COLOR  BIT(7)
> +#define MT2701_MUTEX_MOD_DISP_BLSBIT(9)
> +#define MT2701_MUTEX_MOD_DISP_RDMA0  BIT(10)
> +#define MT2701_MUTEX_MOD_DISP_RDMA1  BIT(12)
> +
>  #define MUTEX_SOF_SINGLE_MODE0
>  #define MUTEX_SOF_DSI0   1
>  #define MUTEX_SOF_DSI1   2
> @@ -69,6 +80,10 @@
>  #define DPI0_SEL_IN_RDMA10x1
>  #define COLOR1_SEL_IN_OVL1   0x1
>  
> +#define OVL_MOUT_EN_RDMA 0x1
> +#define BLS_TO_DSI_RDMA1_TO_DPI1 0x8
> +#define DSI_SEL_IN_BLS   0x0
> +
>  struct mtk_disp_mutex {
>   int id;
>   bool claimed;
> @@ -82,6 +97,15 @@ struct mtk_ddp {
>   const unsigned int  *mutex_mod;
>  };
>  
> +static const unsigned int mt2701_mutex_mod[DDP_COMPONENT_ID_MAX] = {
> + [DDP_COMPONENT_BLS] = MT2701_MUTEX_MOD_DISP_BLS,
> + [DDP_COMPONENT_COLOR0] = MT2701_MUTEX_MOD_DISP_COLOR,
> + [DDP_COMPONENT_OVL0] = MT2701_MUTEX_MOD_DISP_OVL,
> + [DDP_COMPONENT_RDMA0] = MT2701_MUTEX_MOD_DISP_RDMA0,
> + [DDP_COMPONENT_RDMA1] = MT2701_MUTEX_MOD_DISP_RDMA1,
> + [DDP_COMPONENT_WDMA0] = MT2701_MUTEX_MOD_DISP_WDMA,
> +};
> +
>  static const unsigned int mt8173_mutex_mod[DDP_COMPONENT_ID_MAX] = {
>   [DDP_COMPONENT_AAL] = MT8173_MUTEX_MOD_DISP_AAL,
>   [DDP_COMPONENT_COLOR0] = MT8173_MUTEX_MOD_DISP_COLOR0,
> @@ -109,6 +133,9 @@ static unsigned int mtk_ddp_mout_en(enum mtk_ddp_comp_id 
> cur,
>   if (cur == DDP_COMPONENT_OVL0 && next == DDP_COMPONENT_COLOR0) {
>   *addr = 

[RESEND PATCH 2/3] i.MX: system.c: Remove redundant errata 752271 code

2016-06-12 Thread Andrey Smirnov
Applying a fix for ARM errata 752271 would already be taken care of by a
call to a 'fixup' hook as a part of l2x0_of_init() -> __l2c_init() call
chain. Moreover, the code in 'fixup' function would do that based on the
PL310's revision information, whereas removed code does so based on SoC
version, which does not work very well on i.MX6Q+, which identifies itself
as i.MX6Q, but is not affected by 752271.

Signed-off-by: Andrey Smirnov 
---

RESEND, now with Russell King on CC list.

 arch/arm/mach-imx/system.c | 11 ---
 1 file changed, 11 deletions(-)

diff --git a/arch/arm/mach-imx/system.c b/arch/arm/mach-imx/system.c
index d9f8b0e..b153376 100644
--- a/arch/arm/mach-imx/system.c
+++ b/arch/arm/mach-imx/system.c
@@ -110,17 +110,6 @@ void __init imx_init_l2cache(void)
/* Configure the L2 PREFETCH and POWER registers */
val = readl_relaxed(l2x0_base + L310_PREFETCH_CTRL);
val |= 0x7080;
-   /*
-* The L2 cache controller(PL310) version on the i.MX6D/Q is 
r3p1-50rel0
-* The L2 cache controller(PL310) version on the 
i.MX6DL/SOLO/SL is r3p2
-* But according to ARM PL310 errata: 752271
-* ID: 752271: Double linefill feature can cause data corruption
-* Fault Status: Present in: r3p0, r3p1, r3p1-50rel0. Fixed in 
r3p2
-* Workaround: The only workaround to this erratum is to 
disable the
-* double linefill feature. This is the default behavior.
-*/
-   if (cpu_is_imx6q())
-   val &= ~(1 << 30 | 1 << 23);
writel_relaxed(val, l2x0_base + L310_PREFETCH_CTRL);
}
 
-- 
2.5.5



[PATCH 3/3] i.MX: system.c: Replace magic numbers

2016-06-12 Thread Andrey Smirnov
Signed-off-by: Andrey Smirnov 
---

RESEND, now with Russell King on CC list.

 arch/arm/include/asm/hardware/cache-l2x0.h | 7 +++
 arch/arm/mach-imx/system.c | 5 -
 2 files changed, 11 insertions(+), 1 deletion(-)

diff --git a/arch/arm/include/asm/hardware/cache-l2x0.h 
b/arch/arm/include/asm/hardware/cache-l2x0.h
index 3a5ec1c..5e27162 100644
--- a/arch/arm/include/asm/hardware/cache-l2x0.h
+++ b/arch/arm/include/asm/hardware/cache-l2x0.h
@@ -61,6 +61,13 @@
 #define L2X0_LINE_TAG  0xF30
 #define L2X0_DEBUG_CTRL0xF40
 #define L310_PREFETCH_CTRL 0xF60
+#define   L310_DOUBLE_LINEFILL_EN  BIT(30)
+#define   L310_INSTRUCTION_PREFETCH_EN BIT(29)
+#define   L310_DATA_PREFETCH_ENBIT(28)
+#define   L310_DOUBLE_LINEFILL_ON_WRAP_READ_DISBIT(27)
+#define   L310_PREFETCH_DROP_ENBIT(24)
+#define   L310_INCR_DOUBLE_LINEFILL_EN BIT(23)
+#define   L310_ESCLUSIVE_SEQUENCE_EN   BIT(21)
 #define L310_POWER_CTRL0xF80
 #define   L310_DYNAMIC_CLK_GATING_EN   (1 << 1)
 #define   L310_STNDBY_MODE_EN  (1 << 0)
diff --git a/arch/arm/mach-imx/system.c b/arch/arm/mach-imx/system.c
index b153376..bd9a96b 100644
--- a/arch/arm/mach-imx/system.c
+++ b/arch/arm/mach-imx/system.c
@@ -109,7 +109,10 @@ void __init imx_init_l2cache(void)
if (!(readl_relaxed(l2x0_base + L2X0_CTRL) & L2X0_CTRL_EN)) {
/* Configure the L2 PREFETCH and POWER registers */
val = readl_relaxed(l2x0_base + L310_PREFETCH_CTRL);
-   val |= 0x7080;
+   val |=  L310_DOUBLE_LINEFILL_EN |
+   L310_INSTRUCTION_PREFETCH_EN |
+   L310_DATA_PREFETCH_EN |
+   L310_INCR_DOUBLE_LINEFILL_EN;
writel_relaxed(val, l2x0_base + L310_PREFETCH_CTRL);
}
 
-- 
2.5.5



[RESEND PATCH 2/3] i.MX: system.c: Remove redundant errata 752271 code

2016-06-12 Thread Andrey Smirnov
Applying a fix for ARM errata 752271 would already be taken care of by a
call to a 'fixup' hook as a part of l2x0_of_init() -> __l2c_init() call
chain. Moreover, the code in 'fixup' function would do that based on the
PL310's revision information, whereas removed code does so based on SoC
version, which does not work very well on i.MX6Q+, which identifies itself
as i.MX6Q, but is not affected by 752271.

Signed-off-by: Andrey Smirnov 
---

RESEND, now with Russell King on CC list.

 arch/arm/mach-imx/system.c | 11 ---
 1 file changed, 11 deletions(-)

diff --git a/arch/arm/mach-imx/system.c b/arch/arm/mach-imx/system.c
index d9f8b0e..b153376 100644
--- a/arch/arm/mach-imx/system.c
+++ b/arch/arm/mach-imx/system.c
@@ -110,17 +110,6 @@ void __init imx_init_l2cache(void)
/* Configure the L2 PREFETCH and POWER registers */
val = readl_relaxed(l2x0_base + L310_PREFETCH_CTRL);
val |= 0x7080;
-   /*
-* The L2 cache controller(PL310) version on the i.MX6D/Q is 
r3p1-50rel0
-* The L2 cache controller(PL310) version on the 
i.MX6DL/SOLO/SL is r3p2
-* But according to ARM PL310 errata: 752271
-* ID: 752271: Double linefill feature can cause data corruption
-* Fault Status: Present in: r3p0, r3p1, r3p1-50rel0. Fixed in 
r3p2
-* Workaround: The only workaround to this erratum is to 
disable the
-* double linefill feature. This is the default behavior.
-*/
-   if (cpu_is_imx6q())
-   val &= ~(1 << 30 | 1 << 23);
writel_relaxed(val, l2x0_base + L310_PREFETCH_CTRL);
}
 
-- 
2.5.5



[PATCH 3/3] i.MX: system.c: Replace magic numbers

2016-06-12 Thread Andrey Smirnov
Signed-off-by: Andrey Smirnov 
---

RESEND, now with Russell King on CC list.

 arch/arm/include/asm/hardware/cache-l2x0.h | 7 +++
 arch/arm/mach-imx/system.c | 5 -
 2 files changed, 11 insertions(+), 1 deletion(-)

diff --git a/arch/arm/include/asm/hardware/cache-l2x0.h 
b/arch/arm/include/asm/hardware/cache-l2x0.h
index 3a5ec1c..5e27162 100644
--- a/arch/arm/include/asm/hardware/cache-l2x0.h
+++ b/arch/arm/include/asm/hardware/cache-l2x0.h
@@ -61,6 +61,13 @@
 #define L2X0_LINE_TAG  0xF30
 #define L2X0_DEBUG_CTRL0xF40
 #define L310_PREFETCH_CTRL 0xF60
+#define   L310_DOUBLE_LINEFILL_EN  BIT(30)
+#define   L310_INSTRUCTION_PREFETCH_EN BIT(29)
+#define   L310_DATA_PREFETCH_ENBIT(28)
+#define   L310_DOUBLE_LINEFILL_ON_WRAP_READ_DISBIT(27)
+#define   L310_PREFETCH_DROP_ENBIT(24)
+#define   L310_INCR_DOUBLE_LINEFILL_EN BIT(23)
+#define   L310_ESCLUSIVE_SEQUENCE_EN   BIT(21)
 #define L310_POWER_CTRL0xF80
 #define   L310_DYNAMIC_CLK_GATING_EN   (1 << 1)
 #define   L310_STNDBY_MODE_EN  (1 << 0)
diff --git a/arch/arm/mach-imx/system.c b/arch/arm/mach-imx/system.c
index b153376..bd9a96b 100644
--- a/arch/arm/mach-imx/system.c
+++ b/arch/arm/mach-imx/system.c
@@ -109,7 +109,10 @@ void __init imx_init_l2cache(void)
if (!(readl_relaxed(l2x0_base + L2X0_CTRL) & L2X0_CTRL_EN)) {
/* Configure the L2 PREFETCH and POWER registers */
val = readl_relaxed(l2x0_base + L310_PREFETCH_CTRL);
-   val |= 0x7080;
+   val |=  L310_DOUBLE_LINEFILL_EN |
+   L310_INSTRUCTION_PREFETCH_EN |
+   L310_DATA_PREFETCH_EN |
+   L310_INCR_DOUBLE_LINEFILL_EN;
writel_relaxed(val, l2x0_base + L310_PREFETCH_CTRL);
}
 
-- 
2.5.5



Re: [PATCH 1/4] mtd: introduce the mtd_pairing_scheme concept

2016-06-12 Thread Brian Norris
Hi,

On Sat, Jun 11, 2016 at 08:54:08AM +0200, Boris Brezillon wrote:
> On Fri, 10 Jun 2016 19:17:15 -0700
> Brian Norris  wrote:
> > On Mon, Apr 25, 2016 at 12:01:18PM +0200, Boris Brezillon wrote:
> > > MLC and TLC NAND devices are using NAND cells exposing more than one bit,
> > > but instead of attaching all the bits in a given cell to a single NAND
> > > page, each bit is usually attached to a different page. This concept is
> > > called 'page pairing', and has significant impacts on the flash storage
> > > usage.
> > > The main problem showed by these devices is that interrupting a page
> > > program operation may not only corrupt the page we are programming
> > > but also the page it is paired with, hence the need to expose to MTD
> > > users the pairing scheme information.
> > > 
> > > The pairing APIs allows one to query pairing information attached to a
> > > given page (here called wunit), or the other way around (the wunit
> > > pointed by pairing information).  
> > 
> > Why the "write unit" terminology? Is a write unit ever different from a
> > page?
> 
> Because there's no concept of pages at the MTD level. The page size is
> actually translated into writesize, so I thought keeping the same
> wording for pairing scheme would be more appropriate. Not sure other
> device types will need this pairing scheme feature though.

Ah, I suppose that makes sense.

> > 
> > > It also provides several helpers to help the conversion between absolute
> > > offsets and wunits, and query the number of pairing groups.
> > > 
> > > Signed-off-by: Boris Brezillon 
> > > ---
> > >  drivers/mtd/mtdcore.c   | 62 
> > > +++
> > >  drivers/mtd/mtdpart.c   |  1 +
> > >  include/linux/mtd/mtd.h | 64 
> > > +
> > >  3 files changed, 127 insertions(+)
> > > 

[...]

> > > diff --git a/include/linux/mtd/mtd.h b/include/linux/mtd/mtd.h
> > > index 29a1706..4961092 100644
> > > --- a/include/linux/mtd/mtd.h
> > > +++ b/include/linux/mtd/mtd.h
> > > @@ -127,6 +127,36 @@ struct mtd_ooblayout_ops {
> > >   struct mtd_oob_region *oobfree);
> > >  };
> > >  
> > > +/**
> > > + * struct mtd_pairing_info - Page pairing information
> > > + *
> > > + * @pair: represent the pair index in the paired pages table.For 
> > > example, if  
> > 
> > Needs a space after the period.
> 
> Yep.
> 
> > 
> > > + * page 0 and page 2 are paired together they form the first 
> > > pair.  
> > 
> > This example doesn't help. What would the value of @pair be in this
> > case? "First pair" doesn't translate to an integer unambiguously.
> 
> pair 0
> 
> > 
> > > + * @group: the group represent the bit position in the cell. For example,
> > > + *  page 0 uses bit 0 and is thus part of group 0.  
> > 
> > I can barely understand what your description for these two fields
> > means. I think you need a much more verbose overall description for the
> > struct (some here, and maybe more in mtd_pairing_scheme?), and some
> > better specifics about what values to expect in the fields. For example
> > you might include language like: "this struct describes a single write
> > unit in terms of its page pairing geometry."
> > 
> > Also, the "pair" term (and examples you use) seem to imply 2-cell MLC,
> > whereas I believe you're trying to handle TLC too. I don't know if we
> > should drop the "pair" term, or just explain it better.
> 
> I clearly have some problems with the words I've chosen, but those terms
> were extracted from NAND datasheets (group and pair), and I think
> keeping the same wording help people converting datasheet specs into
> pairing scheme implementation.
> 
> Any suggestions to replace those 2 words?

I'm not sure we should replace the words (esp. if those are used by
multiple vendors). I just think you might need better examples -- for
instance, an example with TLC. Also, (0, 0) is the trivial case;
perhaps a non-zero case?

I'm also wondering how I use this struct and accompanying API to answer
questions like "what page(s) are paired with page X"? I understand I can
convert from a page number to a 'pairing_info', but how do I determine
the other pages in my pairing? I guess it's implied that I can modify
the 'group' to any other value in [0, ngroups) then run get_wunit() to
get the inverse? I can understand why you might do this instead of
passing back an array (for instance), but I think it deserves a little
bit of explanation.

> > 
> > You also need to steal more documentation from your commit message and
> > cover and put it somewhere, whether it's the comments or
> > Documentation/mtd/nand/.
> 
> Okay.
> 
> > 
> > > + */
> > > +struct mtd_pairing_info {
> > > + int pair;
> > > + int group;
> > > +};
> > > +
> > > +/**
> > > + * struct mtd_pairing_scheme - Page pairing information
> > > + *
> > > + * @ngroups: number of groups. Should be related to the 

Re: [PATCH 1/4] mtd: introduce the mtd_pairing_scheme concept

2016-06-12 Thread Brian Norris
Hi,

On Sat, Jun 11, 2016 at 08:54:08AM +0200, Boris Brezillon wrote:
> On Fri, 10 Jun 2016 19:17:15 -0700
> Brian Norris  wrote:
> > On Mon, Apr 25, 2016 at 12:01:18PM +0200, Boris Brezillon wrote:
> > > MLC and TLC NAND devices are using NAND cells exposing more than one bit,
> > > but instead of attaching all the bits in a given cell to a single NAND
> > > page, each bit is usually attached to a different page. This concept is
> > > called 'page pairing', and has significant impacts on the flash storage
> > > usage.
> > > The main problem showed by these devices is that interrupting a page
> > > program operation may not only corrupt the page we are programming
> > > but also the page it is paired with, hence the need to expose to MTD
> > > users the pairing scheme information.
> > > 
> > > The pairing APIs allows one to query pairing information attached to a
> > > given page (here called wunit), or the other way around (the wunit
> > > pointed by pairing information).  
> > 
> > Why the "write unit" terminology? Is a write unit ever different from a
> > page?
> 
> Because there's no concept of pages at the MTD level. The page size is
> actually translated into writesize, so I thought keeping the same
> wording for pairing scheme would be more appropriate. Not sure other
> device types will need this pairing scheme feature though.

Ah, I suppose that makes sense.

> > 
> > > It also provides several helpers to help the conversion between absolute
> > > offsets and wunits, and query the number of pairing groups.
> > > 
> > > Signed-off-by: Boris Brezillon 
> > > ---
> > >  drivers/mtd/mtdcore.c   | 62 
> > > +++
> > >  drivers/mtd/mtdpart.c   |  1 +
> > >  include/linux/mtd/mtd.h | 64 
> > > +
> > >  3 files changed, 127 insertions(+)
> > > 

[...]

> > > diff --git a/include/linux/mtd/mtd.h b/include/linux/mtd/mtd.h
> > > index 29a1706..4961092 100644
> > > --- a/include/linux/mtd/mtd.h
> > > +++ b/include/linux/mtd/mtd.h
> > > @@ -127,6 +127,36 @@ struct mtd_ooblayout_ops {
> > >   struct mtd_oob_region *oobfree);
> > >  };
> > >  
> > > +/**
> > > + * struct mtd_pairing_info - Page pairing information
> > > + *
> > > + * @pair: represent the pair index in the paired pages table.For 
> > > example, if  
> > 
> > Needs a space after the period.
> 
> Yep.
> 
> > 
> > > + * page 0 and page 2 are paired together they form the first 
> > > pair.  
> > 
> > This example doesn't help. What would the value of @pair be in this
> > case? "First pair" doesn't translate to an integer unambiguously.
> 
> pair 0
> 
> > 
> > > + * @group: the group represent the bit position in the cell. For example,
> > > + *  page 0 uses bit 0 and is thus part of group 0.  
> > 
> > I can barely understand what your description for these two fields
> > means. I think you need a much more verbose overall description for the
> > struct (some here, and maybe more in mtd_pairing_scheme?), and some
> > better specifics about what values to expect in the fields. For example
> > you might include language like: "this struct describes a single write
> > unit in terms of its page pairing geometry."
> > 
> > Also, the "pair" term (and examples you use) seem to imply 2-cell MLC,
> > whereas I believe you're trying to handle TLC too. I don't know if we
> > should drop the "pair" term, or just explain it better.
> 
> I clearly have some problems with the words I've chosen, but those terms
> were extracted from NAND datasheets (group and pair), and I think
> keeping the same wording help people converting datasheet specs into
> pairing scheme implementation.
> 
> Any suggestions to replace those 2 words?

I'm not sure we should replace the words (esp. if those are used by
multiple vendors). I just think you might need better examples -- for
instance, an example with TLC. Also, (0, 0) is the trivial case;
perhaps a non-zero case?

I'm also wondering how I use this struct and accompanying API to answer
questions like "what page(s) are paired with page X"? I understand I can
convert from a page number to a 'pairing_info', but how do I determine
the other pages in my pairing? I guess it's implied that I can modify
the 'group' to any other value in [0, ngroups) then run get_wunit() to
get the inverse? I can understand why you might do this instead of
passing back an array (for instance), but I think it deserves a little
bit of explanation.

> > 
> > You also need to steal more documentation from your commit message and
> > cover and put it somewhere, whether it's the comments or
> > Documentation/mtd/nand/.
> 
> Okay.
> 
> > 
> > > + */
> > > +struct mtd_pairing_info {
> > > + int pair;
> > > + int group;
> > > +};
> > > +
> > > +/**
> > > + * struct mtd_pairing_scheme - Page pairing information
> > > + *
> > > + * @ngroups: number of groups. Should be related to the number of bits
> > > + *per cell.
> > > + * 

[PATCH 1/3] i.MX: system.c: Convert goto to if statement

2016-06-12 Thread Andrey Smirnov
Using goto here doesn't bring any advantages and only makes the code
flow less clear. No functional changes.

Signed-off-by: Andrey Smirnov 
---

 RESEND, now with Russell King on CC list.

 arch/arm/mach-imx/system.c | 36 +---
 1 file changed, 17 insertions(+), 19 deletions(-)

diff --git a/arch/arm/mach-imx/system.c b/arch/arm/mach-imx/system.c
index 105d1ce..d9f8b0e 100644
--- a/arch/arm/mach-imx/system.c
+++ b/arch/arm/mach-imx/system.c
@@ -106,26 +106,24 @@ void __init imx_init_l2cache(void)
goto out;
}
 
-   if (readl_relaxed(l2x0_base + L2X0_CTRL) & L2X0_CTRL_EN)
-   goto skip_if_enabled;
-
-   /* Configure the L2 PREFETCH and POWER registers */
-   val = readl_relaxed(l2x0_base + L310_PREFETCH_CTRL);
-   val |= 0x7080;
-   /*
-* The L2 cache controller(PL310) version on the i.MX6D/Q is r3p1-50rel0
-* The L2 cache controller(PL310) version on the i.MX6DL/SOLO/SL is r3p2
-* But according to ARM PL310 errata: 752271
-* ID: 752271: Double linefill feature can cause data corruption
-* Fault Status: Present in: r3p0, r3p1, r3p1-50rel0. Fixed in r3p2
-* Workaround: The only workaround to this erratum is to disable the
-* double linefill feature. This is the default behavior.
-*/
-   if (cpu_is_imx6q())
-   val &= ~(1 << 30 | 1 << 23);
-   writel_relaxed(val, l2x0_base + L310_PREFETCH_CTRL);
+   if (!(readl_relaxed(l2x0_base + L2X0_CTRL) & L2X0_CTRL_EN)) {
+   /* Configure the L2 PREFETCH and POWER registers */
+   val = readl_relaxed(l2x0_base + L310_PREFETCH_CTRL);
+   val |= 0x7080;
+   /*
+* The L2 cache controller(PL310) version on the i.MX6D/Q is 
r3p1-50rel0
+* The L2 cache controller(PL310) version on the 
i.MX6DL/SOLO/SL is r3p2
+* But according to ARM PL310 errata: 752271
+* ID: 752271: Double linefill feature can cause data corruption
+* Fault Status: Present in: r3p0, r3p1, r3p1-50rel0. Fixed in 
r3p2
+* Workaround: The only workaround to this erratum is to 
disable the
+* double linefill feature. This is the default behavior.
+*/
+   if (cpu_is_imx6q())
+   val &= ~(1 << 30 | 1 << 23);
+   writel_relaxed(val, l2x0_base + L310_PREFETCH_CTRL);
+   }
 
-skip_if_enabled:
iounmap(l2x0_base);
of_node_put(np);
 
-- 
2.5.5



[PATCH 1/3] i.MX: system.c: Convert goto to if statement

2016-06-12 Thread Andrey Smirnov
Using goto here doesn't bring any advantages and only makes the code
flow less clear. No functional changes.

Signed-off-by: Andrey Smirnov 
---

 RESEND, now with Russell King on CC list.

 arch/arm/mach-imx/system.c | 36 +---
 1 file changed, 17 insertions(+), 19 deletions(-)

diff --git a/arch/arm/mach-imx/system.c b/arch/arm/mach-imx/system.c
index 105d1ce..d9f8b0e 100644
--- a/arch/arm/mach-imx/system.c
+++ b/arch/arm/mach-imx/system.c
@@ -106,26 +106,24 @@ void __init imx_init_l2cache(void)
goto out;
}
 
-   if (readl_relaxed(l2x0_base + L2X0_CTRL) & L2X0_CTRL_EN)
-   goto skip_if_enabled;
-
-   /* Configure the L2 PREFETCH and POWER registers */
-   val = readl_relaxed(l2x0_base + L310_PREFETCH_CTRL);
-   val |= 0x7080;
-   /*
-* The L2 cache controller(PL310) version on the i.MX6D/Q is r3p1-50rel0
-* The L2 cache controller(PL310) version on the i.MX6DL/SOLO/SL is r3p2
-* But according to ARM PL310 errata: 752271
-* ID: 752271: Double linefill feature can cause data corruption
-* Fault Status: Present in: r3p0, r3p1, r3p1-50rel0. Fixed in r3p2
-* Workaround: The only workaround to this erratum is to disable the
-* double linefill feature. This is the default behavior.
-*/
-   if (cpu_is_imx6q())
-   val &= ~(1 << 30 | 1 << 23);
-   writel_relaxed(val, l2x0_base + L310_PREFETCH_CTRL);
+   if (!(readl_relaxed(l2x0_base + L2X0_CTRL) & L2X0_CTRL_EN)) {
+   /* Configure the L2 PREFETCH and POWER registers */
+   val = readl_relaxed(l2x0_base + L310_PREFETCH_CTRL);
+   val |= 0x7080;
+   /*
+* The L2 cache controller(PL310) version on the i.MX6D/Q is 
r3p1-50rel0
+* The L2 cache controller(PL310) version on the 
i.MX6DL/SOLO/SL is r3p2
+* But according to ARM PL310 errata: 752271
+* ID: 752271: Double linefill feature can cause data corruption
+* Fault Status: Present in: r3p0, r3p1, r3p1-50rel0. Fixed in 
r3p2
+* Workaround: The only workaround to this erratum is to 
disable the
+* double linefill feature. This is the default behavior.
+*/
+   if (cpu_is_imx6q())
+   val &= ~(1 << 30 | 1 << 23);
+   writel_relaxed(val, l2x0_base + L310_PREFETCH_CTRL);
+   }
 
-skip_if_enabled:
iounmap(l2x0_base);
of_node_put(np);
 
-- 
2.5.5



Re: [PATCH 0/4] mtd: add support for pairing scheme description

2016-06-12 Thread Brian Norris
On Sat, Jun 11, 2016 at 08:45:18AM +0200, Boris Brezillon wrote:
> On Fri, 10 Jun 2016 19:16:25 -0700
> Brian Norris  wrote:
> > On Mon, Apr 25, 2016 at 12:01:17PM +0200, Boris Brezillon wrote:
> > > Hi,
> > > 
> > > This series is the first step towards reliable MLC/TLC NAND support.
> > > Those patches allow the NAND layer to expose page pairing information
> > > to MTD users.  
> > 
> > Have you surveyed many types of NAND to get a representative sampling of
> > what kind of pairing schemes are out there? Do you think you've covered
> > the possibilities well enough in your API? I have a few comments on the
> > patches to this effect. I honestly don't know the answer to these
> > questions, because AFAIR, this is rarely well documented in datasheets.
> 
> I only tested on 3 different NANDs from Micron, Toshiba and Hynix, but

I'm curious, do you have an example part number for Micron? When I
looked briefly last week, I only found either MLC that don't mention it
at all (they fundamentally *have* to have write pairing, don't they?) or
TLC that required too much work for me to get past their login screens.

> I had a look at several datasheets. Unlike read-retry this part is
> usually documented in public datasheets, and on a panel of approximately
> 20 NANDs (mainly from Toshiba, Samsung, Hynix and Micron), all of them
> were using the 'distance 3' or 'distance 6' pairing scheme.
> The only exception I've seen so far is the one pointed by Bean here [1],
> and it can be described using the mtd_pairing_scheme approach.

Yeah, I suppose the API is rather generic. It doesn't really assume
anything about patterns/distances -- just that the pairings are formed
in groups of the same size.

> > > The plan is to teach UBI about those constraints and let UBI code take
> > > the appropriate precautions when dealing with those multi-level cells
> > > NANDs. The way we'll handle this "paired pages" constraint will be
> > > described soon in a series adapting the UBI layer, so stay tuned ;).
> > > 
> > > Note that this implementation only allows page pairing scheme description
> > > when the NAND has a full-id entry in the nand_ids table.
> > > This should be addressed in some way for ONFI and JEDEC NANDs, though
> > > I'm not sure how to handle this yet.  
> > 
> > Do ONFI or JEDEC parameter pages even provide this kind of info? The
> > ONFI spec doesn't mention paired pages.
> 
> Nope that's the problem. The only way you can deduce that is to extract
> it from other information, but I think my series reworking the NAND
> initialization will help us [2].

Sure, I suppose.

Brian


Re: [PATCH 0/4] mtd: add support for pairing scheme description

2016-06-12 Thread Brian Norris
On Sat, Jun 11, 2016 at 08:45:18AM +0200, Boris Brezillon wrote:
> On Fri, 10 Jun 2016 19:16:25 -0700
> Brian Norris  wrote:
> > On Mon, Apr 25, 2016 at 12:01:17PM +0200, Boris Brezillon wrote:
> > > Hi,
> > > 
> > > This series is the first step towards reliable MLC/TLC NAND support.
> > > Those patches allow the NAND layer to expose page pairing information
> > > to MTD users.  
> > 
> > Have you surveyed many types of NAND to get a representative sampling of
> > what kind of pairing schemes are out there? Do you think you've covered
> > the possibilities well enough in your API? I have a few comments on the
> > patches to this effect. I honestly don't know the answer to these
> > questions, because AFAIR, this is rarely well documented in datasheets.
> 
> I only tested on 3 different NANDs from Micron, Toshiba and Hynix, but

I'm curious, do you have an example part number for Micron? When I
looked briefly last week, I only found either MLC that don't mention it
at all (they fundamentally *have* to have write pairing, don't they?) or
TLC that required too much work for me to get past their login screens.

> I had a look at several datasheets. Unlike read-retry this part is
> usually documented in public datasheets, and on a panel of approximately
> 20 NANDs (mainly from Toshiba, Samsung, Hynix and Micron), all of them
> were using the 'distance 3' or 'distance 6' pairing scheme.
> The only exception I've seen so far is the one pointed by Bean here [1],
> and it can be described using the mtd_pairing_scheme approach.

Yeah, I suppose the API is rather generic. It doesn't really assume
anything about patterns/distances -- just that the pairings are formed
in groups of the same size.

> > > The plan is to teach UBI about those constraints and let UBI code take
> > > the appropriate precautions when dealing with those multi-level cells
> > > NANDs. The way we'll handle this "paired pages" constraint will be
> > > described soon in a series adapting the UBI layer, so stay tuned ;).
> > > 
> > > Note that this implementation only allows page pairing scheme description
> > > when the NAND has a full-id entry in the nand_ids table.
> > > This should be addressed in some way for ONFI and JEDEC NANDs, though
> > > I'm not sure how to handle this yet.  
> > 
> > Do ONFI or JEDEC parameter pages even provide this kind of info? The
> > ONFI spec doesn't mention paired pages.
> 
> Nope that's the problem. The only way you can deduce that is to extract
> it from other information, but I think my series reworking the NAND
> initialization will help us [2].

Sure, I suppose.

Brian


Re: [PATCH V3 1/2] ACPI: Add support for ResourceSource/IRQ domain mapping

2016-06-12 Thread Hanjun Guo
+cc linux-acpi, linux-arm-kernel
(blocked, send again, sorry for the noise)

On 2016/6/12 19:22, Hanjun Guo wrote:
> Hi,
>
> On 2016/6/7 5:54, agust...@codeaurora.org wrote:
>> On 2016-06-04 08:30, Marc Zyngier wrote:
>>> On Fri, 13 May 2016 12:16:42 -0400
>>> Agustin Vega-Frias  wrote:
>
>>>
 + * @rcirq: IRQ number
 + * @trigger: trigger type of the IRQ number to be mapped
 + * @polarity: polarity of the IRQ to be mapped
>>>
>>> So if I'm right in my above understanding, you've reinvented an
>>> existing abstraction (irq_fwspec).
>>>
>>>
>
>>> So at this point, you should be able to create a irq_fwspec, and call
>>> into irq_create_fwspec_mapping(), without the need to open-code stuff
>>> we already have. And as a bonus point, you'd end-up with code that'd be
>>> similar to what is in gsi.c...
>>>
>>
>> Got it.
>>

>
>>>
>>> Again, this smells a lot like gsi.c, with added sugar on top.
>>
>> Yes, this can go away since a client can just call irq_dispose_mapping which 
>> finds the domain from the irq_data.
>
> I reworked my previous patches [1], which try to support an mbi-gen interrupt
> controller; here is the updated one for discussion, to see whether it's an
> option or not:
>
> [1]: 
> http://git.linaro.org/people/hanjun.guo/acpi.git/shortlog/refs/heads/7-topic-d02-mbi-gen
> patch: ACPI: resource: pass acpi dev to acpi_register_gsi()
> acpi: gsi: make the interrupt parent be 
> selectable
>
 drivers/acpi/gsi.c  |  8 +++--
 drivers/acpi/resource.c | 85 ++---
 include/acpi/acpi_bus.h |  1 +
 3 files changed, 66 insertions(+), 28 deletions(-)

diff --git a/drivers/acpi/gsi.c b/drivers/acpi/gsi.c
index fa4585a..afcb343 100644
--- a/drivers/acpi/gsi.c
+++ b/drivers/acpi/gsi.c
@@ -74,13 +74,17 @@ int acpi_register_gsi(struct device *dev, u32 gsi, int 
trigger,
  int polarity)
 {
struct irq_fwspec fwspec;
+   struct acpi_device *adev = dev ? to_acpi_device(dev) : NULL;
 
-   if (WARN_ON(!acpi_gsi_domain_id)) {
+   if (acpi_gsi_domain_id)
+   fwspec.fwnode = acpi_gsi_domain_id;
+   else if (adev && &adev->fwnode && adev->interrupt_parent)
+   fwspec.fwnode = adev->interrupt_parent;
+   else {
pr_warn("GSI: No registered irqchip, giving up\n");
return -EINVAL;
}
 
-   fwspec.fwnode = acpi_gsi_domain_id;
fwspec.param[0] = gsi;
fwspec.param[1] = acpi_gsi_get_irq_type(trigger, polarity);
fwspec.param_count = 2;
diff --git a/drivers/acpi/resource.c b/drivers/acpi/resource.c
index 627f8fb..ed9491d 100644
--- a/drivers/acpi/resource.c
+++ b/drivers/acpi/resource.c
@@ -355,7 +355,7 @@ static void acpi_dev_irqresource_disabled(struct resource 
*res, u32 gsi)
res->flags = IORESOURCE_IRQ | IORESOURCE_DISABLED | IORESOURCE_UNSET;
 }
 
-static void acpi_dev_get_irqresource(struct resource *res, u32 gsi,
+static void acpi_dev_get_irqresource(struct acpi_device *adev, struct resource 
*res, u32 gsi,
 u8 triggering, u8 polarity, u8 shareable,
 bool legacy)
 {
@@ -389,7 +389,7 @@ static void acpi_dev_get_irqresource(struct resource *res, 
u32 gsi,
}
 
res->flags = acpi_dev_irq_flags(triggering, polarity, shareable);
-   irq = acpi_register_gsi(NULL, gsi, triggering, polarity);
+   irq = acpi_register_gsi(&adev->dev, gsi, triggering, polarity);
if (irq >= 0) {
res->start = irq;
res->end = irq;
@@ -398,27 +398,9 @@ static void acpi_dev_get_irqresource(struct resource *res, 
u32 gsi,
}
 }
 
-/**
- * acpi_dev_resource_interrupt - Extract ACPI interrupt resource information.
- * @ares: Input ACPI resource object.
- * @index: Index into the array of GSIs represented by the resource.
- * @res: Output generic resource object.
- *
- * Check if the given ACPI resource object represents an interrupt resource
- * and @index does not exceed the resource's interrupt count (true is returned
- * in that case regardless of the results of the other checks)).  If that's the
- * case, register the GSI corresponding to @index from the array of interrupts
- * represented by the resource and populate the generic resource object pointed
- * to by @res accordingly.  If the registration of the GSI is not successful,
- * IORESOURCE_DISABLED will be set it that object's flags.
- *
- * Return:
- * 1) false with res->flags setting to zero: not the expected resource type
- * 2) false with IORESOURCE_DISABLED in res->flags: valid unassigned resource
- * 3) true: valid assigned resource
- */
-bool acpi_dev_resource_interrupt(struct acpi_resource *ares, int index,
-struct resource *res)
+static bool __acpi_dev_resource_interrupt(struct acpi_device *adev,
+ struct acpi_resource *ares, int index,
+   

Re: [PATCH V3 1/2] ACPI: Add support for ResourceSource/IRQ domain mapping

2016-06-12 Thread Hanjun Guo
+cc linux-acpi, linux-arm-kernel
(blocked, send again, sorry for the noise)

On 2016/6/12 19:22, Hanjun Guo wrote:
> Hi,
>
> On 2016/6/7 5:54, agust...@codeaurora.org wrote:
>> On 2016-06-04 08:30, Marc Zyngier wrote:
>>> On Fri, 13 May 2016 12:16:42 -0400
>>> Agustin Vega-Frias  wrote:
>
>>>
 + * @rcirq: IRQ number
 + * @trigger: trigger type of the IRQ number to be mapped
 + * @polarity: polarity of the IRQ to be mapped
>>>
>>> So if I'm right in my above understanding, you've reinvented an
>>> existing abstraction (irq_fwspec).
>>>
>>>
>
>>> So at this point, you should be able to create a irq_fwspec, and call
>>> into irq_create_fwspec_mapping(), without the need to open-code stuff
>>> we already have. And as a bonus point, you'd end-up with code that'd be
>>> similar to what is in gsi.c...
>>>
>>
>> Got it.
>>

>
>>>
>>> Again, this smells a lot like gsi.c, with added sugar on top.
>>
>> Yes, this can go away since a client can just call irq_dispose_mapping which 
>> finds the domain from the irq_data.
>
> I reworked my previous patches [1], which try to support an mbi-gen interrupt
> controller; here is the updated one for discussion, to see whether it's an
> option or not:
>
> [1]: 
> http://git.linaro.org/people/hanjun.guo/acpi.git/shortlog/refs/heads/7-topic-d02-mbi-gen
> patch: ACPI: resource: pass acpi dev to acpi_register_gsi()
> acpi: gsi: make the interrupt parent be 
> selectable
>
 drivers/acpi/gsi.c  |  8 +++--
 drivers/acpi/resource.c | 85 ++---
 include/acpi/acpi_bus.h |  1 +
 3 files changed, 66 insertions(+), 28 deletions(-)

diff --git a/drivers/acpi/gsi.c b/drivers/acpi/gsi.c
index fa4585a..afcb343 100644
--- a/drivers/acpi/gsi.c
+++ b/drivers/acpi/gsi.c
@@ -74,13 +74,17 @@ int acpi_register_gsi(struct device *dev, u32 gsi, int 
trigger,
  int polarity)
 {
struct irq_fwspec fwspec;
+   struct acpi_device *adev = dev ? to_acpi_device(dev) : NULL;
 
-   if (WARN_ON(!acpi_gsi_domain_id)) {
+   if (acpi_gsi_domain_id)
+   fwspec.fwnode = acpi_gsi_domain_id;
+   else if (adev && &adev->fwnode && adev->interrupt_parent)
+   fwspec.fwnode = adev->interrupt_parent;
+   else {
pr_warn("GSI: No registered irqchip, giving up\n");
return -EINVAL;
}
 
-   fwspec.fwnode = acpi_gsi_domain_id;
fwspec.param[0] = gsi;
fwspec.param[1] = acpi_gsi_get_irq_type(trigger, polarity);
fwspec.param_count = 2;
diff --git a/drivers/acpi/resource.c b/drivers/acpi/resource.c
index 627f8fb..ed9491d 100644
--- a/drivers/acpi/resource.c
+++ b/drivers/acpi/resource.c
@@ -355,7 +355,7 @@ static void acpi_dev_irqresource_disabled(struct resource 
*res, u32 gsi)
res->flags = IORESOURCE_IRQ | IORESOURCE_DISABLED | IORESOURCE_UNSET;
 }
 
-static void acpi_dev_get_irqresource(struct resource *res, u32 gsi,
+static void acpi_dev_get_irqresource(struct acpi_device *adev, struct resource 
*res, u32 gsi,
 u8 triggering, u8 polarity, u8 shareable,
 bool legacy)
 {
@@ -389,7 +389,7 @@ static void acpi_dev_get_irqresource(struct resource *res, 
u32 gsi,
}
 
res->flags = acpi_dev_irq_flags(triggering, polarity, shareable);
-   irq = acpi_register_gsi(NULL, gsi, triggering, polarity);
+   irq = acpi_register_gsi(&adev->dev, gsi, triggering, polarity);
if (irq >= 0) {
res->start = irq;
res->end = irq;
@@ -398,27 +398,9 @@ static void acpi_dev_get_irqresource(struct resource *res, 
u32 gsi,
}
 }
 
-/**
- * acpi_dev_resource_interrupt - Extract ACPI interrupt resource information.
- * @ares: Input ACPI resource object.
- * @index: Index into the array of GSIs represented by the resource.
- * @res: Output generic resource object.
- *
- * Check if the given ACPI resource object represents an interrupt resource
- * and @index does not exceed the resource's interrupt count (true is returned
- * in that case regardless of the results of the other checks)).  If that's the
- * case, register the GSI corresponding to @index from the array of interrupts
- * represented by the resource and populate the generic resource object pointed
- * to by @res accordingly.  If the registration of the GSI is not successful,
- * IORESOURCE_DISABLED will be set it that object's flags.
- *
- * Return:
- * 1) false with res->flags setting to zero: not the expected resource type
- * 2) false with IORESOURCE_DISABLED in res->flags: valid unassigned resource
- * 3) true: valid assigned resource
- */
-bool acpi_dev_resource_interrupt(struct acpi_resource *ares, int index,
-struct resource *res)
+static bool __acpi_dev_resource_interrupt(struct acpi_device *adev,
+ struct acpi_resource *ares, int index,
+ struct resource *res)
 

Re: [PATCH 0/2] Fix CPU Online handling for unbounded worker threads

2016-06-12 Thread Gautham R Shenoy
Hi Peter, Thomas,
On Tue, Jun 07, 2016 at 08:44:01PM +0530, Gautham R. Shenoy wrote:
> Hi,
> 
> This patchset fixes a couple of issues in the CPU_ONLINE notification
> handling for the workqueues with respect to unbounded worker
threads.

Any thoughts on these patches ? They fix a race which
was causing WARN_ON() to be consistently reproduced on POWER machines
since 4.6.

Could you please review these patches ?

--
Thanks and Regards
gautham.

> 
> Patch 1 ensures that the affinity of a unbound worker thread
> associated with a node whose very first CPU has come online is set
> correctly. In the existing code path we will never call
> set_cpus_allowed_ptr() for unbound worker threads that have been
> created on a CPU Online operation after boot.
> 
> Patch 2 fixes the following WARN_ON() reported by Abdul when
> set_cpus_allowed_ptr() for an unbound worker thread is invoked when
> only one of the CPUs in its cpumask is online but not yet active.
> 
>  [ cut here ]
>  WARNING: CPU: 40 PID: 248 at kernel/sched/core.c:1166 
> __set_cpus_allowed_ptr+0x21c/0x290
>  Modules linked in:
> CPU: 40 PID: 248 Comm: cpuhp/40 Not tainted 4.6.0-autotest #1
> task: c00f27284200 ti: c00f273fc000 task.ti: c00f273fc000
> NIP: c010488c LR: c0104874 CTR: 
> REGS: c00f273ff7d0 TRAP: 0700   Not tainted  (4.6.0-autotest)
> MSR: 900100029033   CR: 28002804  XER: 
> 2000
> CFAR: c05b0888 SOFTE: 0
> GPR00: c010478c c00f273ffa50 c13ce400 
> GPR04: c140ed98 0800 c007f64d9408 
> GPR08:  0028 c140ee90 0020
> GPR12: 2200 cfb96800 c00f44a8 c007fa158480
> GPR16: c007fc621a70 c00f2721f800  0001
> GPR20: c1571ef0  c134879f c12bc510
> GPR24: 0100  c140ea98 c007f64d9408
> GPR28: c007fbc21c00 ffea  c00f2728
> NIP [c010488c] __set_cpus_allowed_ptr+0x21c/0x290
> LR [c0104874] __set_cpus_allowed_ptr+0x204/0x290
> Call Trace:
> [c00f273ffa50] [c010478c] __set_cpus_allowed_ptr+0x11c/0x290 
> (unreliable)
> [c00f273ffac0] [c00ed4b0] workqueue_cpu_up_callback+0x2c0/0x470
> [c00f273ffb70] [c00f5c58] notifier_call_chain+0x98/0x100
> [c00f273ffbc0] [c00c5ed0] __cpu_notify+0x70/0xe0
> [c00f273ffc00] [c00c6028] notify_online+0x38/0x50
> [c00f273ffc30] [c00c5214] cpuhp_invoke_callback+0x84/0x250
> [c00f273ffc90] [c00c562c] cpuhp_up_callbacks+0x5c/0x120
> [c00f273ffce0] [c00c64d4] cpuhp_thread_fun+0x184/0x1c0
> [c00f273ffd20] [c00fa050] smpboot_thread_fn+0x290/0x2a0
> [c00f273ffd80] [c00f45b0] kthread+0x110/0x130
> [c00f273ffe30] [c0009570] ret_from_kernel_thread+0x5c/0x6c
> Instruction dump:
> 419eff3c 3d420004 38a00800 388a0998 7f63db78 484abfa1 6000 2fa3
> 409eff1c 813f0378 2f890001 419eff10 <0fe0> 4b08 6000 6000
>  ---[ end trace cbc1c5cfbc9591d0 ]---
> 
> The patches are based on 4.7-rc2. I have tested the patches on a
> multi-node x86_64 and a ppc64
> 
> Gautham R. Shenoy (2):
>   workqueue: Move wq_update_unbound_numa() to the beginning of
> CPU_ONLINE
>   workqueue:Fix affinity of an unbound worker of a node with 1 online
> CPU
> 
>  kernel/workqueue.c | 27 +++
>  1 file changed, 19 insertions(+), 8 deletions(-)
> 
> -- 
> 1.9.3
> 



Re: [PATCH 0/2] Fix CPU Online handling for unbounded worker threads

2016-06-12 Thread Gautham R Shenoy
Hi Peter, Thomas,
On Tue, Jun 07, 2016 at 08:44:01PM +0530, Gautham R. Shenoy wrote:
> Hi,
> 
> This patchset fixes a couple of issues in the CPU_ONLINE notification
> handling for the workqueues with respect to unbounded worker
threads.

Any thoughts on these patches ? They fix a race which
was causing WARN_ON() to be consistently reproduced on POWER machines
since 4.6.

Could you please review these patches ?

--
Thanks and Regards
gautham.

> 
> Patch 1 ensures that the affinity of a unbound worker thread
> associated with a node whose very first CPU has come online is set
> correctly. In the existing code path we will never call
> set_cpus_allowed_ptr() for unbound worker threads that have been
> created on a CPU Online operation after boot.
> 
> Patch 2 fixes the following WARN_ON() reported by Abdul when
> set_cpus_allowed_ptr() for an unbound worker thread is invoked when
> only one of the CPUs in its cpumask is online but not yet active.
> 
>  [ cut here ]
>  WARNING: CPU: 40 PID: 248 at kernel/sched/core.c:1166 
> __set_cpus_allowed_ptr+0x21c/0x290
>  Modules linked in:
> CPU: 40 PID: 248 Comm: cpuhp/40 Not tainted 4.6.0-autotest #1
> task: c00f27284200 ti: c00f273fc000 task.ti: c00f273fc000
> NIP: c010488c LR: c0104874 CTR: 
> REGS: c00f273ff7d0 TRAP: 0700   Not tainted  (4.6.0-autotest)
> MSR: 900100029033   CR: 28002804  XER: 
> 2000
> CFAR: c05b0888 SOFTE: 0
> GPR00: c010478c c00f273ffa50 c13ce400 
> GPR04: c140ed98 0800 c007f64d9408 
> GPR08:  0028 c140ee90 0020
> GPR12: 2200 cfb96800 c00f44a8 c007fa158480
> GPR16: c007fc621a70 c00f2721f800  0001
> GPR20: c1571ef0  c134879f c12bc510
> GPR24: 0100  c140ea98 c007f64d9408
> GPR28: c007fbc21c00 ffea  c00f2728
> NIP [c010488c] __set_cpus_allowed_ptr+0x21c/0x290
> LR [c0104874] __set_cpus_allowed_ptr+0x204/0x290
> Call Trace:
> [c00f273ffa50] [c010478c] __set_cpus_allowed_ptr+0x11c/0x290 
> (unreliable)
> [c00f273ffac0] [c00ed4b0] workqueue_cpu_up_callback+0x2c0/0x470
> [c00f273ffb70] [c00f5c58] notifier_call_chain+0x98/0x100
> [c00f273ffbc0] [c00c5ed0] __cpu_notify+0x70/0xe0
> [c00f273ffc00] [c00c6028] notify_online+0x38/0x50
> [c00f273ffc30] [c00c5214] cpuhp_invoke_callback+0x84/0x250
> [c00f273ffc90] [c00c562c] cpuhp_up_callbacks+0x5c/0x120
> [c00f273ffce0] [c00c64d4] cpuhp_thread_fun+0x184/0x1c0
> [c00f273ffd20] [c00fa050] smpboot_thread_fn+0x290/0x2a0
> [c00f273ffd80] [c00f45b0] kthread+0x110/0x130
> [c00f273ffe30] [c0009570] ret_from_kernel_thread+0x5c/0x6c
> Instruction dump:
> 419eff3c 3d420004 38a00800 388a0998 7f63db78 484abfa1 6000 2fa3
> 409eff1c 813f0378 2f890001 419eff10 <0fe0> 4b08 6000 6000
>  ---[ end trace cbc1c5cfbc9591d0 ]---
> 
> The patches are based on 4.7-rc2. I have tested the patches on a
> multi-node x86_64 and a ppc64
> 
> Gautham R. Shenoy (2):
>   workqueue: Move wq_update_unbound_numa() to the beginning of
> CPU_ONLINE
>   workqueue:Fix affinity of an unbound worker of a node with 1 online
> CPU
> 
>  kernel/workqueue.c | 27 +++
>  1 file changed, 19 insertions(+), 8 deletions(-)
> 
> -- 
> 1.9.3
> 



Re: [PATCH v10 2/2] dmaengine: Add Xilinx zynqmp dma engine driver support

2016-06-12 Thread Vinod Koul
On Wed, Jun 08, 2016 at 07:40:52AM +0000, Appana Durga Kedareswara Rao wrote:
> > > +static void zynqmp_dma_desc_config_eod(struct zynqmp_dma_chan *chan,
> > > +void *desc)
> > 
> > eod? 80 line?

What's eod?

> > > +int zynqmp_dma_channel_set_config(struct dma_chan *dchan,
> > > +   struct zynqmp_dma_config *cfg)
> > > +{
> > > + struct zynqmp_dma_chan *chan = to_chan(dchan);
> > > +
> > > + chan->config.ovrfetch = cfg->ovrfetch;
> > > + chan->config.has_sg = cfg->has_sg;
> > 
> > is this HW capability? if so why would anyone not like to use it!
> 
> Yes it is HW capability. It can be either in simple mode or SG mode
> Earlier In the driver this configuration is read from the device-tree 
> But as per lars and your suggestion moved it as runtime config parameters.

If sg mode is available why would anyone _not_ want it?

I do not think there is any point in having this

> 
> > 
> > > + chan->config.ratectrl = cfg->ratectrl;
> > > + chan->config.src_issue = cfg->src_issue;
> > > + chan->config.src_burst_len = cfg->src_burst_len;
> > > + chan->config.dst_burst_len = cfg->dst_burst_len;
> > 
> > can you describe these parameters?
> ratectl:
> Rate control can be independently enabled per channel. When rate control is 
> enabled, the
> DMA channel uses the rate control count to schedule successive data read 
> transactions.

And how is this used by client?

> src_issue:
> Tells outstanding transaction on SRC.

This should be read only then, right?

> Burst_len: 
> Configures the burst length of the src and dst transfers...

Hmmm, but you are on memcpy, so that should be programmed for throughput?

> > 
> > How would a client know how to configure them?
> 
> With the default values of the config parameters driver will work.

But how will client know what is default!

> If user has specific requirement to change these parameters they can pass
> It to the driver using set_config API and all these parameters are
> Documented in the include/linux/dma/xilinx_dma.h file...

Can you give me an example where user would like to do that

-- 
~Vinod


Re: [PATCH v10 2/2] dmaengine: Add Xilinx zynqmp dma engine driver support

2016-06-12 Thread Vinod Koul
On Wed, Jun 08, 2016 at 07:40:52AM +0000, Appana Durga Kedareswara Rao wrote:
> > > +static void zynqmp_dma_desc_config_eod(struct zynqmp_dma_chan *chan,
> > > +void *desc)
> > 
> > eod? 80 line?

What's eod?

> > > +int zynqmp_dma_channel_set_config(struct dma_chan *dchan,
> > > +   struct zynqmp_dma_config *cfg)
> > > +{
> > > + struct zynqmp_dma_chan *chan = to_chan(dchan);
> > > +
> > > + chan->config.ovrfetch = cfg->ovrfetch;
> > > + chan->config.has_sg = cfg->has_sg;
> > 
> > is this HW capability? if so why would anyone not like to use it!
> 
> Yes it is HW capability. It can be either in simple mode or SG mode
> Earlier In the driver this configuration is read from the device-tree 
> But as per lars and your suggestion moved it as runtime config parameters.

If sg mode is available why would anyone _not_ want it?

I do not think there is any point in having this

> 
> > 
> > > + chan->config.ratectrl = cfg->ratectrl;
> > > + chan->config.src_issue = cfg->src_issue;
> > > + chan->config.src_burst_len = cfg->src_burst_len;
> > > + chan->config.dst_burst_len = cfg->dst_burst_len;
> > 
> > can you describe these parameters?
> ratectl:
> Rate control can be independently enabled per channel. When rate control is 
> enabled, the
> DMA channel uses the rate control count to schedule successive data read 
> transactions.

And how is this used by client?

> src_issue:
> Tells outstanding transaction on SRC.

This should be read only then, right?

> Burst_len: 
> Configures the burst length of the src and dst transfers...

Hmmm, but you are on memcpy, so that should be programmed for throughput?

> > 
> > How would a client know how to configure them?
> 
> With the default values of the config parameters driver will work.

But how will client know what is default!

> If user has specific requirement to change these parameters they can pass
> It to the driver using set_config API and all these parameters are
> Documented in the include/linux/dma/xilinx_dma.h file...

Can you give me an example where user would like to do that

-- 
~Vinod


Re: [PATCH 0/6] eBPF JIT for PPC64

2016-06-12 Thread Naveen N. Rao
On 2016/06/10 10:47PM, David Miller wrote:
> From: "Naveen N. Rao" 
> Date: Tue,  7 Jun 2016 19:02:17 +0530
> 
> > Please note that patch [2] is a pre-requisite for this patchset, and is
> > not yet upstream.
>  ...
> > [1] http://thread.gmane.org/gmane.linux.kernel/2188694
> > [2] http://thread.gmane.org/gmane.linux.ports.ppc.embedded/96514
> 
> Because of #2 I don't think I can take this directly into the networking
> tree, right?
> 
> Therefore, how would you like this to be merged?

Hi David,
Thanks for asking. Yes, I think it is better to take this through the 
powerpc tree as all the changes are contained within arch/powerpc, 
unless Michael Ellerman feels differently.

Michael?


Regards,
Naveen



Re: [PATCH 0/6] eBPF JIT for PPC64

2016-06-12 Thread Naveen N. Rao
On 2016/06/10 10:47PM, David Miller wrote:
> From: "Naveen N. Rao" 
> Date: Tue,  7 Jun 2016 19:02:17 +0530
> 
> > Please note that patch [2] is a pre-requisite for this patchset, and is
> > not yet upstream.
>  ...
> > [1] http://thread.gmane.org/gmane.linux.kernel/2188694
> > [2] http://thread.gmane.org/gmane.linux.ports.ppc.embedded/96514
> 
> Because of #2 I don't think I can take this directly into the networking
> tree, right?
> 
> Therefore, how would you like this to be merged?

Hi David,
Thanks for asking. Yes, I think it is better to take this through the 
powerpc tree as all the changes are contained within arch/powerpc, 
unless Michael Ellerman feels differently.

Michael?


Regards,
Naveen



RE: [PATCH] dmaengine: vdma: Fix compilation warning in cyclic dma mode

2016-06-12 Thread Appana Durga Kedareswara Rao
Hi Vinod,

> On Thu, Jun 09, 2016 at 11:32:12AM +0530, Kedareswara rao Appana wrote:
> > This patch fixes the below compilation warning.
> > drivers/dma/xilinx/xilinx_vdma.c: In function 'xilinx_dma_prep_dma_cyclic':
> > drivers/dma/xilinx/xilinx_vdma.c:1808:23: warning: 'segment' may be
> > used uninitialized in this function [-Wmaybe-uninitialized]
> >segment->hw.control |= XILINX_DMA_BD_SOP;
> >
> > The start of packet (SOP) should be set to the first segment in the
> > desc chain not for the last segment of the desc chain.
> 
> I have applied this after adding Reported by from SFR.

Sorry, will fix it from next time onwards.

> 
> You should always give credit to folks who report issues.

Sure will fix next time onwards...

Thanks,
Kedar.



RE: [PATCH] dmaengine: vdma: Fix compilation warning in cyclic dma mode

2016-06-12 Thread Appana Durga Kedareswara Rao
Hi Vinod,

> On Thu, Jun 09, 2016 at 11:32:12AM +0530, Kedareswara rao Appana wrote:
> > This patch fixes the below compilation warning.
> > drivers/dma/xilinx/xilinx_vdma.c: In function 'xilinx_dma_prep_dma_cyclic':
> > drivers/dma/xilinx/xilinx_vdma.c:1808:23: warning: 'segment' may be
> > used uninitialized in this function [-Wmaybe-uninitialized]
> >segment->hw.control |= XILINX_DMA_BD_SOP;
> >
> > The start of packet (SOP) should be set to the first segment in the
> > desc chain not for the last segment of the desc chain.
> 
> I have applied this after adding Reported by from SFR.

Sorry, will fix it from next time onwards.

> 
> You should always give credit to folks who report issues.

Sure will fix next time onwards...

Thanks,
Kedar.



Re: [PATCH] dmaengine: vdma: Fix compilation warning in cyclic dma mode

2016-06-12 Thread Vinod Koul
On Thu, Jun 09, 2016 at 11:32:12AM +0530, Kedareswara rao Appana wrote:
> This patch fixes the below compilation warning.
> drivers/dma/xilinx/xilinx_vdma.c: In function 'xilinx_dma_prep_dma_cyclic':
> drivers/dma/xilinx/xilinx_vdma.c:1808:23: warning: 'segment' may be used
> uninitialized in this function [-Wmaybe-uninitialized]
>segment->hw.control |= XILINX_DMA_BD_SOP;
> 
> The start of packet (SOP) should be set to the first segment in the desc
> chain not for the last segment of the desc chain.

I have applied this after adding Reported by from SFR.

You should always give credit to folks who report issues.

Thanks
-- 
~Vinod


Re: [PATCH] dmaengine: vdma: Fix compilation warning in cyclic dma mode

2016-06-12 Thread Vinod Koul
On Thu, Jun 09, 2016 at 11:32:12AM +0530, Kedareswara rao Appana wrote:
> This patch fixes the below compilation warning.
> drivers/dma/xilinx/xilinx_vdma.c: In function 'xilinx_dma_prep_dma_cyclic':
> drivers/dma/xilinx/xilinx_vdma.c:1808:23: warning: 'segment' may be used
> uninitialized in this function [-Wmaybe-uninitialized]
>segment->hw.control |= XILINX_DMA_BD_SOP;
> 
> The start of packet (SOP) should be set to the first segment in the desc
> chain not for the last segment of the desc chain.

I have applied this after adding Reported by from SFR.

You should always give credit to folks who report issues.

Thanks
-- 
~Vinod


Re: [PATCH ipvs-next] ipvs: count pre-established TCP states as active

2016-06-12 Thread Simon Horman
On Sun, Jun 12, 2016 at 06:27:39PM +0300, Julian Anastasov wrote:
> 
>   Hello,
> 
> On Fri, 3 Jun 2016, Michal Kubecek wrote:
> 
> > Some users observed that "least connection" distribution algorithm doesn't
> > handle well bursts of TCP connections from reconnecting clients after
> > a node or network failure.
> > 
> > This is because the algorithm counts active connection as worth 256
> > inactive ones where for TCP, "active" only means TCP connections in
> > ESTABLISHED state. In case of a connection burst, new connections are
> > handled before previous ones have finished the three way handshaking so
> > that all are still counted as "inactive", i.e. cheap ones. The become
> > "active" quickly but at that time, all of them are already assigned to one
> > real server (or few), resulting in highly unbalanced distribution.
> > 
> > Address this by counting the "pre-established" states as "active".
> > 
> > Signed-off-by: Michal Kubecek 
> 
> Acked-by: Julian Anastasov 
> 
>   Simon, please apply!

Thanks, done.


Re: [PATCH ipvs-next] ipvs: count pre-established TCP states as active

2016-06-12 Thread Simon Horman
On Sun, Jun 12, 2016 at 06:27:39PM +0300, Julian Anastasov wrote:
> 
>   Hello,
> 
> On Fri, 3 Jun 2016, Michal Kubecek wrote:
> 
> > Some users observed that "least connection" distribution algorithm doesn't
> > handle well bursts of TCP connections from reconnecting clients after
> > a node or network failure.
> > 
> > This is because the algorithm counts active connection as worth 256
> > inactive ones where for TCP, "active" only means TCP connections in
> > ESTABLISHED state. In case of a connection burst, new connections are
> > handled before previous ones have finished the three way handshaking so
> > that all are still counted as "inactive", i.e. cheap ones. The become
> > "active" quickly but at that time, all of them are already assigned to one
> > real server (or few), resulting in highly unbalanced distribution.
> > 
> > Address this by counting the "pre-established" states as "active".
> > 
> > Signed-off-by: Michal Kubecek 
> 
> Acked-by: Julian Anastasov 
> 
>   Simon, please apply!

Thanks, done.


Re: PCIe EndPoint DMA driver with DMA Framework

2016-06-12 Thread Vinod Koul
On Fri, Jun 10, 2016 at 03:39:15PM +, Bharat Kumar Gogada wrote:
> Hi,
> 

PLEASE wrap your replies to 80 chars..  I have reflown below..

> We are planning to write a PCIe EndPoint DMA driver with DMA Framework
> targeting x86 machine.  (
> "https://www.kernel.org/doc/Documentation/dmaengine/provider.txt;) Our DMA
> controller is part of PCIe End Point.  We are targeting to measure PCIe
> performance with this Framework driver.
> 
> But when I see DMA Framework drivers is kernel source "drivers/dma" most
> of the drivers are platform drivers.

wrong, there are a bunch of PCI X86 drivers. Look closely at dw, ioat etc
> 
> So DMA Framework is mainly targeted for platform drivers?

First it is dmaengine framework.

And your assumption is wrong, btw did you see anything in dmaengine APIs
to make the assumption that dmaengine framework is suited for platform
drivers? The frameworks do not care which type of device you have.

> 
> With current design model we need to have one DMA controller driver and
> PCIe EP client driver?

That depends on what you are trying to do but yes the dmaengine driver
will provide dma services and a client needs to use it

> 
> In which part of kernel source PCIe EP client driver will go?

whatever that client is trying to do DMA for. If its network then it
should go in network.

What exactly are you trying to do?

> 
> Can we use DMA Framework on x86 ?

And asking same question multiple times does not change the answer, which is
yes.

> 
> Thanks & Regards, Bharat
> 
> 
> 
> This email and any attachments are intended for the sole use of the named
> recipient(s) and contain(s) confidential information that may be
> proprietary, privileged or copyrighted under applicable law. If you are
> not the intended recipient, do not read, copy, or forward this email
> message or any attachments. Delete this email message and any attachments
> immediately.

What confidential information you have here?

-- 
~Vinod


Re: PCIe EndPoint DMA driver with DMA Framework

2016-06-12 Thread Vinod Koul
On Fri, Jun 10, 2016 at 03:39:15PM +, Bharat Kumar Gogada wrote:
> Hi,
> 

PLEASE wrap your replies to 80 chars..  I have reflown below..

> We are planning to write a PCIe EndPoint DMA driver with DMA Framework
> targeting x86 machine.  (
> "https://www.kernel.org/doc/Documentation/dmaengine/provider.txt;) Our DMA
> controller is part of PCIe End Point.  We are targeting to measure PCIe
> performance with this Framework driver.
> 
> But when I see DMA Framework drivers is kernel source "drivers/dma" most
> of the drivers are platform drivers.

wrong, there are a bunch of PCI X86 drivers. Look closely at dw, ioat etc
> 
> So DMA Framework is mainly targeted for platform drivers?

First it is dmaengine framework.

And your assumption is wrong, btw did you see anything in dmaengine APIs
to make the assumption that dmaengine framework is suited for platform
drivers? The frameworks do not care which type of device you have.

> 
> With current design model we need to have one DMA controller driver and
> PCIe EP client driver?

That depends on what you are trying to do but yes the dmaengine driver
will provide dma services and a client needs to use it

> 
> In which part of kernel source PCIe EP client driver will go?

whatever that client is trying to do DMA for. If its network then it
should go in network.

What exactly are you trying to do?

> 
> Can we use DMA Framework on x86 ?

And asking same question multiple times does not change the answer, which is
yes.

> 
> Thanks & Regards, Bharat
> 
> 
> 
> This email and any attachments are intended for the sole use of the named
> recipient(s) and contain(s) confidential information that may be
> proprietary, privileged or copyrighted under applicable law. If you are
> not the intended recipient, do not read, copy, or forward this email
> message or any attachments. Delete this email message and any attachments
> immediately.

What confidential information you have here?

-- 
~Vinod


RE: [PATCH v5 4/5] arm64: add support for ACPI Low Power Idle(LPI)

2016-06-12 Thread Sajjan, Vikas C
Hi Sudeep,

-Original Message-
From: Sudeep Holla [mailto:sudeep.ho...@arm.com] 
Sent: Wednesday, May 11, 2016 9:08 PM
To: linux-a...@vger.kernel.org; Rafael J. Wysocki 
Cc: Sudeep Holla ; linux-kernel@vger.kernel.org; Sajjan, 
Vikas C ; Lakshminarasimha, Sunil Vishwanathpur 
; Prashanth Prakash ; Ashwin 
Chaugule ; Al Stone ; Lorenzo 
Pieralisi ; Mark Rutland ; 
linux-arm-ker...@lists.infradead.org
Subject: [PATCH v5 4/5] arm64: add support for ACPI Low Power Idle(LPI)

This patch adds appropriate callbacks to support ACPI Low Power Idle
(LPI) on ARM64.

Cc: Lorenzo Pieralisi 
Cc: Mark Rutland 
Cc: linux-arm-ker...@lists.infradead.org
Signed-off-by: Sudeep Holla 
---
 arch/arm64/kernel/acpi.c | 48 +
 drivers/firmware/psci.c  | 56 
 2 files changed, 104 insertions(+)

diff --git a/arch/arm64/kernel/acpi.c b/arch/arm64/kernel/acpi.c index 
d1ce8e2f98b9..bf82ce5c8fce 100644
--- a/arch/arm64/kernel/acpi.c
+++ b/arch/arm64/kernel/acpi.c
@@ -18,6 +18,7 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 #include 
 #include 
@@ -25,6 +26,9 @@
 #include 
 #include 
 
+#include 
+
+#include 
 #include 
 #include 
 #include 
@@ -211,6 +215,50 @@ void __init acpi_boot_table_init(void)
}
 }
 
+int acpi_processor_ffh_lpi_probe(unsigned int cpu) {
+   return arm_cpuidle_init(cpu);
+}
+
+#define ACPI_FFH_LPI_ARM_FLAGS_CORE_CONTEXTBIT(0)
+#define ACPI_FFH_LPI_ARM_FLAGS_TRACE_CONTEXT   BIT(1)
+#define ACPI_FFH_LPI_ARM_FLAGS_GICR_CONTEXTBIT(2)
+#define ACPI_FFH_LPI_ARM_FLAGS_GICD_CONTEXTBIT(3)
+#define ACPI_FFH_LPI_ARM_FLAGS_ALL_CONTEXT \
+   (ACPI_FFH_LPI_ARM_FLAGS_CORE_CONTEXT |  \
+ACPI_FFH_LPI_ARM_FLAGS_TRACE_CONTEXT | \
+ACPI_FFH_LPI_ARM_FLAGS_GICR_CONTEXT |  \
+ACPI_FFH_LPI_ARM_FLAGS_GICD_CONTEXT)
+
+struct acpi_lpi_state *lpi;
+int acpi_processor_ffh_lpi_enter(struct acpi_lpi_state *lpi, int idx) {
+   int ret = 0;
+   bool save_ctx = lpi->arch_flags & ACPI_FFH_LPI_ARM_FLAGS_ALL_CONTEXT;
+
+   if (!idx) {
+   cpu_do_idle();
+   return idx;
+   }
+
+   /* TODO cpu_pm_{enter,exit} can be done in generic code ? */
+   if (save_ctx)
+   ret = cpu_pm_enter();
+   if (!ret) {
+   /*
+* Pass idle state index to cpu_suspend which in turn will
+* call the CPU ops suspend protocol with idle index as a
+* parameter.
+*/
+   ret = arm_cpuidle_suspend(idx);
+
+   if (save_ctx)
+   cpu_pm_exit();
+   }
+
+   return ret ? -1 : idx;
+}
+
 #ifdef CONFIG_ACPI_APEI
 pgprot_t arch_apei_get_mem_attribute(phys_addr_t addr)  { diff --git 
a/drivers/firmware/psci.c b/drivers/firmware/psci.c index 
fa4ea22ca12e..e06bfee68e1d 100644
--- a/drivers/firmware/psci.c
+++ b/drivers/firmware/psci.c
@@ -13,6 +13,7 @@
 
 #define pr_fmt(fmt) "psci: " fmt
 
+#include 
 #include 
 #include 
 #include 
@@ -310,11 +311,66 @@ static int psci_dt_cpu_init_idle(struct device_node 
*cpu_node, int cpu)
return ret;
 }
 
+#ifdef CONFIG_ACPI
+#include 
+
+static int __maybe_unused psci_acpi_cpu_init_idle(unsigned int cpu) {
+   int i, count;
+   u32 *psci_states;
+   struct acpi_processor *pr;
+   struct acpi_lpi_state *lpi;
+
+   pr = per_cpu(processors, cpu);
+   if (unlikely(!pr || !pr->flags.has_lpi))

Any particular reason for _not_ considering CST flag here.
Or you are planning to add CST support in some other patch set.

+   return -EINVAL;
+
+   /*
+* If the PSCI cpu_suspend function hook has not been initialized
+* idle states must not be enabled, so bail out
+*/
+   if (!psci_ops.cpu_suspend)
+   return -EOPNOTSUPP;
+
+   count = pr->power.count - 1;
+   if (!count)
+   return -ENODEV;
+
+   psci_states = kcalloc(count, sizeof(*psci_states), GFP_KERNEL);
+   if (!psci_states)
+   return -ENOMEM;
+
+   for (i = 0; i < count; i++) {
+   u32 state;
+
+   lpi = >power.lpi_states[i + 1];

  Same case here too.

+   state = lpi->address & 0x;
+   if (!psci_power_state_is_valid(state)) {
+   pr_warn("Invalid PSCI power state %#x\n", state);
+   kfree(psci_states);
+   return -EINVAL;
+   }
+   psci_states[i] = state;
+   }
+   /* Idle states parsed correctly, initialize per-cpu pointer */
+   per_cpu(psci_power_state, cpu) = psci_states;
+   return 0;
+}

RE: [PATCH v5 4/5] arm64: add support for ACPI Low Power Idle(LPI)

2016-06-12 Thread Sajjan, Vikas C
Hi Sudeep,

-Original Message-
From: Sudeep Holla [mailto:sudeep.ho...@arm.com] 
Sent: Wednesday, May 11, 2016 9:08 PM
To: linux-a...@vger.kernel.org; Rafael J. Wysocki 
Cc: Sudeep Holla ; linux-kernel@vger.kernel.org; Sajjan, 
Vikas C ; Lakshminarasimha, Sunil Vishwanathpur 
; Prashanth Prakash ; Ashwin 
Chaugule ; Al Stone ; Lorenzo 
Pieralisi ; Mark Rutland ; 
linux-arm-ker...@lists.infradead.org
Subject: [PATCH v5 4/5] arm64: add support for ACPI Low Power Idle(LPI)

This patch adds appropriate callbacks to support ACPI Low Power Idle
(LPI) on ARM64.

Cc: Lorenzo Pieralisi 
Cc: Mark Rutland 
Cc: linux-arm-ker...@lists.infradead.org
Signed-off-by: Sudeep Holla 
---
 arch/arm64/kernel/acpi.c | 48 +
 drivers/firmware/psci.c  | 56 
 2 files changed, 104 insertions(+)

diff --git a/arch/arm64/kernel/acpi.c b/arch/arm64/kernel/acpi.c index 
d1ce8e2f98b9..bf82ce5c8fce 100644
--- a/arch/arm64/kernel/acpi.c
+++ b/arch/arm64/kernel/acpi.c
@@ -18,6 +18,7 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 #include 
 #include 
@@ -25,6 +26,9 @@
 #include 
 #include 
 
+#include 
+
+#include 
 #include 
 #include 
 #include 
@@ -211,6 +215,50 @@ void __init acpi_boot_table_init(void)
}
 }
 
+int acpi_processor_ffh_lpi_probe(unsigned int cpu) {
+   return arm_cpuidle_init(cpu);
+}
+
+#define ACPI_FFH_LPI_ARM_FLAGS_CORE_CONTEXTBIT(0)
+#define ACPI_FFH_LPI_ARM_FLAGS_TRACE_CONTEXT   BIT(1)
+#define ACPI_FFH_LPI_ARM_FLAGS_GICR_CONTEXTBIT(2)
+#define ACPI_FFH_LPI_ARM_FLAGS_GICD_CONTEXTBIT(3)
+#define ACPI_FFH_LPI_ARM_FLAGS_ALL_CONTEXT \
+   (ACPI_FFH_LPI_ARM_FLAGS_CORE_CONTEXT |  \
+ACPI_FFH_LPI_ARM_FLAGS_TRACE_CONTEXT | \
+ACPI_FFH_LPI_ARM_FLAGS_GICR_CONTEXT |  \
+ACPI_FFH_LPI_ARM_FLAGS_GICD_CONTEXT)
+
+struct acpi_lpi_state *lpi;
+int acpi_processor_ffh_lpi_enter(struct acpi_lpi_state *lpi, int idx) {
+   int ret = 0;
+   bool save_ctx = lpi->arch_flags & ACPI_FFH_LPI_ARM_FLAGS_ALL_CONTEXT;
+
+   if (!idx) {
+   cpu_do_idle();
+   return idx;
+   }
+
+   /* TODO cpu_pm_{enter,exit} can be done in generic code ? */
+   if (save_ctx)
+   ret = cpu_pm_enter();
+   if (!ret) {
+   /*
+* Pass idle state index to cpu_suspend which in turn will
+* call the CPU ops suspend protocol with idle index as a
+* parameter.
+*/
+   ret = arm_cpuidle_suspend(idx);
+
+   if (save_ctx)
+   cpu_pm_exit();
+   }
+
+   return ret ? -1 : idx;
+}
+
 #ifdef CONFIG_ACPI_APEI
 pgprot_t arch_apei_get_mem_attribute(phys_addr_t addr)  { diff --git 
a/drivers/firmware/psci.c b/drivers/firmware/psci.c index 
fa4ea22ca12e..e06bfee68e1d 100644
--- a/drivers/firmware/psci.c
+++ b/drivers/firmware/psci.c
@@ -13,6 +13,7 @@
 
 #define pr_fmt(fmt) "psci: " fmt
 
+#include 
 #include 
 #include 
 #include 
@@ -310,11 +311,66 @@ static int psci_dt_cpu_init_idle(struct device_node 
*cpu_node, int cpu)
return ret;
 }
 
+#ifdef CONFIG_ACPI
+#include 
+
+static int __maybe_unused psci_acpi_cpu_init_idle(unsigned int cpu) {
+   int i, count;
+   u32 *psci_states;
+   struct acpi_processor *pr;
+   struct acpi_lpi_state *lpi;
+
+   pr = per_cpu(processors, cpu);
+   if (unlikely(!pr || !pr->flags.has_lpi))

Any particular reason for _not_ considering CST flag here.
Or you are planning to add CST support in some other patch set.

+   return -EINVAL;
+
+   /*
+* If the PSCI cpu_suspend function hook has not been initialized
+* idle states must not be enabled, so bail out
+*/
+   if (!psci_ops.cpu_suspend)
+   return -EOPNOTSUPP;
+
+   count = pr->power.count - 1;
+   if (!count)
+   return -ENODEV;
+
+   psci_states = kcalloc(count, sizeof(*psci_states), GFP_KERNEL);
+   if (!psci_states)
+   return -ENOMEM;
+
+   for (i = 0; i < count; i++) {
+   u32 state;
+
+   lpi = >power.lpi_states[i + 1];

  Same case here too.

+   state = lpi->address & 0x;
+   if (!psci_power_state_is_valid(state)) {
+   pr_warn("Invalid PSCI power state %#x\n", state);
+   kfree(psci_states);
+   return -EINVAL;
+   }
+   psci_states[i] = state;
+   }
+   /* Idle states parsed correctly, initialize per-cpu pointer */
+   per_cpu(psci_power_state, cpu) = psci_states;
+   return 0;
+}
+#else
+static int __maybe_unused psci_acpi_cpu_init_idle(unsigned int cpu) {
+   return -EINVAL;
+}
+#endif
+
 int psci_cpu_init_idle(unsigned int cpu)  {
struct device_node *cpu_node;
int ret;
 
+   if (!acpi_disabled)
+   return 

Re: [PATCH] mm/zsmalloc: add trace events for zs_compact

2016-06-12 Thread Ganesh Mahendran
2016-06-13 12:42 GMT+08:00 Minchan Kim :
> On Wed, Jun 08, 2016 at 02:39:19PM +0800, Ganesh Mahendran wrote:
>
> 
>
>> zsmalloc is not only used by zram, but also zswap. Maybe
>> others in the future.
>>
>> I tried to use function_graph. It seems there are too much log
>> printed:
>> --
>> root@leo-test:/sys/kernel/debug/tracing# cat trace
>> # tracer: function_graph
>> #
>> # CPU  DURATION  FUNCTION CALLS
>> # | |   | |   |   |   |
>>  2)   |  zs_compact [zsmalloc]() {
>>  2)   |  /* zsmalloc_compact_start: pool zram0 */
>>  2)   0.889 us|_raw_spin_lock();
>>  2)   0.896 us|isolate_zspage [zsmalloc]();
>>  2)   0.938 us|_raw_spin_lock();
>>  2)   0.875 us|isolate_zspage [zsmalloc]();
>>  2)   0.942 us|_raw_spin_lock();
>>  2)   0.962 us|isolate_zspage [zsmalloc]();
>> ...
>>  2)   0.879 us|  insert_zspage [zsmalloc]();
>>  2)   4.520 us|}
>>  2)   0.975 us|_raw_spin_lock();
>>  2)   0.890 us|isolate_zspage [zsmalloc]();
>>  2)   0.882 us|_raw_spin_lock();
>>  2)   0.894 us|isolate_zspage [zsmalloc]();
>>  2)   |  /* zsmalloc_compact_end: pool zram0: 0 pages
>> compacted(total 0) */
>>  2) # 1351.241 us |  }
>> --
>> => 1351.241 us used
>>
>> And it seems the overhead of function_graph is bigger than trace event.
>>
>> bash-3682  [002]   1439.180646: zsmalloc_compact_start: pool zram0
>> bash-3682  [002]   1439.180659: zsmalloc_compact_end: pool zram0:
>> 0 pages compacted(total 0)
>> => 13 us > 1351.241 us
>
> You could use set_ftrace_filter to cut out.
>
> To introduce new event trace to get a elasped time, it's pointless,
> I think.

Agree.

>
> It should have more like pool name you mentioned.
> Like saying other thread, It would be better to show
> [pool name, compact size_class,
> the number of object moved, the number of freed page], IMO.

Thanks for your suggestion!
It would be useful to see compact details for each class.
I will send another patch to do this.

Thanks.

>
> Thanks.


Re: [PATCH] mm/zsmalloc: add trace events for zs_compact

2016-06-12 Thread Ganesh Mahendran
2016-06-13 12:42 GMT+08:00 Minchan Kim :
> On Wed, Jun 08, 2016 at 02:39:19PM +0800, Ganesh Mahendran wrote:
>
> 
>
>> zsmalloc is not only used by zram, but also zswap. Maybe
>> others in the future.
>>
>> I tried to use function_graph. It seems there are too much log
>> printed:
>> --
>> root@leo-test:/sys/kernel/debug/tracing# cat trace
>> # tracer: function_graph
>> #
>> # CPU  DURATION  FUNCTION CALLS
>> # | |   | |   |   |   |
>>  2)   |  zs_compact [zsmalloc]() {
>>  2)   |  /* zsmalloc_compact_start: pool zram0 */
>>  2)   0.889 us|_raw_spin_lock();
>>  2)   0.896 us|isolate_zspage [zsmalloc]();
>>  2)   0.938 us|_raw_spin_lock();
>>  2)   0.875 us|isolate_zspage [zsmalloc]();
>>  2)   0.942 us|_raw_spin_lock();
>>  2)   0.962 us|isolate_zspage [zsmalloc]();
>> ...
>>  2)   0.879 us|  insert_zspage [zsmalloc]();
>>  2)   4.520 us|}
>>  2)   0.975 us|_raw_spin_lock();
>>  2)   0.890 us|isolate_zspage [zsmalloc]();
>>  2)   0.882 us|_raw_spin_lock();
>>  2)   0.894 us|isolate_zspage [zsmalloc]();
>>  2)   |  /* zsmalloc_compact_end: pool zram0: 0 pages
>> compacted(total 0) */
>>  2) # 1351.241 us |  }
>> --
>> => 1351.241 us used
>>
>> And it seems the overhead of function_graph is bigger than trace event.
>>
>> bash-3682  [002]   1439.180646: zsmalloc_compact_start: pool zram0
>> bash-3682  [002]   1439.180659: zsmalloc_compact_end: pool zram0:
>> 0 pages compacted(total 0)
>> => 13 us > 1351.241 us
>
> You could use set_ftrace_filter to cut out.
>
> To introduce new event trace to get a elasped time, it's pointless,
> I think.

Agree.

>
> It should have more like pool name you mentioned.
> Like saying other thread, It would be better to show
> [pool name, compact size_class,
> the number of object moved, the number of freed page], IMO.

Thanks for your suggestion!
It would be useful to see compact details for each class.
I will send another patch to do this.

Thanks.

>
> Thanks.


Re: [PATCH] mm/zsmalloc: add trace events for zs_compact

2016-06-12 Thread Sergey Senozhatsky
Hello,

On (06/13/16 13:42), Minchan Kim wrote:
[..]
> > compacted(total 0) */
> >  2) # 1351.241 us |  }
> > --
> > => 1351.241 us used
> > 
> > And it seems the overhead of function_graph is bigger than trace event.
> > 
> > bash-3682  [002]   1439.180646: zsmalloc_compact_start: pool zram0
> > bash-3682  [002]   1439.180659: zsmalloc_compact_end: pool zram0:
> > 0 pages compacted(total 0)
> > => 13 us > 1351.241 us
> 
> You could use set_ftrace_filter to cut out.
> 
> To introduce new event trace to get a elasped time, it's pointless,
> I think.
> 
> It should have more like pool name you mentioned.
> Like saying other thread, It would be better to show
> [pool name, compact size_class,
> the number of object moved, the number of freed page], IMO.

just my 5 cents:

some parts (of the info above) are already available: zram maps to
pool name, which maps to a sysfs file name, that can contain the rest.
I'm just trying to understand what kind of optimizations we are talking
about here and how would timings help... compaction can spin on class
lock, for example, if the device in question is busy, etc. etc. on the
other hand we have a per-class info in zsmalloc pool stats output, so
why not extend it instead of introducing a new debugging interface?

-ss


Re: [PATCH] mm/zsmalloc: add trace events for zs_compact

2016-06-12 Thread Sergey Senozhatsky
Hello,

On (06/13/16 13:42), Minchan Kim wrote:
[..]
> > compacted(total 0) */
> >  2) # 1351.241 us |  }
> > --
> > => 1351.241 us used
> > 
> > And it seems the overhead of function_graph is bigger than trace event.
> > 
> > bash-3682  [002]   1439.180646: zsmalloc_compact_start: pool zram0
> > bash-3682  [002]   1439.180659: zsmalloc_compact_end: pool zram0:
> > 0 pages compacted(total 0)
> > => 13 us > 1351.241 us
> 
> You could use set_ftrace_filter to cut out.
> 
> To introduce new event trace to get a elasped time, it's pointless,
> I think.
> 
> It should have more like pool name you mentioned.
> Like saying other thread, It would be better to show
> [pool name, compact size_class,
> the number of object moved, the number of freed page], IMO.

just my 5 cents:

some parts (of the info above) are already available: zram maps to
pool name, which maps to a sysfs file name, that can contain the rest.
I'm just trying to understand what kind of optimizations we are talking
about here and how would timings help... compaction can spin on class
lock, for example, if the device in question is busy, etc. etc. on the
other hand we have a per-class info in zsmalloc pool stats output, so
why not extend it instead of introducing a new debugging interface?

-ss


Re: [PATCH] mm/zsmalloc: add trace events for zs_compact

2016-06-12 Thread Minchan Kim
On Wed, Jun 08, 2016 at 02:39:19PM +0800, Ganesh Mahendran wrote:



> zsmalloc is not only used by zram, but also zswap. Maybe
> others in the future.
> 
> I tried to use function_graph. It seems there are too much log
> printed:
> --
> root@leo-test:/sys/kernel/debug/tracing# cat trace
> # tracer: function_graph
> #
> # CPU  DURATION  FUNCTION CALLS
> # | |   | |   |   |   |
>  2)   |  zs_compact [zsmalloc]() {
>  2)   |  /* zsmalloc_compact_start: pool zram0 */
>  2)   0.889 us|_raw_spin_lock();
>  2)   0.896 us|isolate_zspage [zsmalloc]();
>  2)   0.938 us|_raw_spin_lock();
>  2)   0.875 us|isolate_zspage [zsmalloc]();
>  2)   0.942 us|_raw_spin_lock();
>  2)   0.962 us|isolate_zspage [zsmalloc]();
> ...
>  2)   0.879 us|  insert_zspage [zsmalloc]();
>  2)   4.520 us|}
>  2)   0.975 us|_raw_spin_lock();
>  2)   0.890 us|isolate_zspage [zsmalloc]();
>  2)   0.882 us|_raw_spin_lock();
>  2)   0.894 us|isolate_zspage [zsmalloc]();
>  2)   |  /* zsmalloc_compact_end: pool zram0: 0 pages
> compacted(total 0) */
>  2) # 1351.241 us |  }
> --
> => 1351.241 us used
> 
> And it seems the overhead of function_graph is bigger than trace event.
> 
> bash-3682  [002]   1439.180646: zsmalloc_compact_start: pool zram0
> bash-3682  [002]   1439.180659: zsmalloc_compact_end: pool zram0:
> 0 pages compacted(total 0)
> => 13 us > 1351.241 us

You could use set_ftrace_filter to cut out.

Introducing a new trace event just to get an elapsed time is pointless,
I think.

It should have more like pool name you mentioned.
Like saying other thread, It would be better to show
[pool name, compact size_class,
the number of object moved, the number of freed page], IMO.

Thanks.


Re: [PATCH] mm/zsmalloc: add trace events for zs_compact

2016-06-12 Thread Minchan Kim
On Wed, Jun 08, 2016 at 02:39:19PM +0800, Ganesh Mahendran wrote:



> zsmalloc is not only used by zram, but also zswap. Maybe
> others in the future.
> 
> I tried to use function_graph. It seems there are too much log
> printed:
> --
> root@leo-test:/sys/kernel/debug/tracing# cat trace
> # tracer: function_graph
> #
> # CPU  DURATION  FUNCTION CALLS
> # | |   | |   |   |   |
>  2)   |  zs_compact [zsmalloc]() {
>  2)   |  /* zsmalloc_compact_start: pool zram0 */
>  2)   0.889 us|_raw_spin_lock();
>  2)   0.896 us|isolate_zspage [zsmalloc]();
>  2)   0.938 us|_raw_spin_lock();
>  2)   0.875 us|isolate_zspage [zsmalloc]();
>  2)   0.942 us|_raw_spin_lock();
>  2)   0.962 us|isolate_zspage [zsmalloc]();
> ...
>  2)   0.879 us|  insert_zspage [zsmalloc]();
>  2)   4.520 us|}
>  2)   0.975 us|_raw_spin_lock();
>  2)   0.890 us|isolate_zspage [zsmalloc]();
>  2)   0.882 us|_raw_spin_lock();
>  2)   0.894 us|isolate_zspage [zsmalloc]();
>  2)   |  /* zsmalloc_compact_end: pool zram0: 0 pages
> compacted(total 0) */
>  2) # 1351.241 us |  }
> --
> => 1351.241 us used
> 
> And it seems the overhead of function_graph is bigger than trace event.
> 
> bash-3682  [002]   1439.180646: zsmalloc_compact_start: pool zram0
> bash-3682  [002]   1439.180659: zsmalloc_compact_end: pool zram0:
> 0 pages compacted(total 0)
> => 13 us > 1351.241 us

You could use set_ftrace_filter to cut out.

Introducing a new trace event just to get an elapsed time is pointless,
I think.

It should have more like pool name you mentioned.
Like saying other thread, It would be better to show
[pool name, compact size_class,
the number of object moved, the number of freed page], IMO.

Thanks.


Re: [PATCH tip/master] [BUGFIX] kprobes/x86: Fix to clear TF bit in fault-on-single-stepping

2016-06-12 Thread Ananth N Mavinakayanahalli
On Sat, Jun 11, 2016 at 11:06:53PM +0900, Masami Hiramatsu wrote:
> Fix kprobe_fault_handler to clear TF (trap flag) bit of
> flags register in the case of fault fixup on single-stepping.
> 
> If we put a kprobe on the instruction which can cause a
> page fault (e.g. actual mov instructions in copy_user_*),
> that fault happens on a single-stepping buffer. In this
> case, kprobes resets running instance so that the CPU can
> retry execution on the original ip address.
> However, current code forgets reset TF bit. Since this
> fault happens with TF bit set for enabling single-stepping,
> when it retries, it causes a debug exception and kprobes
> can not handle it because it already reset itself.
> 
> On the most of x86-64 platform, it can be easily reproduced
> by using kprobe tracer. E.g.
> 
>   # cd /sys/kernel/debug/tracing
>   # echo p copy_user_enhanced_fast_string+5 > kprobe_events
>   # echo 1 > events/kprobes/enable
> 
> And you'll see a kernel panic on do_debug(), since the debug
> trap is not handled by kprobes.
> 
> To fix this problem, we just need to clear the TF bit when
> resetting running kprobe.
> 
> Signed-off-by: Masami Hiramatsu 

Good catch!

Reviewed-by: Ananth N Mavinakayanahalli 



Re: [PATCH tip/master] [BUGFIX] kprobes/x86: Fix to clear TF bit in fault-on-single-stepping

2016-06-12 Thread Ananth N Mavinakayanahalli
On Sat, Jun 11, 2016 at 11:06:53PM +0900, Masami Hiramatsu wrote:
> Fix kprobe_fault_handler to clear TF (trap flag) bit of
> flags register in the case of fault fixup on single-stepping.
> 
> If we put a kprobe on the instruction which can cause a
> page fault (e.g. actual mov instructions in copy_user_*),
> that fault happens on a single-stepping buffer. In this
> case, kprobes resets running instance so that the CPU can
> retry execution on the original ip address.
> However, current code forgets reset TF bit. Since this
> fault happens with TF bit set for enabling single-stepping,
> when it retries, it causes a debug exception and kprobes
> can not handle it because it already reset itself.
> 
> On the most of x86-64 platform, it can be easily reproduced
> by using kprobe tracer. E.g.
> 
>   # cd /sys/kernel/debug/tracing
>   # echo p copy_user_enhanced_fast_string+5 > kprobe_events
>   # echo 1 > events/kprobes/enable
> 
> And you'll see a kernel panic on do_debug(), since the debug
> trap is not handled by kprobes.
> 
> To fix this problem, we just need to clear the TF bit when
> resetting running kprobe.
> 
> Signed-off-by: Masami Hiramatsu 

Good catch!

Reviewed-by: Ananth N Mavinakayanahalli 



[PATCH V7 3/9] vfio: platform: determine reset capability

2016-06-12 Thread Sinan Kaya
Creating a new function to determine if this driver supports reset
function or not. This is an attempt to abstract device tree calls
from the rest of the code.

Signed-off-by: Sinan Kaya 
Reviewed-by: Eric Auger 
---
 drivers/vfio/platform/vfio_platform_common.c | 7 ++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/drivers/vfio/platform/vfio_platform_common.c 
b/drivers/vfio/platform/vfio_platform_common.c
index 3e2a7c0..6be92c3 100644
--- a/drivers/vfio/platform/vfio_platform_common.c
+++ b/drivers/vfio/platform/vfio_platform_common.c
@@ -49,6 +49,11 @@ static vfio_platform_reset_fn_t 
vfio_platform_lookup_reset(const char *compat,
return reset_fn;
 }
 
+static bool vfio_platform_has_reset(struct vfio_platform_device *vdev)
+{
+   return vdev->of_reset ? true : false;
+}
+
 static void vfio_platform_get_reset(struct vfio_platform_device *vdev)
 {
vdev->of_reset = vfio_platform_lookup_reset(vdev->compat,
@@ -214,7 +219,7 @@ static long vfio_platform_ioctl(void *device_data,
if (info.argsz < minsz)
return -EINVAL;
 
-   if (vdev->of_reset)
+   if (vfio_platform_has_reset(vdev))
vdev->flags |= VFIO_DEVICE_FLAGS_RESET;
info.flags = vdev->flags;
info.num_regions = vdev->num_regions;
-- 
1.8.2.1



[PATCH V7 4/9] vfio: platform: add support for ACPI probe

2016-06-12 Thread Sinan Kaya
The code is using the compatible DT string to associate a reset driver
with the actual device itself. The compatible string does not exist on
ACPI based systems. HID is the unique identifier for a device driver
instead.

Signed-off-by: Sinan Kaya 
Reviewed-by: Eric Auger 
---
 drivers/vfio/platform/vfio_platform_common.c  | 57 ---
 drivers/vfio/platform/vfio_platform_private.h |  1 +
 2 files changed, 53 insertions(+), 5 deletions(-)

diff --git a/drivers/vfio/platform/vfio_platform_common.c 
b/drivers/vfio/platform/vfio_platform_common.c
index 6be92c3..fbf4565 100644
--- a/drivers/vfio/platform/vfio_platform_common.c
+++ b/drivers/vfio/platform/vfio_platform_common.c
@@ -13,6 +13,7 @@
  */
 
 #include 
+#include 
 #include 
 #include 
 #include 
@@ -49,6 +50,37 @@ static vfio_platform_reset_fn_t 
vfio_platform_lookup_reset(const char *compat,
return reset_fn;
 }
 
+#ifdef CONFIG_ACPI
+static int vfio_platform_acpi_probe(struct vfio_platform_device *vdev,
+   struct device *dev)
+{
+   struct acpi_device *adev = ACPI_COMPANION(dev);
+
+   if (acpi_disabled)
+   return -ENODEV;
+
+   if (!adev) {
+   pr_err("VFIO: ACPI companion device not found for %s\n",
+   vdev->name);
+   return -ENODEV;
+   }
+
+   vdev->acpihid = acpi_device_hid(adev);
+   if (!vdev->acpihid) {
+   pr_err("VFIO: cannot find ACPI HID for %s\n",
+  vdev->name);
+   return -ENODEV;
+   }
+   return 0;
+}
+#else
+static inline int vfio_platform_acpi_probe(struct vfio_platform_device *vdev,
+  struct device *dev)
+{
+   return -ENOENT;
+}
+#endif
+
 static bool vfio_platform_has_reset(struct vfio_platform_device *vdev)
 {
return vdev->of_reset ? true : false;
@@ -547,6 +579,20 @@ static const struct vfio_device_ops vfio_platform_ops = {
.mmap   = vfio_platform_mmap,
 };
 
+int vfio_platform_of_probe(struct vfio_platform_device *vdev,
+  struct device *dev)
+{
+   int ret;
+
+   ret = device_property_read_string(dev, "compatible",
+ >compat);
+   if (ret)
+   pr_err("VFIO: cannot retrieve compat for %s\n",
+   vdev->name);
+
+   return ret;
+}
+
 int vfio_platform_probe_common(struct vfio_platform_device *vdev,
   struct device *dev)
 {
@@ -556,11 +602,12 @@ int vfio_platform_probe_common(struct 
vfio_platform_device *vdev,
if (!vdev)
return -EINVAL;
 
-   ret = device_property_read_string(dev, "compatible", >compat);
-   if (ret) {
-   pr_err("VFIO: cannot retrieve compat for %s\n", vdev->name);
-   return -EINVAL;
-   }
+   ret = vfio_platform_acpi_probe(vdev, dev);
+   if (ret)
+   ret = vfio_platform_of_probe(vdev, dev);
+
+   if (ret)
+   return ret;
 
vdev->device = dev;
 
diff --git a/drivers/vfio/platform/vfio_platform_private.h 
b/drivers/vfio/platform/vfio_platform_private.h
index 71ed7d1..ba9e4f8 100644
--- a/drivers/vfio/platform/vfio_platform_private.h
+++ b/drivers/vfio/platform/vfio_platform_private.h
@@ -58,6 +58,7 @@ struct vfio_platform_device {
struct mutex    igate;
struct module   *parent_module;
const char  *compat;
+   const char  *acpihid;
struct module   *reset_module;
struct device   *device;
 
-- 
1.8.2.1



[PATCH V7 2/9] vfio: platform: move reset call to a common function

2016-06-12 Thread Sinan Kaya
The reset call sequence seems to replicate itself multiple times
across the file. Grouping them together for maintenance reasons.

Signed-off-by: Sinan Kaya 
Reviewed-by: Eric Auger 
---
 drivers/vfio/platform/vfio_platform_common.c | 30 +---
 1 file changed, 14 insertions(+), 16 deletions(-)

diff --git a/drivers/vfio/platform/vfio_platform_common.c 
b/drivers/vfio/platform/vfio_platform_common.c
index 08fd7c2..3e2a7c0 100644
--- a/drivers/vfio/platform/vfio_platform_common.c
+++ b/drivers/vfio/platform/vfio_platform_common.c
@@ -134,6 +134,17 @@ static void vfio_platform_regions_cleanup(struct 
vfio_platform_device *vdev)
kfree(vdev->regions);
 }
 
+static int vfio_platform_call_reset(struct vfio_platform_device *vdev)
+{
+   if (vdev->of_reset) {
+   dev_info(vdev->device, "reset\n");
+   return vdev->of_reset(vdev);
+   }
+
+   dev_warn(vdev->device, "no reset function found!\n");
+   return -EINVAL;
+}
+
 static void vfio_platform_release(void *device_data)
 {
struct vfio_platform_device *vdev = device_data;
@@ -141,12 +152,7 @@ static void vfio_platform_release(void *device_data)
mutex_lock(&driver_lock);
 
if (!(--vdev->refcnt)) {
-   if (vdev->of_reset) {
-   dev_info(vdev->device, "reset\n");
-   vdev->of_reset(vdev);
-   } else {
-   dev_warn(vdev->device, "no reset function found!\n");
-   }
+   vfio_platform_call_reset(vdev);
vfio_platform_regions_cleanup(vdev);
vfio_platform_irq_cleanup(vdev);
}
@@ -175,12 +181,7 @@ static int vfio_platform_open(void *device_data)
if (ret)
goto err_irq;
 
-   if (vdev->of_reset) {
-   dev_info(vdev->device, "reset\n");
-   vdev->of_reset(vdev);
-   } else {
-   dev_warn(vdev->device, "no reset function found!\n");
-   }
+   vfio_platform_call_reset(vdev);
}
 
vdev->refcnt++;
@@ -312,10 +313,7 @@ static long vfio_platform_ioctl(void *device_data,
return ret;
 
} else if (cmd == VFIO_DEVICE_RESET) {
-   if (vdev->of_reset)
-   return vdev->of_reset(vdev);
-   else
-   return -EINVAL;
+   return vfio_platform_call_reset(vdev);
}
 
return -ENOTTY;
-- 
1.8.2.1



[PATCH V7 3/9] vfio: platform: determine reset capability

2016-06-12 Thread Sinan Kaya
Creating a new function to determine if this driver supports reset
function or not. This is an attempt to abstract device tree calls
from the rest of the code.

Signed-off-by: Sinan Kaya 
Reviewed-by: Eric Auger 
---
 drivers/vfio/platform/vfio_platform_common.c | 7 ++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/drivers/vfio/platform/vfio_platform_common.c 
b/drivers/vfio/platform/vfio_platform_common.c
index 3e2a7c0..6be92c3 100644
--- a/drivers/vfio/platform/vfio_platform_common.c
+++ b/drivers/vfio/platform/vfio_platform_common.c
@@ -49,6 +49,11 @@ static vfio_platform_reset_fn_t 
vfio_platform_lookup_reset(const char *compat,
return reset_fn;
 }
 
+static bool vfio_platform_has_reset(struct vfio_platform_device *vdev)
+{
+   return vdev->of_reset ? true : false;
+}
+
 static void vfio_platform_get_reset(struct vfio_platform_device *vdev)
 {
vdev->of_reset = vfio_platform_lookup_reset(vdev->compat,
@@ -214,7 +219,7 @@ static long vfio_platform_ioctl(void *device_data,
if (info.argsz < minsz)
return -EINVAL;
 
-   if (vdev->of_reset)
+   if (vfio_platform_has_reset(vdev))
vdev->flags |= VFIO_DEVICE_FLAGS_RESET;
info.flags = vdev->flags;
info.num_regions = vdev->num_regions;
-- 
1.8.2.1



[PATCH V7 4/9] vfio: platform: add support for ACPI probe

2016-06-12 Thread Sinan Kaya
The code is using the compatible DT string to associate a reset driver
with the actual device itself. The compatible string does not exist on
ACPI based systems. HID is the unique identifier for a device driver
instead.

Signed-off-by: Sinan Kaya 
Reviewed-by: Eric Auger 
---
 drivers/vfio/platform/vfio_platform_common.c  | 57 ---
 drivers/vfio/platform/vfio_platform_private.h |  1 +
 2 files changed, 53 insertions(+), 5 deletions(-)

diff --git a/drivers/vfio/platform/vfio_platform_common.c 
b/drivers/vfio/platform/vfio_platform_common.c
index 6be92c3..fbf4565 100644
--- a/drivers/vfio/platform/vfio_platform_common.c
+++ b/drivers/vfio/platform/vfio_platform_common.c
@@ -13,6 +13,7 @@
  */
 
 #include <linux/device.h>
+#include <linux/acpi.h>
 #include <linux/iommu.h>
 #include <linux/module.h>
 #include <linux/mutex.h>
@@ -49,6 +50,37 @@ static vfio_platform_reset_fn_t 
vfio_platform_lookup_reset(const char *compat,
return reset_fn;
 }
 
+#ifdef CONFIG_ACPI
+static int vfio_platform_acpi_probe(struct vfio_platform_device *vdev,
+   struct device *dev)
+{
+   struct acpi_device *adev = ACPI_COMPANION(dev);
+
+   if (acpi_disabled)
+   return -ENODEV;
+
+   if (!adev) {
+   pr_err("VFIO: ACPI companion device not found for %s\n",
+   vdev->name);
+   return -ENODEV;
+   }
+
+   vdev->acpihid = acpi_device_hid(adev);
+   if (!vdev->acpihid) {
+   pr_err("VFIO: cannot find ACPI HID for %s\n",
+  vdev->name);
+   return -ENODEV;
+   }
+   return 0;
+}
+#else
+static inline int vfio_platform_acpi_probe(struct vfio_platform_device *vdev,
+  struct device *dev)
+{
+   return -ENOENT;
+}
+#endif
+
 static bool vfio_platform_has_reset(struct vfio_platform_device *vdev)
 {
return vdev->of_reset ? true : false;
@@ -547,6 +579,20 @@ static const struct vfio_device_ops vfio_platform_ops = {
.mmap   = vfio_platform_mmap,
 };
 
+int vfio_platform_of_probe(struct vfio_platform_device *vdev,
+  struct device *dev)
+{
+   int ret;
+
+   ret = device_property_read_string(dev, "compatible",
+ &vdev->compat);
+   if (ret)
+   pr_err("VFIO: cannot retrieve compat for %s\n",
+   vdev->name);
+
+   return ret;
+}
+
 int vfio_platform_probe_common(struct vfio_platform_device *vdev,
   struct device *dev)
 {
@@ -556,11 +602,12 @@ int vfio_platform_probe_common(struct 
vfio_platform_device *vdev,
if (!vdev)
return -EINVAL;
 
-   ret = device_property_read_string(dev, "compatible", &vdev->compat);
-   if (ret) {
-   pr_err("VFIO: cannot retrieve compat for %s\n", vdev->name);
-   return -EINVAL;
-   }
+   ret = vfio_platform_acpi_probe(vdev, dev);
+   if (ret)
+   ret = vfio_platform_of_probe(vdev, dev);
+
+   if (ret)
+   return ret;
 
vdev->device = dev;
 
diff --git a/drivers/vfio/platform/vfio_platform_private.h 
b/drivers/vfio/platform/vfio_platform_private.h
index 71ed7d1..ba9e4f8 100644
--- a/drivers/vfio/platform/vfio_platform_private.h
+++ b/drivers/vfio/platform/vfio_platform_private.h
@@ -58,6 +58,7 @@ struct vfio_platform_device {
struct mutex    igate;
struct module   *parent_module;
const char  *compat;
+   const char  *acpihid;
struct module   *reset_module;
struct device   *device;
 
-- 
1.8.2.1



[PATCH V7 2/9] vfio: platform: move reset call to a common function

2016-06-12 Thread Sinan Kaya
The reset call sequence seems to replicate itself multiple times
across the file. Grouping them together for maintenance reasons.

Signed-off-by: Sinan Kaya 
Reviewed-by: Eric Auger 
---
 drivers/vfio/platform/vfio_platform_common.c | 30 +---
 1 file changed, 14 insertions(+), 16 deletions(-)

diff --git a/drivers/vfio/platform/vfio_platform_common.c 
b/drivers/vfio/platform/vfio_platform_common.c
index 08fd7c2..3e2a7c0 100644
--- a/drivers/vfio/platform/vfio_platform_common.c
+++ b/drivers/vfio/platform/vfio_platform_common.c
@@ -134,6 +134,17 @@ static void vfio_platform_regions_cleanup(struct 
vfio_platform_device *vdev)
kfree(vdev->regions);
 }
 
+static int vfio_platform_call_reset(struct vfio_platform_device *vdev)
+{
+   if (vdev->of_reset) {
+   dev_info(vdev->device, "reset\n");
+   return vdev->of_reset(vdev);
+   }
+
+   dev_warn(vdev->device, "no reset function found!\n");
+   return -EINVAL;
+}
+
 static void vfio_platform_release(void *device_data)
 {
struct vfio_platform_device *vdev = device_data;
@@ -141,12 +152,7 @@ static void vfio_platform_release(void *device_data)
mutex_lock(&driver_lock);
 
if (!(--vdev->refcnt)) {
-   if (vdev->of_reset) {
-   dev_info(vdev->device, "reset\n");
-   vdev->of_reset(vdev);
-   } else {
-   dev_warn(vdev->device, "no reset function found!\n");
-   }
+   vfio_platform_call_reset(vdev);
vfio_platform_regions_cleanup(vdev);
vfio_platform_irq_cleanup(vdev);
}
@@ -175,12 +181,7 @@ static int vfio_platform_open(void *device_data)
if (ret)
goto err_irq;
 
-   if (vdev->of_reset) {
-   dev_info(vdev->device, "reset\n");
-   vdev->of_reset(vdev);
-   } else {
-   dev_warn(vdev->device, "no reset function found!\n");
-   }
+   vfio_platform_call_reset(vdev);
}
 
vdev->refcnt++;
@@ -312,10 +313,7 @@ static long vfio_platform_ioctl(void *device_data,
return ret;
 
} else if (cmd == VFIO_DEVICE_RESET) {
-   if (vdev->of_reset)
-   return vdev->of_reset(vdev);
-   else
-   return -EINVAL;
+   return vfio_platform_call_reset(vdev);
}
 
return -ENOTTY;
-- 
1.8.2.1



[PATCH V7 9/9] vfio: platform: check reset call return code during release

2016-06-12 Thread Sinan Kaya
Release call is ignoring the return code from reset call and can
potentially continue even though reset call failed.

If reset_required module parameter is set, this patch is going
to validate the return code and will cause stack dump with
WARN_ON and warn the user of failure.

Signed-off-by: Sinan Kaya 
Reviewed-by: Eric Auger 
---
 drivers/vfio/platform/vfio_platform_common.c | 10 +-
 1 file changed, 9 insertions(+), 1 deletion(-)

diff --git a/drivers/vfio/platform/vfio_platform_common.c 
b/drivers/vfio/platform/vfio_platform_common.c
index d3141e7..5035c3f 100644
--- a/drivers/vfio/platform/vfio_platform_common.c
+++ b/drivers/vfio/platform/vfio_platform_common.c
@@ -243,7 +243,15 @@ static void vfio_platform_release(void *device_data)
mutex_lock(&driver_lock);
 
if (!(--vdev->refcnt)) {
-   vfio_platform_call_reset(vdev, NULL);
+   const char *extra_dbg = NULL;
+   int ret;
+
+   ret = vfio_platform_call_reset(vdev, &extra_dbg);
+   if (ret && vdev->reset_required) {
+   dev_warn(vdev->device, "reset driver is required and 
reset call failed in release (%d) %s\n",
+ret, extra_dbg ? extra_dbg : "");
+   WARN_ON(1);
+   }
vfio_platform_regions_cleanup(vdev);
vfio_platform_irq_cleanup(vdev);
}
-- 
1.8.2.1



[PATCH V7 1/9] vfio: platform: rename reset function

2016-06-12 Thread Sinan Kaya
Renaming the reset function to of_reset as it is only used
by the device tree based platforms.

Signed-off-by: Sinan Kaya 
Reviewed-by: Eric Auger 
---
 drivers/vfio/platform/vfio_platform_common.c  | 30 +--
 drivers/vfio/platform/vfio_platform_private.h |  6 +++---
 2 files changed, 18 insertions(+), 18 deletions(-)

diff --git a/drivers/vfio/platform/vfio_platform_common.c 
b/drivers/vfio/platform/vfio_platform_common.c
index e65b142..08fd7c2 100644
--- a/drivers/vfio/platform/vfio_platform_common.c
+++ b/drivers/vfio/platform/vfio_platform_common.c
@@ -41,7 +41,7 @@ static vfio_platform_reset_fn_t 
vfio_platform_lookup_reset(const char *compat,
if (!strcmp(iter->compat, compat) &&
try_module_get(iter->owner)) {
*module = iter->owner;
-   reset_fn = iter->reset;
+   reset_fn = iter->of_reset;
break;
}
}
@@ -51,18 +51,18 @@ static vfio_platform_reset_fn_t 
vfio_platform_lookup_reset(const char *compat,
 
 static void vfio_platform_get_reset(struct vfio_platform_device *vdev)
 {
-   vdev->reset = vfio_platform_lookup_reset(vdev->compat,
-   &vdev->reset_module);
-   if (!vdev->reset) {
+   vdev->of_reset = vfio_platform_lookup_reset(vdev->compat,
+   &vdev->reset_module);
+   if (!vdev->of_reset) {
request_module("vfio-reset:%s", vdev->compat);
-   vdev->reset = vfio_platform_lookup_reset(vdev->compat,
-   &vdev->reset_module);
+   vdev->of_reset = vfio_platform_lookup_reset(vdev->compat,
+   &vdev->reset_module);
}
 }
 
 static void vfio_platform_put_reset(struct vfio_platform_device *vdev)
 {
-   if (vdev->reset)
+   if (vdev->of_reset)
module_put(vdev->reset_module);
 }
 
@@ -141,9 +141,9 @@ static void vfio_platform_release(void *device_data)
mutex_lock(&driver_lock);
 
if (!(--vdev->refcnt)) {
-   if (vdev->reset) {
+   if (vdev->of_reset) {
dev_info(vdev->device, "reset\n");
-   vdev->reset(vdev);
+   vdev->of_reset(vdev);
} else {
dev_warn(vdev->device, "no reset function found!\n");
}
@@ -175,9 +175,9 @@ static int vfio_platform_open(void *device_data)
if (ret)
goto err_irq;
 
-   if (vdev->reset) {
+   if (vdev->of_reset) {
dev_info(vdev->device, "reset\n");
-   vdev->reset(vdev);
+   vdev->of_reset(vdev);
} else {
dev_warn(vdev->device, "no reset function found!\n");
}
@@ -213,7 +213,7 @@ static long vfio_platform_ioctl(void *device_data,
if (info.argsz < minsz)
return -EINVAL;
 
-   if (vdev->reset)
+   if (vdev->of_reset)
vdev->flags |= VFIO_DEVICE_FLAGS_RESET;
info.flags = vdev->flags;
info.num_regions = vdev->num_regions;
@@ -312,8 +312,8 @@ static long vfio_platform_ioctl(void *device_data,
return ret;
 
} else if (cmd == VFIO_DEVICE_RESET) {
-   if (vdev->reset)
-   return vdev->reset(vdev);
+   if (vdev->of_reset)
+   return vdev->of_reset(vdev);
else
return -EINVAL;
}
@@ -611,7 +611,7 @@ void vfio_platform_unregister_reset(const char *compat,
 
mutex_lock(&driver_lock);
list_for_each_entry_safe(iter, temp, &reset_list, link) {
-   if (!strcmp(iter->compat, compat) && (iter->reset == fn)) {
+   if (!strcmp(iter->compat, compat) && (iter->of_reset == fn)) {
list_del(&iter->link);
break;
}
diff --git a/drivers/vfio/platform/vfio_platform_private.h 
b/drivers/vfio/platform/vfio_platform_private.h
index 42816dd..71ed7d1 100644
--- a/drivers/vfio/platform/vfio_platform_private.h
+++ b/drivers/vfio/platform/vfio_platform_private.h
@@ -71,7 +71,7 @@ struct vfio_platform_device {
struct resource*
(*get_resource)(struct vfio_platform_device *vdev, int i);
int (*get_irq)(struct vfio_platform_device *vdev, int i);
-   int (*reset)(struct vfio_platform_device *vdev);
+   int (*of_reset)(struct vfio_platform_device *vdev);
 };
 
 typedef int (*vfio_platform_reset_fn_t)(struct vfio_platform_device *vdev);
@@ -80,7 +80,7 @@ struct vfio_platform_reset_node {
struct list_head link;
char *compat;
  

[PATCH V7 5/9] vfio: platform: add extra debug info argument to call reset

2016-06-12 Thread Sinan Kaya
Getting ready to bring out extra debug information to the caller
so that more verbose information can be printed when an error is
observed.

Signed-off-by: Sinan Kaya 
---
 drivers/vfio/platform/vfio_platform_common.c | 9 +
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/drivers/vfio/platform/vfio_platform_common.c 
b/drivers/vfio/platform/vfio_platform_common.c
index fbf4565..e7ce2c2 100644
--- a/drivers/vfio/platform/vfio_platform_common.c
+++ b/drivers/vfio/platform/vfio_platform_common.c
@@ -171,7 +171,8 @@ static void vfio_platform_regions_cleanup(struct 
vfio_platform_device *vdev)
kfree(vdev->regions);
 }
 
-static int vfio_platform_call_reset(struct vfio_platform_device *vdev)
+static int vfio_platform_call_reset(struct vfio_platform_device *vdev,
+   const char **extra_dbg)
 {
if (vdev->of_reset) {
dev_info(vdev->device, "reset\n");
@@ -189,7 +190,7 @@ static void vfio_platform_release(void *device_data)
mutex_lock(&driver_lock);
 
if (!(--vdev->refcnt)) {
-   vfio_platform_call_reset(vdev);
+   vfio_platform_call_reset(vdev, NULL);
vfio_platform_regions_cleanup(vdev);
vfio_platform_irq_cleanup(vdev);
}
@@ -218,7 +219,7 @@ static int vfio_platform_open(void *device_data)
if (ret)
goto err_irq;
 
-   vfio_platform_call_reset(vdev);
+   vfio_platform_call_reset(vdev, NULL);
}
 
vdev->refcnt++;
@@ -350,7 +351,7 @@ static long vfio_platform_ioctl(void *device_data,
return ret;
 
} else if (cmd == VFIO_DEVICE_RESET) {
-   return vfio_platform_call_reset(vdev);
+   return vfio_platform_call_reset(vdev, NULL);
}
 
return -ENOTTY;
-- 
1.8.2.1



[PATCH V7 9/9] vfio: platform: check reset call return code during release

2016-06-12 Thread Sinan Kaya
Release call is ignoring the return code from reset call and can
potentially continue even though reset call failed.

If reset_required module parameter is set, this patch is going
to validate the return code and will cause stack dump with
WARN_ON and warn the user of failure.

Signed-off-by: Sinan Kaya 
Reviewed-by: Eric Auger 
---
 drivers/vfio/platform/vfio_platform_common.c | 10 +-
 1 file changed, 9 insertions(+), 1 deletion(-)

diff --git a/drivers/vfio/platform/vfio_platform_common.c 
b/drivers/vfio/platform/vfio_platform_common.c
index d3141e7..5035c3f 100644
--- a/drivers/vfio/platform/vfio_platform_common.c
+++ b/drivers/vfio/platform/vfio_platform_common.c
@@ -243,7 +243,15 @@ static void vfio_platform_release(void *device_data)
mutex_lock(&driver_lock);
 
if (!(--vdev->refcnt)) {
-   vfio_platform_call_reset(vdev, NULL);
+   const char *extra_dbg = NULL;
+   int ret;
+
+   ret = vfio_platform_call_reset(vdev, &extra_dbg);
+   if (ret && vdev->reset_required) {
+   dev_warn(vdev->device, "reset driver is required and 
reset call failed in release (%d) %s\n",
+ret, extra_dbg ? extra_dbg : "");
+   WARN_ON(1);
+   }
vfio_platform_regions_cleanup(vdev);
vfio_platform_irq_cleanup(vdev);
}
-- 
1.8.2.1



[PATCH V7 1/9] vfio: platform: rename reset function

2016-06-12 Thread Sinan Kaya
Renaming the reset function to of_reset as it is only used
by the device tree based platforms.

Signed-off-by: Sinan Kaya 
Reviewed-by: Eric Auger 
---
 drivers/vfio/platform/vfio_platform_common.c  | 30 +--
 drivers/vfio/platform/vfio_platform_private.h |  6 +++---
 2 files changed, 18 insertions(+), 18 deletions(-)

diff --git a/drivers/vfio/platform/vfio_platform_common.c 
b/drivers/vfio/platform/vfio_platform_common.c
index e65b142..08fd7c2 100644
--- a/drivers/vfio/platform/vfio_platform_common.c
+++ b/drivers/vfio/platform/vfio_platform_common.c
@@ -41,7 +41,7 @@ static vfio_platform_reset_fn_t 
vfio_platform_lookup_reset(const char *compat,
if (!strcmp(iter->compat, compat) &&
try_module_get(iter->owner)) {
*module = iter->owner;
-   reset_fn = iter->reset;
+   reset_fn = iter->of_reset;
break;
}
}
@@ -51,18 +51,18 @@ static vfio_platform_reset_fn_t 
vfio_platform_lookup_reset(const char *compat,
 
 static void vfio_platform_get_reset(struct vfio_platform_device *vdev)
 {
-   vdev->reset = vfio_platform_lookup_reset(vdev->compat,
-   &vdev->reset_module);
-   if (!vdev->reset) {
+   vdev->of_reset = vfio_platform_lookup_reset(vdev->compat,
+   &vdev->reset_module);
+   if (!vdev->of_reset) {
request_module("vfio-reset:%s", vdev->compat);
-   vdev->reset = vfio_platform_lookup_reset(vdev->compat,
-   &vdev->reset_module);
+   vdev->of_reset = vfio_platform_lookup_reset(vdev->compat,
+   &vdev->reset_module);
}
 }
 
 static void vfio_platform_put_reset(struct vfio_platform_device *vdev)
 {
-   if (vdev->reset)
+   if (vdev->of_reset)
module_put(vdev->reset_module);
 }
 
@@ -141,9 +141,9 @@ static void vfio_platform_release(void *device_data)
mutex_lock(&driver_lock);
 
if (!(--vdev->refcnt)) {
-   if (vdev->reset) {
+   if (vdev->of_reset) {
dev_info(vdev->device, "reset\n");
-   vdev->reset(vdev);
+   vdev->of_reset(vdev);
} else {
dev_warn(vdev->device, "no reset function found!\n");
}
@@ -175,9 +175,9 @@ static int vfio_platform_open(void *device_data)
if (ret)
goto err_irq;
 
-   if (vdev->reset) {
+   if (vdev->of_reset) {
dev_info(vdev->device, "reset\n");
-   vdev->reset(vdev);
+   vdev->of_reset(vdev);
} else {
dev_warn(vdev->device, "no reset function found!\n");
}
@@ -213,7 +213,7 @@ static long vfio_platform_ioctl(void *device_data,
if (info.argsz < minsz)
return -EINVAL;
 
-   if (vdev->reset)
+   if (vdev->of_reset)
vdev->flags |= VFIO_DEVICE_FLAGS_RESET;
info.flags = vdev->flags;
info.num_regions = vdev->num_regions;
@@ -312,8 +312,8 @@ static long vfio_platform_ioctl(void *device_data,
return ret;
 
} else if (cmd == VFIO_DEVICE_RESET) {
-   if (vdev->reset)
-   return vdev->reset(vdev);
+   if (vdev->of_reset)
+   return vdev->of_reset(vdev);
else
return -EINVAL;
}
@@ -611,7 +611,7 @@ void vfio_platform_unregister_reset(const char *compat,
 
mutex_lock(&driver_lock);
list_for_each_entry_safe(iter, temp, &reset_list, link) {
-   if (!strcmp(iter->compat, compat) && (iter->reset == fn)) {
+   if (!strcmp(iter->compat, compat) && (iter->of_reset == fn)) {
list_del(&iter->link);
break;
}
diff --git a/drivers/vfio/platform/vfio_platform_private.h 
b/drivers/vfio/platform/vfio_platform_private.h
index 42816dd..71ed7d1 100644
--- a/drivers/vfio/platform/vfio_platform_private.h
+++ b/drivers/vfio/platform/vfio_platform_private.h
@@ -71,7 +71,7 @@ struct vfio_platform_device {
struct resource*
(*get_resource)(struct vfio_platform_device *vdev, int i);
int (*get_irq)(struct vfio_platform_device *vdev, int i);
-   int (*reset)(struct vfio_platform_device *vdev);
+   int (*of_reset)(struct vfio_platform_device *vdev);
 };
 
 typedef int (*vfio_platform_reset_fn_t)(struct vfio_platform_device *vdev);
@@ -80,7 +80,7 @@ struct vfio_platform_reset_node {
struct list_head link;
char *compat;
struct module *owner;
-   

[PATCH V7 5/9] vfio: platform: add extra debug info argument to call reset

2016-06-12 Thread Sinan Kaya
Getting ready to bring out extra debug information to the caller
so that more verbose information can be printed when an error is
observed.

Signed-off-by: Sinan Kaya 
---
 drivers/vfio/platform/vfio_platform_common.c | 9 +
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/drivers/vfio/platform/vfio_platform_common.c 
b/drivers/vfio/platform/vfio_platform_common.c
index fbf4565..e7ce2c2 100644
--- a/drivers/vfio/platform/vfio_platform_common.c
+++ b/drivers/vfio/platform/vfio_platform_common.c
@@ -171,7 +171,8 @@ static void vfio_platform_regions_cleanup(struct 
vfio_platform_device *vdev)
kfree(vdev->regions);
 }
 
-static int vfio_platform_call_reset(struct vfio_platform_device *vdev)
+static int vfio_platform_call_reset(struct vfio_platform_device *vdev,
+   const char **extra_dbg)
 {
if (vdev->of_reset) {
dev_info(vdev->device, "reset\n");
@@ -189,7 +190,7 @@ static void vfio_platform_release(void *device_data)
mutex_lock(&driver_lock);
 
if (!(--vdev->refcnt)) {
-   vfio_platform_call_reset(vdev);
+   vfio_platform_call_reset(vdev, NULL);
vfio_platform_regions_cleanup(vdev);
vfio_platform_irq_cleanup(vdev);
}
@@ -218,7 +219,7 @@ static int vfio_platform_open(void *device_data)
if (ret)
goto err_irq;
 
-   vfio_platform_call_reset(vdev);
+   vfio_platform_call_reset(vdev, NULL);
}
 
vdev->refcnt++;
@@ -350,7 +351,7 @@ static long vfio_platform_ioctl(void *device_data,
return ret;
 
} else if (cmd == VFIO_DEVICE_RESET) {
-   return vfio_platform_call_reset(vdev);
+   return vfio_platform_call_reset(vdev, NULL);
}
 
return -ENOTTY;
-- 
1.8.2.1



[PATCH V7 7/9] vfio, platform: make reset driver a requirement by default

2016-06-12 Thread Sinan Kaya
The code was allowing platform devices to be used without a supporting
VFIO reset driver. The hardware can be left in some inconsistent state
after a guest machine abort.

The reset driver will put the hardware back to safe state and disable
interrupts before returning the control back to the host machine.

Adding a new reset_required kernel module option to AMBA and platform
VFIO drivers with a default value of true.

New requirements are:
1. A reset function needs to be implemented by the corresponding driver
via DT/ACPI.
2. The reset function needs to be discovered via DT/ACPI.

The probe of the driver will fail if any of the above conditions are
not satisfied.

Signed-off-by: Sinan Kaya 
Reviewed-by: Eric Auger 
---
 drivers/vfio/platform/vfio_amba.c |  5 +
 drivers/vfio/platform/vfio_platform.c |  5 +
 drivers/vfio/platform/vfio_platform_common.c  | 22 +++---
 drivers/vfio/platform/vfio_platform_private.h |  1 +
 4 files changed, 26 insertions(+), 7 deletions(-)

diff --git a/drivers/vfio/platform/vfio_amba.c 
b/drivers/vfio/platform/vfio_amba.c
index a66479b..7585902 100644
--- a/drivers/vfio/platform/vfio_amba.c
+++ b/drivers/vfio/platform/vfio_amba.c
@@ -23,6 +23,10 @@
 #define DRIVER_AUTHOR   "Antonios Motakis "
 #define DRIVER_DESC "VFIO for AMBA devices - User Level meta-driver"
 
+static bool reset_required = true;
+module_param(reset_required, bool, 0644);
+MODULE_PARM_DESC(reset_required, "override reset requirement (default: 1)");
+
 /* probing devices from the AMBA bus */
 
 static struct resource *get_amba_resource(struct vfio_platform_device *vdev,
@@ -68,6 +72,7 @@ static int vfio_amba_probe(struct amba_device *adev, const 
struct amba_id *id)
vdev->get_resource = get_amba_resource;
vdev->get_irq = get_amba_irq;
vdev->parent_module = THIS_MODULE;
+   vdev->reset_required = reset_required;
 
ret = vfio_platform_probe_common(vdev, &adev->dev);
if (ret) {
diff --git a/drivers/vfio/platform/vfio_platform.c 
b/drivers/vfio/platform/vfio_platform.c
index b1cc3a7..ef89146 100644
--- a/drivers/vfio/platform/vfio_platform.c
+++ b/drivers/vfio/platform/vfio_platform.c
@@ -23,6 +23,10 @@
 #define DRIVER_AUTHOR   "Antonios Motakis "
 #define DRIVER_DESC "VFIO for platform devices - User Level meta-driver"
 
+static bool reset_required = true;
+module_param(reset_required, bool, 0644);
+MODULE_PARM_DESC(reset_required, "override reset requirement (default: 1)");
+
 /* probing devices from the linux platform bus */
 
 static struct resource *get_platform_resource(struct vfio_platform_device 
*vdev,
@@ -66,6 +70,7 @@ static int vfio_platform_probe(struct platform_device *pdev)
vdev->get_resource = get_platform_resource;
vdev->get_irq = get_platform_irq;
vdev->parent_module = THIS_MODULE;
+   vdev->reset_required = reset_required;
 
ret = vfio_platform_probe_common(vdev, &pdev->dev);
if (ret)
diff --git a/drivers/vfio/platform/vfio_platform_common.c 
b/drivers/vfio/platform/vfio_platform_common.c
index 0ea8c26..d84c399 100644
--- a/drivers/vfio/platform/vfio_platform_common.c
+++ b/drivers/vfio/platform/vfio_platform_common.c
@@ -128,10 +128,10 @@ static bool vfio_platform_has_reset(struct 
vfio_platform_device *vdev)
return vdev->of_reset ? true : false;
 }
 
-static void vfio_platform_get_reset(struct vfio_platform_device *vdev)
+static int vfio_platform_get_reset(struct vfio_platform_device *vdev)
 {
if (vdev->acpihid)
-   return;
+   return vfio_platform_acpi_has_reset(vdev) ? 0 : -ENOENT;
 
vdev->of_reset = vfio_platform_lookup_reset(vdev->compat,
&vdev->reset_module);
@@ -140,6 +140,8 @@ static void vfio_platform_get_reset(struct 
vfio_platform_device *vdev)
vdev->of_reset = vfio_platform_lookup_reset(vdev->compat,
&vdev->reset_module);
}
+
+   return vdev->of_reset ? 0 : -ENOENT;
 }
 
 static void vfio_platform_put_reset(struct vfio_platform_device *vdev)
@@ -670,16 +672,22 @@ int vfio_platform_probe_common(struct 
vfio_platform_device *vdev,
}
 
ret = vfio_add_group_dev(dev, &vfio_platform_ops, vdev);
-   if (ret) {
-   iommu_group_put(group);
-   return ret;
-   }
+   if (ret)
+   goto out;
 
-   vfio_platform_get_reset(vdev);
+   ret = vfio_platform_get_reset(vdev);
+   if (ret && vdev->reset_required) {
+   pr_err("vfio: no reset function found for device %s\n",
+  vdev->name);
+   goto out;
+   }
 
mutex_init(&vdev->igate);
 
return 0;
+out:
+   iommu_group_put(group);
+   return ret;
 }
 EXPORT_SYMBOL_GPL(vfio_platform_probe_common);
 
diff --git 

[PATCH V7 6/9] vfio: platform: call _RST method when using ACPI

2016-06-12 Thread Sinan Kaya
The device tree code checks for the presence of a reset driver and calls
the of_reset function pointer by looking up the reset driver as a module.

ACPI defines _RST method to perform device level reset. After the _RST
method is executed, the OS can resume using the device. _RST method is
expected to stop DMA transfers and IRQs.

This patch introduces two functions as vfio_platform_acpi_has_reset and
vfio_platform_acpi_call_reset. The has reset method is used to declare
reset capability via the ioctl flag VFIO_DEVICE_FLAGS_RESET. The call
reset function is used to execute the _RST ACPI method.

Signed-off-by: Sinan Kaya 
Reviewed-by: Eric Auger 
---
 drivers/vfio/platform/vfio_platform_common.c | 51 
 1 file changed, 51 insertions(+)

diff --git a/drivers/vfio/platform/vfio_platform_common.c 
b/drivers/vfio/platform/vfio_platform_common.c
index e7ce2c2..0ea8c26 100644
--- a/drivers/vfio/platform/vfio_platform_common.c
+++ b/drivers/vfio/platform/vfio_platform_common.c
@@ -73,21 +73,66 @@ static int vfio_platform_acpi_probe(struct 
vfio_platform_device *vdev,
}
return 0;
 }
+
+static int vfio_platform_acpi_call_reset(struct vfio_platform_device *vdev,
+const char **extra_dbg)
+{
+   struct device *dev = vdev->device;
+   acpi_handle handle = ACPI_HANDLE(dev);
+   acpi_status acpi_ret;
+   unsigned long long val;
+
+   acpi_ret = acpi_evaluate_integer(handle, "_RST", NULL, &val);
+   if (ACPI_FAILURE(acpi_ret)) {
+   if (extra_dbg)
+   *extra_dbg = acpi_format_exception(acpi_ret);
+   return -EINVAL;
+   }
+
+   return 0;
+}
+
+static bool vfio_platform_acpi_has_reset(struct vfio_platform_device *vdev)
+{
+   struct device *dev = vdev->device;
+   acpi_handle handle = ACPI_HANDLE(dev);
+
+   return acpi_has_method(handle, "_RST");
+}
 #else
 static inline int vfio_platform_acpi_probe(struct vfio_platform_device *vdev,
   struct device *dev)
 {
return -ENOENT;
 }
+
+static inline
+int vfio_platform_acpi_call_reset(struct vfio_platform_device *vdev,
+ const char **extra_dbg)
+{
+   return -ENOENT;
+}
+
+static inline
+bool vfio_platform_acpi_has_reset(struct vfio_platform_device *vdev)
+{
+   return false;
+}
 #endif
 
 static bool vfio_platform_has_reset(struct vfio_platform_device *vdev)
 {
+   if (vdev->acpihid)
+   return vfio_platform_acpi_has_reset(vdev);
+
return vdev->of_reset ? true : false;
 }
 
 static void vfio_platform_get_reset(struct vfio_platform_device *vdev)
 {
+   if (vdev->acpihid)
+   return;
+
vdev->of_reset = vfio_platform_lookup_reset(vdev->compat,
&vdev->reset_module);
if (!vdev->of_reset) {
@@ -99,6 +144,9 @@ static void vfio_platform_get_reset(struct 
vfio_platform_device *vdev)
 
 static void vfio_platform_put_reset(struct vfio_platform_device *vdev)
 {
+   if (vdev->acpihid)
+   return;
+
if (vdev->of_reset)
module_put(vdev->reset_module);
 }
@@ -177,6 +225,9 @@ static int vfio_platform_call_reset(struct 
vfio_platform_device *vdev,
if (vdev->of_reset) {
dev_info(vdev->device, "reset\n");
return vdev->of_reset(vdev);
+   } else if (vdev->acpihid) {
+   dev_info(vdev->device, "reset\n");
+   return vfio_platform_acpi_call_reset(vdev, extra_dbg);
}
 
dev_warn(vdev->device, "no reset function found!\n");
-- 
1.8.2.1



[PATCH V7 8/9] vfio: platform: check reset call return code during open

2016-06-12 Thread Sinan Kaya
Open call is ignoring the return code from reset call and can
potentially continue even though reset call failed.

If reset_required module parameter is set, this patch is going
to validate the return code and will abort open if reset fails.

Signed-off-by: Sinan Kaya 
---
 drivers/vfio/platform/vfio_platform_common.c | 11 ++-
 1 file changed, 10 insertions(+), 1 deletion(-)

diff --git a/drivers/vfio/platform/vfio_platform_common.c 
b/drivers/vfio/platform/vfio_platform_common.c
index d84c399..d3141e7 100644
--- a/drivers/vfio/platform/vfio_platform_common.c
+++ b/drivers/vfio/platform/vfio_platform_common.c
@@ -264,6 +264,8 @@ static int vfio_platform_open(void *device_data)
mutex_lock(&driver_lock);
 
if (!vdev->refcnt) {
+   const char *extra_dbg = NULL;
+
ret = vfio_platform_regions_init(vdev);
if (ret)
goto err_reg;
@@ -272,7 +274,12 @@ static int vfio_platform_open(void *device_data)
if (ret)
goto err_irq;
 
-   vfio_platform_call_reset(vdev, NULL);
+   ret = vfio_platform_call_reset(vdev, &extra_dbg);
+   if (ret && vdev->reset_required) {
+   dev_warn(vdev->device, "reset driver is required and 
reset call failed in open (%d) %s\n",
+ret, extra_dbg ? extra_dbg : "");
+   goto err_rst;
+   }
}
 
vdev->refcnt++;
@@ -280,6 +287,8 @@ static int vfio_platform_open(void *device_data)
mutex_unlock(_lock);
return 0;
 
+err_rst:
+   vfio_platform_irq_cleanup(vdev);
 err_irq:
vfio_platform_regions_cleanup(vdev);
 err_reg:
-- 
1.8.2.1



[PATCH V7 7/9] vfio, platform: make reset driver a requirement by default

2016-06-12 Thread Sinan Kaya
The code was allowing platform devices to be used without a supporting
VFIO reset driver. The hardware can be left in some inconsistent state
after a guest machine abort.

The reset driver will put the hardware back to safe state and disable
interrupts before returning the control back to the host machine.

Adding a new reset_required kernel module option to AMBA and platform
VFIO drivers with a default value of true.

New requirements are:
1. A reset function needs to be implemented by the corresponding driver
via DT/ACPI.
2. The reset function needs to be discovered via DT/ACPI.

The probe of the driver will fail if any of the above conditions are
not satisfied.

Signed-off-by: Sinan Kaya 
Reviewed-by: Eric Auger 
---
 drivers/vfio/platform/vfio_amba.c |  5 +
 drivers/vfio/platform/vfio_platform.c |  5 +
 drivers/vfio/platform/vfio_platform_common.c  | 22 +++---
 drivers/vfio/platform/vfio_platform_private.h |  1 +
 4 files changed, 26 insertions(+), 7 deletions(-)

diff --git a/drivers/vfio/platform/vfio_amba.c 
b/drivers/vfio/platform/vfio_amba.c
index a66479b..7585902 100644
--- a/drivers/vfio/platform/vfio_amba.c
+++ b/drivers/vfio/platform/vfio_amba.c
@@ -23,6 +23,10 @@
 #define DRIVER_AUTHOR   "Antonios Motakis "
 #define DRIVER_DESC "VFIO for AMBA devices - User Level meta-driver"
 
+static bool reset_required = true;
+module_param(reset_required, bool, 0644);
+MODULE_PARM_DESC(reset_required, "override reset requirement (default: 1)");
+
 /* probing devices from the AMBA bus */
 
 static struct resource *get_amba_resource(struct vfio_platform_device *vdev,
@@ -68,6 +72,7 @@ static int vfio_amba_probe(struct amba_device *adev, const 
struct amba_id *id)
vdev->get_resource = get_amba_resource;
vdev->get_irq = get_amba_irq;
vdev->parent_module = THIS_MODULE;
+   vdev->reset_required = reset_required;
 
ret = vfio_platform_probe_common(vdev, >dev);
if (ret) {
diff --git a/drivers/vfio/platform/vfio_platform.c 
b/drivers/vfio/platform/vfio_platform.c
index b1cc3a7..ef89146 100644
--- a/drivers/vfio/platform/vfio_platform.c
+++ b/drivers/vfio/platform/vfio_platform.c
@@ -23,6 +23,10 @@
 #define DRIVER_AUTHOR   "Antonios Motakis "
 #define DRIVER_DESC "VFIO for platform devices - User Level meta-driver"
 
+static bool reset_required = true;
+module_param(reset_required, bool, 0644);
+MODULE_PARM_DESC(reset_required, "override reset requirement (default: 1)");
+
 /* probing devices from the linux platform bus */
 
 static struct resource *get_platform_resource(struct vfio_platform_device 
*vdev,
@@ -66,6 +70,7 @@ static int vfio_platform_probe(struct platform_device *pdev)
vdev->get_resource = get_platform_resource;
vdev->get_irq = get_platform_irq;
vdev->parent_module = THIS_MODULE;
+   vdev->reset_required = reset_required;
 
ret = vfio_platform_probe_common(vdev, >dev);
if (ret)
diff --git a/drivers/vfio/platform/vfio_platform_common.c 
b/drivers/vfio/platform/vfio_platform_common.c
index 0ea8c26..d84c399 100644
--- a/drivers/vfio/platform/vfio_platform_common.c
+++ b/drivers/vfio/platform/vfio_platform_common.c
@@ -128,10 +128,10 @@ static bool vfio_platform_has_reset(struct 
vfio_platform_device *vdev)
return vdev->of_reset ? true : false;
 }
 
-static void vfio_platform_get_reset(struct vfio_platform_device *vdev)
+static int vfio_platform_get_reset(struct vfio_platform_device *vdev)
 {
if (vdev->acpihid)
-   return;
+   return vfio_platform_acpi_has_reset(vdev) ? 0 : -ENOENT;
 
vdev->of_reset = vfio_platform_lookup_reset(vdev->compat,
>reset_module);
@@ -140,6 +140,8 @@ static void vfio_platform_get_reset(struct 
vfio_platform_device *vdev)
vdev->of_reset = vfio_platform_lookup_reset(vdev->compat,
>reset_module);
}
+
+   return vdev->of_reset ? 0 : -ENOENT;
 }
 
 static void vfio_platform_put_reset(struct vfio_platform_device *vdev)
@@ -670,16 +672,22 @@ int vfio_platform_probe_common(struct 
vfio_platform_device *vdev,
}
 
ret = vfio_add_group_dev(dev, _platform_ops, vdev);
-   if (ret) {
-   iommu_group_put(group);
-   return ret;
-   }
+   if (ret)
+   goto out;
 
-   vfio_platform_get_reset(vdev);
+   ret = vfio_platform_get_reset(vdev);
+   if (ret && vdev->reset_required) {
+   pr_err("vfio: no reset function found for device %s\n",
+  vdev->name);
+   goto out;
+   }
 
mutex_init(>igate);
 
return 0;
+out:
+   iommu_group_put(group);
+   return ret;
 }
 EXPORT_SYMBOL_GPL(vfio_platform_probe_common);
 
diff --git a/drivers/vfio/platform/vfio_platform_private.h 
b/drivers/vfio/platform/vfio_platform_private.h
index 

[PATCH V7 6/9] vfio: platform: call _RST method when using ACPI

2016-06-12 Thread Sinan Kaya
The device tree code checks for the presence of a reset driver and calls
the of_reset function pointer by looking up the reset driver as a module.

ACPI defines _RST method to perform device level reset. After the _RST
method is executed, the OS can resume using the device. _RST method is
expected to stop DMA transfers and IRQs.

This patch introduces two functions, vfio_platform_acpi_has_reset and
vfio_platform_acpi_call_reset. The has_reset function is used to declare
reset capability via the ioctl flag VFIO_DEVICE_FLAGS_RESET. The call_reset
function is used to execute the _RST ACPI method.

Signed-off-by: Sinan Kaya 
Reviewed-by: Eric Auger 
---
 drivers/vfio/platform/vfio_platform_common.c | 51 
 1 file changed, 51 insertions(+)

diff --git a/drivers/vfio/platform/vfio_platform_common.c 
b/drivers/vfio/platform/vfio_platform_common.c
index e7ce2c2..0ea8c26 100644
--- a/drivers/vfio/platform/vfio_platform_common.c
+++ b/drivers/vfio/platform/vfio_platform_common.c
@@ -73,21 +73,66 @@ static int vfio_platform_acpi_probe(struct 
vfio_platform_device *vdev,
}
return 0;
 }
+
+static int vfio_platform_acpi_call_reset(struct vfio_platform_device *vdev,
+const char **extra_dbg)
+{
+   struct device *dev = vdev->device;
+   acpi_handle handle = ACPI_HANDLE(dev);
+   acpi_status acpi_ret;
+   unsigned long long val;
+
+   acpi_ret = acpi_evaluate_integer(handle, "_RST", NULL, &val);
+   if (ACPI_FAILURE(acpi_ret)) {
+   if (extra_dbg)
+   *extra_dbg = acpi_format_exception(acpi_ret);
+   return -EINVAL;
+   }
+
+   return 0;
+}
+
+static bool vfio_platform_acpi_has_reset(struct vfio_platform_device *vdev)
+{
+   struct device *dev = vdev->device;
+   acpi_handle handle = ACPI_HANDLE(dev);
+
+   return acpi_has_method(handle, "_RST");
+}
 #else
 static inline int vfio_platform_acpi_probe(struct vfio_platform_device *vdev,
   struct device *dev)
 {
return -ENOENT;
 }
+
+static inline
+int vfio_platform_acpi_call_reset(struct vfio_platform_device *vdev,
+ const char **extra_dbg)
+{
+   return -ENOENT;
+}
+
+static inline
+bool vfio_platform_acpi_has_reset(struct vfio_platform_device *vdev)
+{
+   return false;
+}
 #endif
 
 static bool vfio_platform_has_reset(struct vfio_platform_device *vdev)
 {
+   if (vdev->acpihid)
+   return vfio_platform_acpi_has_reset(vdev);
+
return vdev->of_reset ? true : false;
 }
 
 static void vfio_platform_get_reset(struct vfio_platform_device *vdev)
 {
+   if (vdev->acpihid)
+   return;
+
vdev->of_reset = vfio_platform_lookup_reset(vdev->compat,
>reset_module);
if (!vdev->of_reset) {
@@ -99,6 +144,9 @@ static void vfio_platform_get_reset(struct 
vfio_platform_device *vdev)
 
 static void vfio_platform_put_reset(struct vfio_platform_device *vdev)
 {
+   if (vdev->acpihid)
+   return;
+
if (vdev->of_reset)
module_put(vdev->reset_module);
 }
@@ -177,6 +225,9 @@ static int vfio_platform_call_reset(struct 
vfio_platform_device *vdev,
if (vdev->of_reset) {
dev_info(vdev->device, "reset\n");
return vdev->of_reset(vdev);
+   } else if (vdev->acpihid) {
+   dev_info(vdev->device, "reset\n");
+   return vfio_platform_acpi_call_reset(vdev, extra_dbg);
}
 
dev_warn(vdev->device, "no reset function found!\n");
-- 
1.8.2.1



[PATCH V7 8/9] vfio: platform: check reset call return code during open

2016-06-12 Thread Sinan Kaya
Open call is ignoring the return code from reset call and can
potentially continue even though reset call failed.

If reset_required module parameter is set, this patch is going
to validate the return code and will abort open if reset fails.

Signed-off-by: Sinan Kaya 
---
 drivers/vfio/platform/vfio_platform_common.c | 11 ++-
 1 file changed, 10 insertions(+), 1 deletion(-)

diff --git a/drivers/vfio/platform/vfio_platform_common.c 
b/drivers/vfio/platform/vfio_platform_common.c
index d84c399..d3141e7 100644
--- a/drivers/vfio/platform/vfio_platform_common.c
+++ b/drivers/vfio/platform/vfio_platform_common.c
@@ -264,6 +264,8 @@ static int vfio_platform_open(void *device_data)
mutex_lock(_lock);
 
if (!vdev->refcnt) {
+   const char *extra_dbg = NULL;
+
ret = vfio_platform_regions_init(vdev);
if (ret)
goto err_reg;
@@ -272,7 +274,12 @@ static int vfio_platform_open(void *device_data)
if (ret)
goto err_irq;
 
-   vfio_platform_call_reset(vdev, NULL);
+   ret = vfio_platform_call_reset(vdev, &extra_dbg);
+   if (ret && vdev->reset_required) {
+   dev_warn(vdev->device, "reset driver is required and 
reset call failed in open (%d) %s\n",
+ret, extra_dbg ? extra_dbg : "");
+   goto err_rst;
+   }
}
 
vdev->refcnt++;
@@ -280,6 +287,8 @@ static int vfio_platform_open(void *device_data)
mutex_unlock(_lock);
return 0;
 
+err_rst:
+   vfio_platform_irq_cleanup(vdev);
 err_irq:
vfio_platform_regions_cleanup(vdev);
 err_reg:
-- 
1.8.2.1



Re: [PATCH v13 08/10] arm64: Add trampoline code for kretprobes

2016-06-12 Thread David Long

On 06/07/2016 06:38 AM, Masami Hiramatsu wrote:

On Thu,  2 Jun 2016 23:26:22 -0400
David Long  wrote:


From: William Cohen 

The trampoline code is used by kretprobes to capture a return from a probed
function.  This is done by saving the registers, calling the handler, and
restoring the registers. The code then returns to the original saved caller
return address. It is necessary to do this directly instead of using a
software breakpoint because the code used in processing that breakpoint
could itself be kprobe'd and cause a problematic reentry into the debug
exception handler.


OK, I think we had discussed why this was not included in the next patch.
(You would rather not merge patches from different people?)


Yes, and adding the trampoline support before making use of it seemed OK 
to me even if it wasn't strictly necessary.




Acked-by: Masami Hiramatsu 

Thanks,



Signed-off-by: William Cohen 
Signed-off-by: David A. Long 
---
  arch/arm64/include/asm/kprobes.h   |  2 +
  arch/arm64/kernel/Makefile |  1 +
  arch/arm64/kernel/asm-offsets.c| 11 +
  arch/arm64/kernel/kprobes.c|  5 ++
  arch/arm64/kernel/kprobes_trampoline.S | 85 ++
  5 files changed, 104 insertions(+)
  create mode 100644 arch/arm64/kernel/kprobes_trampoline.S

diff --git a/arch/arm64/include/asm/kprobes.h b/arch/arm64/include/asm/kprobes.h
index 79c9511..61b4915 100644
--- a/arch/arm64/include/asm/kprobes.h
+++ b/arch/arm64/include/asm/kprobes.h
@@ -56,5 +56,7 @@ int kprobe_exceptions_notify(struct notifier_block *self,
 unsigned long val, void *data);
  int kprobe_breakpoint_handler(struct pt_regs *regs, unsigned int esr);
  int kprobe_single_step_handler(struct pt_regs *regs, unsigned int esr);
+void kretprobe_trampoline(void);
+void __kprobes *trampoline_probe_handler(struct pt_regs *regs);

  #endif /* _ARM_KPROBES_H */
diff --git a/arch/arm64/kernel/Makefile b/arch/arm64/kernel/Makefile
index 46724a1..75751b7 100644
--- a/arch/arm64/kernel/Makefile
+++ b/arch/arm64/kernel/Makefile
@@ -38,6 +38,7 @@ arm64-obj-$(CONFIG_CPU_IDLE)  += cpuidle.o
  arm64-obj-$(CONFIG_JUMP_LABEL)+= jump_label.o
  arm64-obj-$(CONFIG_KGDB)  += kgdb.o
  arm64-obj-$(CONFIG_KPROBES)   += kprobes.o kprobes-arm64.o
\
+  kprobes_trampoline.o 
\
   probes-simulate-insn.o
  arm64-obj-$(CONFIG_EFI)   += efi.o efi-entry.stub.o
  arm64-obj-$(CONFIG_PCI)   += pci.o
diff --git a/arch/arm64/kernel/asm-offsets.c b/arch/arm64/kernel/asm-offsets.c
index f8e5d47..03dfa27 100644
--- a/arch/arm64/kernel/asm-offsets.c
+++ b/arch/arm64/kernel/asm-offsets.c
@@ -51,6 +51,17 @@ int main(void)
DEFINE(S_X5,offsetof(struct pt_regs, regs[5]));
DEFINE(S_X6,offsetof(struct pt_regs, regs[6]));
DEFINE(S_X7,offsetof(struct pt_regs, regs[7]));
+  DEFINE(S_X8, offsetof(struct pt_regs, regs[8]));
+  DEFINE(S_X10,offsetof(struct pt_regs, regs[10]));
+  DEFINE(S_X12,offsetof(struct pt_regs, regs[12]));
+  DEFINE(S_X14,offsetof(struct pt_regs, regs[14]));
+  DEFINE(S_X16,offsetof(struct pt_regs, regs[16]));
+  DEFINE(S_X18,offsetof(struct pt_regs, regs[18]));
+  DEFINE(S_X20,offsetof(struct pt_regs, regs[20]));
+  DEFINE(S_X22,offsetof(struct pt_regs, regs[22]));
+  DEFINE(S_X24,offsetof(struct pt_regs, regs[24]));
+  DEFINE(S_X26,offsetof(struct pt_regs, regs[26]));
+  DEFINE(S_X28,offsetof(struct pt_regs, regs[28]));
DEFINE(S_LR,offsetof(struct pt_regs, regs[30]));
DEFINE(S_SP,offsetof(struct pt_regs, sp));
  #ifdef CONFIG_COMPAT
diff --git a/arch/arm64/kernel/kprobes.c b/arch/arm64/kernel/kprobes.c
index 9d0ad47..b35f76f 100644
--- a/arch/arm64/kernel/kprobes.c
+++ b/arch/arm64/kernel/kprobes.c
@@ -575,6 +575,11 @@ bool arch_within_kprobe_blacklist(unsigned long addr)
return false;
  }

+void __kprobes __used *trampoline_probe_handler(struct pt_regs *regs)
+{
+   return NULL;
+}
+
  int __init arch_init_kprobes(void)
  {
return 0;
diff --git a/arch/arm64/kernel/kprobes_trampoline.S 
b/arch/arm64/kernel/kprobes_trampoline.S
new file mode 100644
index 000..ba37d85
--- /dev/null
+++ b/arch/arm64/kernel/kprobes_trampoline.S
@@ -0,0 +1,85 @@
+/*
+ * trampoline entry and return code for kretprobes.
+ */
+
+#include 
+#include 
+#include 
+
+   .text
+
+.macro save_all_base_regs
+   stp x0, x1, 

Re: [PATCH v13 08/10] arm64: Add trampoline code for kretprobes

2016-06-12 Thread David Long

On 06/07/2016 06:38 AM, Masami Hiramatsu wrote:

On Thu,  2 Jun 2016 23:26:22 -0400
David Long  wrote:


From: William Cohen 

The trampoline code is used by kretprobes to capture a return from a probed
function.  This is done by saving the registers, calling the handler, and
restoring the registers. The code then returns to the original saved caller
return address. It is necessary to do this directly instead of using a
software breakpoint because the code used in processing that breakpoint
could itself be kprobe'd and cause a problematic reentry into the debug
exception handler.


OK, I think we had discussed why this was not included in the next patch.
(You would rather not merge patches from different people?)


Yes, and adding the trampoline support before making use of it seemed OK 
to me even if it wasn't strictly necessary.




Acked-by: Masami Hiramatsu 

Thanks,



Signed-off-by: William Cohen 
Signed-off-by: David A. Long 
---
  arch/arm64/include/asm/kprobes.h   |  2 +
  arch/arm64/kernel/Makefile |  1 +
  arch/arm64/kernel/asm-offsets.c| 11 +
  arch/arm64/kernel/kprobes.c|  5 ++
  arch/arm64/kernel/kprobes_trampoline.S | 85 ++
  5 files changed, 104 insertions(+)
  create mode 100644 arch/arm64/kernel/kprobes_trampoline.S

diff --git a/arch/arm64/include/asm/kprobes.h b/arch/arm64/include/asm/kprobes.h
index 79c9511..61b4915 100644
--- a/arch/arm64/include/asm/kprobes.h
+++ b/arch/arm64/include/asm/kprobes.h
@@ -56,5 +56,7 @@ int kprobe_exceptions_notify(struct notifier_block *self,
 unsigned long val, void *data);
  int kprobe_breakpoint_handler(struct pt_regs *regs, unsigned int esr);
  int kprobe_single_step_handler(struct pt_regs *regs, unsigned int esr);
+void kretprobe_trampoline(void);
+void __kprobes *trampoline_probe_handler(struct pt_regs *regs);

  #endif /* _ARM_KPROBES_H */
diff --git a/arch/arm64/kernel/Makefile b/arch/arm64/kernel/Makefile
index 46724a1..75751b7 100644
--- a/arch/arm64/kernel/Makefile
+++ b/arch/arm64/kernel/Makefile
@@ -38,6 +38,7 @@ arm64-obj-$(CONFIG_CPU_IDLE)  += cpuidle.o
  arm64-obj-$(CONFIG_JUMP_LABEL)+= jump_label.o
  arm64-obj-$(CONFIG_KGDB)  += kgdb.o
  arm64-obj-$(CONFIG_KPROBES)   += kprobes.o kprobes-arm64.o
\
+  kprobes_trampoline.o 
\
   probes-simulate-insn.o
  arm64-obj-$(CONFIG_EFI)   += efi.o efi-entry.stub.o
  arm64-obj-$(CONFIG_PCI)   += pci.o
diff --git a/arch/arm64/kernel/asm-offsets.c b/arch/arm64/kernel/asm-offsets.c
index f8e5d47..03dfa27 100644
--- a/arch/arm64/kernel/asm-offsets.c
+++ b/arch/arm64/kernel/asm-offsets.c
@@ -51,6 +51,17 @@ int main(void)
DEFINE(S_X5,offsetof(struct pt_regs, regs[5]));
DEFINE(S_X6,offsetof(struct pt_regs, regs[6]));
DEFINE(S_X7,offsetof(struct pt_regs, regs[7]));
+  DEFINE(S_X8, offsetof(struct pt_regs, regs[8]));
+  DEFINE(S_X10,offsetof(struct pt_regs, regs[10]));
+  DEFINE(S_X12,offsetof(struct pt_regs, regs[12]));
+  DEFINE(S_X14,offsetof(struct pt_regs, regs[14]));
+  DEFINE(S_X16,offsetof(struct pt_regs, regs[16]));
+  DEFINE(S_X18,offsetof(struct pt_regs, regs[18]));
+  DEFINE(S_X20,offsetof(struct pt_regs, regs[20]));
+  DEFINE(S_X22,offsetof(struct pt_regs, regs[22]));
+  DEFINE(S_X24,offsetof(struct pt_regs, regs[24]));
+  DEFINE(S_X26,offsetof(struct pt_regs, regs[26]));
+  DEFINE(S_X28,offsetof(struct pt_regs, regs[28]));
DEFINE(S_LR,offsetof(struct pt_regs, regs[30]));
DEFINE(S_SP,offsetof(struct pt_regs, sp));
  #ifdef CONFIG_COMPAT
diff --git a/arch/arm64/kernel/kprobes.c b/arch/arm64/kernel/kprobes.c
index 9d0ad47..b35f76f 100644
--- a/arch/arm64/kernel/kprobes.c
+++ b/arch/arm64/kernel/kprobes.c
@@ -575,6 +575,11 @@ bool arch_within_kprobe_blacklist(unsigned long addr)
return false;
  }

+void __kprobes __used *trampoline_probe_handler(struct pt_regs *regs)
+{
+   return NULL;
+}
+
  int __init arch_init_kprobes(void)
  {
return 0;
diff --git a/arch/arm64/kernel/kprobes_trampoline.S 
b/arch/arm64/kernel/kprobes_trampoline.S
new file mode 100644
index 000..ba37d85
--- /dev/null
+++ b/arch/arm64/kernel/kprobes_trampoline.S
@@ -0,0 +1,85 @@
+/*
+ * trampoline entry and return code for kretprobes.
+ */
+
+#include 
+#include 
+#include 
+
+   .text
+
+.macro save_all_base_regs
+   stp x0, x1, [sp, #S_X0]
+   stp x2, x3, [sp, #S_X2]
+   stp x4, x5, [sp, #S_X4]
+   stp x6, x7, [sp, 

Re: [PATCH v13 03/10] arm64: add conditional instruction simulation support

2016-06-12 Thread David Long

On 06/03/2016 11:53 PM, Masami Hiramatsu wrote:

On Thu,  2 Jun 2016 23:26:17 -0400
David Long  wrote:


From: "David A. Long" 

Cease using the arm32 arm_check_condition() function and replace it with
a local version for use in deprecated instruction support on arm64. Also
make the function table used by this available for future use by kprobes
and/or uprobes.

This function is derived from code written by Sandeepa Prabhu.



Basically looks good to me. I have some comments;


Signed-off-by: Sandeepa Prabhu 
Signed-off-by: David A. Long 
---
  arch/arm64/include/asm/insn.h|  3 ++
  arch/arm64/kernel/Makefile   |  3 +-
  arch/arm64/kernel/armv8_deprecated.c | 19 ++-
  arch/arm64/kernel/insn.c | 98 
  4 files changed, 119 insertions(+), 4 deletions(-)

diff --git a/arch/arm64/include/asm/insn.h b/arch/arm64/include/asm/insn.h
index 9785d10..98e4edd 100644
--- a/arch/arm64/include/asm/insn.h
+++ b/arch/arm64/include/asm/insn.h
@@ -406,6 +406,9 @@ u32 aarch64_extract_system_register(u32 insn);
  u32 aarch32_insn_extract_reg_num(u32 insn, int offset);
  u32 aarch32_insn_mcr_extract_opc2(u32 insn);
  u32 aarch32_insn_mcr_extract_crm(u32 insn);
+
+typedef bool (pstate_check_t)(unsigned long);
+extern pstate_check_t * const opcode_condition_checks[16];


Are those condition checkers only for aarch32 opcode? or
general for aarch64 too? If it is only for aarch32, we'd better
add aarch32 prefix.



I have this vague recollection there once was a reason for this but I 
can't for the life of me remember why. I altered the symbol name to 
something that begins with aarch32.



  #endif /* __ASSEMBLY__ */

  #endif/* __ASM_INSN_H */
diff --git a/arch/arm64/kernel/Makefile b/arch/arm64/kernel/Makefile
index 2173149..4653aca 100644
--- a/arch/arm64/kernel/Makefile
+++ b/arch/arm64/kernel/Makefile
@@ -26,8 +26,7 @@ $(obj)/%.stub.o: $(obj)/%.o FORCE
$(call if_changed,objcopy)

  arm64-obj-$(CONFIG_COMPAT)+= sys32.o kuser32.o signal32.o 
\
-  sys_compat.o entry32.o   
\
-  ../../arm/kernel/opcodes.o
+  sys_compat.o entry32.o
  arm64-obj-$(CONFIG_FUNCTION_TRACER)   += ftrace.o entry-ftrace.o
  arm64-obj-$(CONFIG_MODULES)   += arm64ksyms.o module.o
  arm64-obj-$(CONFIG_ARM64_MODULE_PLTS) += module-plts.o
diff --git a/arch/arm64/kernel/armv8_deprecated.c 
b/arch/arm64/kernel/armv8_deprecated.c
index c37202c..88b9165 100644
--- a/arch/arm64/kernel/armv8_deprecated.c
+++ b/arch/arm64/kernel/armv8_deprecated.c
@@ -366,6 +366,21 @@ static int emulate_swpX(unsigned int address, unsigned int 
*data,
return res;
  }

+#define ARM_OPCODE_CONDITION_UNCOND 0xf
+
+static unsigned int __kprobes arm32_check_condition(u32 opcode, u32 psr)


Would you be OK for using arm32 instead of aarch32 prefix?


I think you meant the opposite of that?  I guess that would make sense, 
and would be simple enough since it's an internal function.  I will 
change arm32 to aarch32.





+{
+   u32 cc_bits  = opcode >> 28;
+
+   if (cc_bits != ARM_OPCODE_CONDITION_UNCOND) {
+   if ((*opcode_condition_checks[cc_bits])(psr))
+   return ARM_OPCODE_CONDTEST_PASS;
+   else
+   return ARM_OPCODE_CONDTEST_FAIL;
+   }
+   return ARM_OPCODE_CONDTEST_UNCOND;
+}


Thank you,



Thanks,
-dl


Re: [PATCH v13 03/10] arm64: add conditional instruction simulation support

2016-06-12 Thread David Long

On 06/03/2016 11:53 PM, Masami Hiramatsu wrote:

On Thu,  2 Jun 2016 23:26:17 -0400
David Long  wrote:


From: "David A. Long" 

Cease using the arm32 arm_check_condition() function and replace it with
a local version for use in deprecated instruction support on arm64. Also
make the function table used by this available for future use by kprobes
and/or uprobes.

This function is derived from code written by Sandeepa Prabhu.



Basically looks good to me. I have some comments;


Signed-off-by: Sandeepa Prabhu 
Signed-off-by: David A. Long 
---
  arch/arm64/include/asm/insn.h|  3 ++
  arch/arm64/kernel/Makefile   |  3 +-
  arch/arm64/kernel/armv8_deprecated.c | 19 ++-
  arch/arm64/kernel/insn.c | 98 
  4 files changed, 119 insertions(+), 4 deletions(-)

diff --git a/arch/arm64/include/asm/insn.h b/arch/arm64/include/asm/insn.h
index 9785d10..98e4edd 100644
--- a/arch/arm64/include/asm/insn.h
+++ b/arch/arm64/include/asm/insn.h
@@ -406,6 +406,9 @@ u32 aarch64_extract_system_register(u32 insn);
  u32 aarch32_insn_extract_reg_num(u32 insn, int offset);
  u32 aarch32_insn_mcr_extract_opc2(u32 insn);
  u32 aarch32_insn_mcr_extract_crm(u32 insn);
+
+typedef bool (pstate_check_t)(unsigned long);
+extern pstate_check_t * const opcode_condition_checks[16];


Are those condition checkers only for aarch32 opcode? or
general for aarch64 too? If it is only for aarch32, we'd better
add aarch32 prefix.



I have this vague recollection there once was a reason for this but I 
can't for the life of me remember why. I altered the symbol name to 
something that begins with aarch32.



  #endif /* __ASSEMBLY__ */

  #endif/* __ASM_INSN_H */
diff --git a/arch/arm64/kernel/Makefile b/arch/arm64/kernel/Makefile
index 2173149..4653aca 100644
--- a/arch/arm64/kernel/Makefile
+++ b/arch/arm64/kernel/Makefile
@@ -26,8 +26,7 @@ $(obj)/%.stub.o: $(obj)/%.o FORCE
$(call if_changed,objcopy)

  arm64-obj-$(CONFIG_COMPAT)+= sys32.o kuser32.o signal32.o 
\
-  sys_compat.o entry32.o   
\
-  ../../arm/kernel/opcodes.o
+  sys_compat.o entry32.o
  arm64-obj-$(CONFIG_FUNCTION_TRACER)   += ftrace.o entry-ftrace.o
  arm64-obj-$(CONFIG_MODULES)   += arm64ksyms.o module.o
  arm64-obj-$(CONFIG_ARM64_MODULE_PLTS) += module-plts.o
diff --git a/arch/arm64/kernel/armv8_deprecated.c 
b/arch/arm64/kernel/armv8_deprecated.c
index c37202c..88b9165 100644
--- a/arch/arm64/kernel/armv8_deprecated.c
+++ b/arch/arm64/kernel/armv8_deprecated.c
@@ -366,6 +366,21 @@ static int emulate_swpX(unsigned int address, unsigned int 
*data,
return res;
  }

+#define ARM_OPCODE_CONDITION_UNCOND 0xf
+
+static unsigned int __kprobes arm32_check_condition(u32 opcode, u32 psr)


Would you be OK for using arm32 instead of aarch32 prefix?


I think you meant the opposite of that?  I guess that would make sense, 
and would be simple enough since it's an internal function.  I will 
change arm32 to aarch32.





+{
+   u32 cc_bits  = opcode >> 28;
+
+   if (cc_bits != ARM_OPCODE_CONDITION_UNCOND) {
+   if ((*opcode_condition_checks[cc_bits])(psr))
+   return ARM_OPCODE_CONDTEST_PASS;
+   else
+   return ARM_OPCODE_CONDTEST_FAIL;
+   }
+   return ARM_OPCODE_CONDTEST_UNCOND;
+}


Thank you,



Thanks,
-dl


Re: [alsa-devel] [PATCH] ASoC: intel: fix build when ACPI is not enabled

2016-06-12 Thread Vinod Koul
On Thu, Jun 09, 2016 at 05:01:38PM -0700, Randy Dunlap wrote:
> From: Randy Dunlap 
> 
> kconfig tools generate the following warning when CONFIG_ACPI is not
> enabled:
> 
> warning: (SND_SOC_INTEL_BYTCR_RT5640_MACH && SND_SOC_INTEL_BYTCR_RT5651_MACH 
> && SND_SOC_INTEL_CHT_BSW_RT5672_MACH && SND_SOC_INTEL_CHT_BSW_RT5645_MACH && 
> SND_SOC_INTEL_CHT_BSW_MAX98090_TI_MACH) selects SND_SST_IPC_ACPI which has 
> unmet direct dependencies (SOUND && !M68K && !UML && SND && SND_SOC && ACPI)
> 
> causing these build errors:
> 
> In file included from ../sound/soc/intel/atom/sst/sst_acpi.c:40:0:
> ../include/acpi/acpi_bus.h:65:20: error: conflicting types for 
> 'acpi_evaluate_dsm'
>  union acpi_object *acpi_evaluate_dsm(acpi_handle handle, const u8 *uuid,
> In file included from ../sound/soc/intel/atom/sst/sst_acpi.c:31:0:
> ../include/linux/acpi.h:676:34: note: previous definition of 
> 'acpi_evaluate_dsm' was here
>  static inline union acpi_object *acpi_evaluate_dsm(acpi_handle handle,
> 
> I am told that ACPI is a requirement for these drivers, so make that
> explicit. Also end help text sentences with a period.
> 
> Signed-off-by: Randy Dunlap 
> Cc:   Jie Yang 
> Cc:   Pierre-Louis Bossart 
> Cc:   alsa-de...@alsa-project.org
> ---
>  sound/soc/intel/Kconfig |   12 ++--
>  1 file changed, 6 insertions(+), 6 deletions(-)
> 
> Should SND_SOC_INTEL_SST and possibly other similar drivers get this
> same change?

SND_SST_IPC_ACPI already has a dependency on ACPI.

Can you send me your config for this?


> 
> --- linux-next-20160607.orig/sound/soc/intel/Kconfig
> +++ linux-next-20160607/sound/soc/intel/Kconfig
> @@ -128,28 +128,28 @@ config SND_SOC_INTEL_BROADWELL_MACH
>  
>  config SND_SOC_INTEL_BYTCR_RT5640_MACH
>  tristate "ASoC Audio driver for Intel Baytrail and Baytrail-CR with 
> RT5640 codec"
> - depends on X86 && I2C
> + depends on X86 && I2C && ACPI
>   select SND_SOC_RT5640
>   select SND_SST_MFLD_PLATFORM
>   select SND_SST_IPC_ACPI
> - select SND_SOC_INTEL_SST_MATCH if ACPI
> + select SND_SOC_INTEL_SST_MATCH
>   help
>This adds support for ASoC machine driver for Intel(R) Baytrail 
> and Baytrail-CR
>platforms with RT5640 audio codec.
> -  Say Y if you have such a device
> +  Say Y if you have such a device.
>If unsure select "N".
>  
>  config SND_SOC_INTEL_BYTCR_RT5651_MACH
>  tristate "ASoC Audio driver for Intel Baytrail and Baytrail-CR with 
> RT5651 codec"
> - depends on X86 && I2C
> + depends on X86 && I2C && ACPI
>   select SND_SOC_RT5651
>   select SND_SST_MFLD_PLATFORM
>   select SND_SST_IPC_ACPI
> - select SND_SOC_INTEL_SST_MATCH if ACPI
> + select SND_SOC_INTEL_SST_MATCH
>   help
>This adds support for ASoC machine driver for Intel(R) Baytrail 
> and Baytrail-CR
>platforms with RT5651 audio codec.
> -  Say Y if you have such a device
> +  Say Y if you have such a device.
>If unsure select "N".
>  
>  config SND_SOC_INTEL_CHT_BSW_RT5672_MACH
> ___
> Alsa-devel mailing list
> alsa-de...@alsa-project.org
> http://mailman.alsa-project.org/mailman/listinfo/alsa-devel

-- 
~Vinod


Re: [alsa-devel] [PATCH] ASoC: intel: fix build when ACPI is not enabled

2016-06-12 Thread Vinod Koul
On Thu, Jun 09, 2016 at 05:01:38PM -0700, Randy Dunlap wrote:
> From: Randy Dunlap 
> 
> kconfig tools generate the following warning when CONFIG_ACPI is not
> enabled:
> 
> warning: (SND_SOC_INTEL_BYTCR_RT5640_MACH && SND_SOC_INTEL_BYTCR_RT5651_MACH 
> && SND_SOC_INTEL_CHT_BSW_RT5672_MACH && SND_SOC_INTEL_CHT_BSW_RT5645_MACH && 
> SND_SOC_INTEL_CHT_BSW_MAX98090_TI_MACH) selects SND_SST_IPC_ACPI which has 
> unmet direct dependencies (SOUND && !M68K && !UML && SND && SND_SOC && ACPI)
> 
> causing these build errors:
> 
> In file included from ../sound/soc/intel/atom/sst/sst_acpi.c:40:0:
> ../include/acpi/acpi_bus.h:65:20: error: conflicting types for 
> 'acpi_evaluate_dsm'
>  union acpi_object *acpi_evaluate_dsm(acpi_handle handle, const u8 *uuid,
> In file included from ../sound/soc/intel/atom/sst/sst_acpi.c:31:0:
> ../include/linux/acpi.h:676:34: note: previous definition of 
> 'acpi_evaluate_dsm' was here
>  static inline union acpi_object *acpi_evaluate_dsm(acpi_handle handle,
> 
> I am told that ACPI is a requirement for these drivers, so make that
> explicit. Also end help text sentences with a period.
> 
> Signed-off-by: Randy Dunlap 
> Cc:   Jie Yang 
> Cc:   Pierre-Louis Bossart 
> Cc:   alsa-de...@alsa-project.org
> ---
>  sound/soc/intel/Kconfig |   12 ++--
>  1 file changed, 6 insertions(+), 6 deletions(-)
> 
> Should SND_SOC_INTEL_SST and possibly other similar drivers get this
> same change?

SND_SST_IPC_ACPI already has a dependency on ACPI.

Can you send me your config for this?


> 
> --- linux-next-20160607.orig/sound/soc/intel/Kconfig
> +++ linux-next-20160607/sound/soc/intel/Kconfig
> @@ -128,28 +128,28 @@ config SND_SOC_INTEL_BROADWELL_MACH
>  
>  config SND_SOC_INTEL_BYTCR_RT5640_MACH
>  tristate "ASoC Audio driver for Intel Baytrail and Baytrail-CR with 
> RT5640 codec"
> - depends on X86 && I2C
> + depends on X86 && I2C && ACPI
>   select SND_SOC_RT5640
>   select SND_SST_MFLD_PLATFORM
>   select SND_SST_IPC_ACPI
> - select SND_SOC_INTEL_SST_MATCH if ACPI
> + select SND_SOC_INTEL_SST_MATCH
>   help
>This adds support for ASoC machine driver for Intel(R) Baytrail 
> and Baytrail-CR
>platforms with RT5640 audio codec.
> -  Say Y if you have such a device
> +  Say Y if you have such a device.
>If unsure select "N".
>  
>  config SND_SOC_INTEL_BYTCR_RT5651_MACH
>  tristate "ASoC Audio driver for Intel Baytrail and Baytrail-CR with 
> RT5651 codec"
> - depends on X86 && I2C
> + depends on X86 && I2C && ACPI
>   select SND_SOC_RT5651
>   select SND_SST_MFLD_PLATFORM
>   select SND_SST_IPC_ACPI
> - select SND_SOC_INTEL_SST_MATCH if ACPI
> + select SND_SOC_INTEL_SST_MATCH
>   help
>This adds support for ASoC machine driver for Intel(R) Baytrail 
> and Baytrail-CR
>platforms with RT5651 audio codec.
> -  Say Y if you have such a device
> +  Say Y if you have such a device.
>If unsure select "N".
>  
>  config SND_SOC_INTEL_CHT_BSW_RT5672_MACH
> ___
> Alsa-devel mailing list
> alsa-de...@alsa-project.org
> http://mailman.alsa-project.org/mailman/listinfo/alsa-devel

-- 
~Vinod


Re: [PATCH v13 05/10] arm64: Kprobes with single stepping support

2016-06-12 Thread David Long

On 06/07/2016 09:07 PM, Masami Hiramatsu wrote:

On Thu,  2 Jun 2016 23:26:19 -0400
David Long  wrote:


From: Sandeepa Prabhu 

Add support for basic kernel probes (kprobes) and jump probes
(jprobes) for ARM64.

Kprobes utilizes software breakpoint and single step debug
exceptions supported on ARM v8.

A software breakpoint is placed at the probe address to trap the
kernel execution into the kprobe handler.

ARM v8 supports enabling single stepping before the break exception
return (ERET), with next PC in exception return address (ELR_EL1). The
kprobe handler prepares an executable memory slot for out-of-line
execution with a copy of the original instruction being probed, and
enables single stepping. The PC is set to the out-of-line slot address
before the ERET. With this scheme, the instruction is executed with the
exact same register context except for the PC (and DAIF) registers.

Debug mask (PSTATE.D) is enabled only when single stepping a recursive
kprobe, e.g.: during kprobes reenter so that probed instruction can be
single stepped within the kprobe handler -exception- context.
The recursion depth of kprobe is always 2, i.e. upon probe re-entry,
any further re-entry is prevented by not calling handlers (and the case
is counted as a missed kprobe).

Single stepping from the x-o-l slot has a drawback for PC-relative accesses
like branching and symbolic literals access as the offset from the new PC
(slot address) may not be ensured to fit in the immediate value of
the opcode. Such instructions need simulation, so reject
probing them.

Instructions generating exceptions or cpu mode change are rejected
for probing.

Exclusive load/store instructions are rejected too.  Additionally, the
code is checked to see if it is inside an exclusive load/store sequence
(code from Pratyush).

System instructions are mostly enabled for stepping, except MSR/MRS
accesses to "DAIF" flags in PSTATE, which are not safe for
probing.

Thanks to Steve Capper and Pratyush Anand for several suggested
changes.


Basically looks good to me.
I have some trivial comments.



Signed-off-by: Sandeepa Prabhu 
Signed-off-by: David A. Long 
Signed-off-by: Pratyush Anand 
---
  arch/arm64/Kconfig  |   1 +
  arch/arm64/include/asm/debug-monitors.h |   5 +
  arch/arm64/include/asm/insn.h   |   4 +-
  arch/arm64/include/asm/kprobes.h|  60 
  arch/arm64/include/asm/probes.h |  44 +++
  arch/arm64/kernel/Makefile  |   1 +
  arch/arm64/kernel/debug-monitors.c  |  18 +-
  arch/arm64/kernel/kprobes-arm64.c   | 144 +
  arch/arm64/kernel/kprobes-arm64.h   |  35 +++
  arch/arm64/kernel/kprobes.c | 526 


Not sure why kprobes.c and kprobes-arm64.c are split.




This comes from the model of the arm32 kprobes code where handling of 
the low-level instruction simulation is implemented in separate files 
for 32-bit vs. thumb instructions.  It should make a little more sense 
in the future when additional instruction simulation code will hopefully 
be added for those instructions we cannot currently single-step 
out-of-line.  It also probably *could* be merged into one file.



  arch/arm64/kernel/vmlinux.lds.S |   1 +
  arch/arm64/mm/fault.c   |  27 +-
  12 files changed, 861 insertions(+), 5 deletions(-)
  create mode 100644 arch/arm64/include/asm/kprobes.h
  create mode 100644 arch/arm64/include/asm/probes.h
  create mode 100644 arch/arm64/kernel/kprobes-arm64.c
  create mode 100644 arch/arm64/kernel/kprobes-arm64.h
  create mode 100644 arch/arm64/kernel/kprobes.c

diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index 0f7a624..5496b75 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -88,6 +88,7 @@ config ARM64
select HAVE_REGS_AND_STACK_ACCESS_API
select HAVE_RCU_TABLE_FREE
select HAVE_SYSCALL_TRACEPOINTS
+   select HAVE_KPROBES
select IOMMU_DMA if IOMMU_SUPPORT
select IRQ_DOMAIN
select IRQ_FORCED_THREADING
diff --git a/arch/arm64/include/asm/debug-monitors.h 
b/arch/arm64/include/asm/debug-monitors.h
index 2fcb9b7..4b6b3f7 100644
--- a/arch/arm64/include/asm/debug-monitors.h
+++ b/arch/arm64/include/asm/debug-monitors.h
@@ -66,6 +66,11 @@

  #define CACHE_FLUSH_IS_SAFE   1

+/* kprobes BRK opcodes with ESR encoding  */
+#define BRK64_ESR_MASK 0xFFFF
+#define BRK64_ESR_KPROBES  0x0004
+#define BRK64_OPCODE_KPROBES   (AARCH64_BREAK_MON | (BRK64_ESR_KPROBES << 5))
+
  /* AArch32 */
  #define DBG_ESR_EVT_BKPT  0x4
  #define DBG_ESR_EVT_VECC  0x5
diff --git a/arch/arm64/include/asm/insn.h b/arch/arm64/include/asm/insn.h
index 98e4edd..be2d2b9 100644
--- a/arch/arm64/include/asm/insn.h
+++ b/arch/arm64/include/asm/insn.h
@@ -253,6 +253,8 @@ __AARCH64_INSN_FUNCS(ldr_reg,   0x3FE0EC00, 

Re: [PATCH v13 05/10] arm64: Kprobes with single stepping support

2016-06-12 Thread David Long

On 06/07/2016 09:07 PM, Masami Hiramatsu wrote:

On Thu,  2 Jun 2016 23:26:19 -0400
David Long  wrote:


From: Sandeepa Prabhu 

Add support for basic kernel probes(kprobes) and jump probes
(jprobes) for ARM64.

Kprobes utilizes software breakpoint and single step debug
exceptions supported on ARM v8.

A software breakpoint is placed at the probe address to trap the
kernel execution into the kprobe handler.

ARM v8 supports enabling single stepping before the break exception
return (ERET), with next PC in exception return address (ELR_EL1). The
kprobe handler prepares an executable memory slot for out-of-line
execution with a copy of the original instruction being probed, and
enables single stepping. The PC is set to the out-of-line slot address
before the ERET. With this scheme, the instruction is executed with the
exact same register context except for the PC (and DAIF) registers.

Debug mask (PSTATE.D) is enabled only when single stepping a recursive
kprobe, e.g.: during kprobes reenter so that probed instruction can be
single stepped within the kprobe handler -exception- context.
The recursion depth of kprobe is always 2, i.e. upon probe re-entry,
any further re-entry is prevented by not calling handlers and the case
counted as a missed kprobe).

Single stepping from the x-o-l slot has a drawback for PC-relative accesses
like branching and symbolic literals access as the offset from the new PC
(slot address) may not be ensured to fit in the immediate value of
the opcode. Such instructions need simulation, so reject
probing them.

Instructions generating exceptions or cpu mode change are rejected
for probing.

Exclusive load/store instructions are rejected too.  Additionally, the
code is checked to see if it is inside an exclusive load/store sequence
(code from Pratyush).

System instructions are mostly enabled for stepping, except MSR/MRS
accesses to "DAIF" flags in PSTATE, which are not safe for
probing.

Thanks to Steve Capper and Pratyush Anand for several suggested
Changes.


Basically looks good to me.
I have some trivial comments.



Signed-off-by: Sandeepa Prabhu 
Signed-off-by: David A. Long 
Signed-off-by: Pratyush Anand 
---
  arch/arm64/Kconfig  |   1 +
  arch/arm64/include/asm/debug-monitors.h |   5 +
  arch/arm64/include/asm/insn.h   |   4 +-
  arch/arm64/include/asm/kprobes.h|  60 
  arch/arm64/include/asm/probes.h |  44 +++
  arch/arm64/kernel/Makefile  |   1 +
  arch/arm64/kernel/debug-monitors.c  |  18 +-
  arch/arm64/kernel/kprobes-arm64.c   | 144 +
  arch/arm64/kernel/kprobes-arm64.h   |  35 +++
  arch/arm64/kernel/kprobes.c | 526 


Not sure why kprobes.c and kprobes-arm64.c are split.




This comes from the model of the arm32 kprobes code where handling of 
the low-level instruction simulation is implemented in separate files 
for 32-bit vs. thumb instructions.  It should make a little more sense 
in the future when additional instruction simulation code will hopefully 
be added for those instructions we cannot currently single-step 
out-of-line.  It also probably *could* be merged into one file.



  arch/arm64/kernel/vmlinux.lds.S |   1 +
  arch/arm64/mm/fault.c   |  27 +-
  12 files changed, 861 insertions(+), 5 deletions(-)
  create mode 100644 arch/arm64/include/asm/kprobes.h
  create mode 100644 arch/arm64/include/asm/probes.h
  create mode 100644 arch/arm64/kernel/kprobes-arm64.c
  create mode 100644 arch/arm64/kernel/kprobes-arm64.h
  create mode 100644 arch/arm64/kernel/kprobes.c

diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index 0f7a624..5496b75 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -88,6 +88,7 @@ config ARM64
select HAVE_REGS_AND_STACK_ACCESS_API
select HAVE_RCU_TABLE_FREE
select HAVE_SYSCALL_TRACEPOINTS
+   select HAVE_KPROBES
select IOMMU_DMA if IOMMU_SUPPORT
select IRQ_DOMAIN
select IRQ_FORCED_THREADING
diff --git a/arch/arm64/include/asm/debug-monitors.h 
b/arch/arm64/include/asm/debug-monitors.h
index 2fcb9b7..4b6b3f7 100644
--- a/arch/arm64/include/asm/debug-monitors.h
+++ b/arch/arm64/include/asm/debug-monitors.h
@@ -66,6 +66,11 @@

  #define CACHE_FLUSH_IS_SAFE   1

+/* kprobes BRK opcodes with ESR encoding  */
+#define BRK64_ESR_MASK 0xFFFF
+#define BRK64_ESR_KPROBES  0x0004
+#define BRK64_OPCODE_KPROBES   (AARCH64_BREAK_MON | (BRK64_ESR_KPROBES << 5))
+
  /* AArch32 */
  #define DBG_ESR_EVT_BKPT  0x4
  #define DBG_ESR_EVT_VECC  0x5
diff --git a/arch/arm64/include/asm/insn.h b/arch/arm64/include/asm/insn.h
index 98e4edd..be2d2b9 100644
--- a/arch/arm64/include/asm/insn.h
+++ b/arch/arm64/include/asm/insn.h
@@ -253,6 +253,8 @@ __AARCH64_INSN_FUNCS(ldr_reg,   0x3FE0EC00, 0x38606800)
  __AARCH64_INSN_FUNCS(ldr_lit, 0xBF000000, 0x18000000)
  __AARCH64_INSN_FUNCS(ldrsw_lit,   0xFF000000, 0x98000000)

Re: [PATCH] Input: axp20x-pek: use devm_add_action_or_reset

2016-06-12 Thread Chen-Yu Tsai
On Sun, Jun 12, 2016 at 10:12 PM, Sudip Mukherjee
 wrote:
> If devm_add_action() fails we are explicitly calling
> axp20x_remove_sysfs_group() to free the resources allocated. Lets use
> the helper devm_add_action_or_reset() and return directly in case of
> error, as we know that the cleanup function has been already called by
> the helper if there was any error.
>
> Signed-off-by: Sudip Mukherjee 

Acked-by: Chen-Yu Tsai 


Re: [PATCH] Input: axp20x-pek: use devm_add_action_or_reset

2016-06-12 Thread Chen-Yu Tsai
On Sun, Jun 12, 2016 at 10:12 PM, Sudip Mukherjee
 wrote:
> If devm_add_action() fails we are explicitly calling
> axp20x_remove_sysfs_group() to free the resources allocated. Lets use
> the helper devm_add_action_or_reset() and return directly in case of
> error, as we know that the cleanup function has been already called by
> the helper if there was any error.
>
> Signed-off-by: Sudip Mukherjee 

Acked-by: Chen-Yu Tsai 


Re: [RFC v4 01/14] regulator: of: Add helper for getting all supplies

2016-06-12 Thread Peter Chen
On Sun, Jun 12, 2016 at 03:29:01PM +0800, Peter Chen wrote:
> On Fri, Jun 10, 2016 at 12:30:56PM -0500, Rob Herring wrote:
> > On Thu, Jun 09, 2016 at 01:42:02PM +0200, Krzysztof Kozlowski wrote:
> > > On 06/09/2016 12:29 PM, Mark Brown wrote:
> > > > On Thu, Jun 09, 2016 at 11:44:18AM +0200, Krzysztof Kozlowski wrote:
> > > >> Few drivers have a need of getting regulator supplies without knowing
> > > >> their names:
> > > >> 1. The Simple Framebuffer driver works on setup provided by bootloader
> > > >>(outside of scope of kernel);
> > > >> 2. Generic power sequence driver may be attached to any device node.
> > > >>
> > > >> Add a Device Tree helper for parsing "-supply" properties and returning
> > > >> allocated bulk regulator consumers.
> > > > 
> > > > I'm still very concerned that this is just an invitation to people to
> > > > write half baked regulator consumers and half baked DTs to go along with
> > > > it, making it a standard API that doesn't have big red flags on it that
> > > > will flag up when "normal" drivers use it is not good.  Right now this
> > > > just looks like a standard API and people are going to just start using
> > > > it.  If we are going to do this perhaps we need a separate header or
> > > > something to help flag this up.
> > > 
> > > No problem, I can move it to a special header.  Actually, if you dislike
> > > this as an API, it does not have to be in header at all.  I can just
> > > duplicate the simplefb code.
> > > 
> > > > In the case of power sequences I'd expect the sequences to perform
> > > > operations on named supplies - the core shouldn't know what the supplies
> > > > are but the thing specifying the sequence should.
> > > 
> > > Hm, so maybe passing names like:
> > > 
> > > usb3503@08 {
> > >   reset-gpios = < 5 GPIO_ACTIVE_HIGH>;
> > >   initial-mode = <1>;
> > >   vdd-supply = <_reg>;
> > >   foo-supply = <_reg>;
> > > 
> > > power-sequence;
> > >   power-sequence-supplies = "vdd", "foo";
> > 
> > This alone would be fine as it is just one property, but then what's 
> > next? power-sequence-delay, power-sequence-clocks, etc. What if you 
> > need to express ordering relationship of supplies, clocks, gpios? We end 
> > up with a scripting language in DT and we don't want to have that.
> > 
> 
> Can we do things like below:
> 
> - DT describes hardware elements (clock, gpios, etc) for power sequence, and 
> we
> need a node for power sequence.
> - Power sequence framework handles getting hardware elements.

Framework may do few things, since hardware elements are also different
for devices.

> - Power sequence platform driver handles special sequence for devices,
> and we can create some generic drivers for generic devices.
> 

So, my suggestion is to do it the way mmc does (as this patch set does). The
reasons are as follows:

- This piece of power sequence code needs to work like device driver, not
library, it is easy to manage resources using device driver.
- The device on the bus has still not been found, so this piece of code
can't be in device driver on each subsystem.
- We need to have a place for these power sequences drivers

Ideally, I hope it can work like the regulator class, but it seems hard to
make it compatible with the current mmc-pwrseq DT node.

-- 

Best Regards,
Peter Chen


Re: [RFC v4 01/14] regulator: of: Add helper for getting all supplies

2016-06-12 Thread Peter Chen
On Sun, Jun 12, 2016 at 03:29:01PM +0800, Peter Chen wrote:
> On Fri, Jun 10, 2016 at 12:30:56PM -0500, Rob Herring wrote:
> > On Thu, Jun 09, 2016 at 01:42:02PM +0200, Krzysztof Kozlowski wrote:
> > > On 06/09/2016 12:29 PM, Mark Brown wrote:
> > > > On Thu, Jun 09, 2016 at 11:44:18AM +0200, Krzysztof Kozlowski wrote:
> > > >> Few drivers have a need of getting regulator supplies without knowing
> > > >> their names:
> > > >> 1. The Simple Framebuffer driver works on setup provided by bootloader
> > > >>(outside of scope of kernel);
> > > >> 2. Generic power sequence driver may be attached to any device node.
> > > >>
> > > >> Add a Device Tree helper for parsing "-supply" properties and returning
> > > >> allocated bulk regulator consumers.
> > > > 
> > > > I'm still very concerned that this is just an invitation to people to
> > > > write half baked regulator consumers and half baked DTs to go along with
> > > > it, making it a standard API that doesn't have big red flags on it that
> > > > will flag up when "normal" drivers use it is not good.  Right now this
> > > > just looks like a standard API and people are going to just start using
> > > > it.  If we are going to do this perhaps we need a separate header or
> > > > something to help flag this up.
> > > 
> > > No problem, I can move it to a special header.  Actually, if you dislike
> > > this as an API, it does not have to be in header at all.  I can just
> > > duplicate the simplefb code.
> > > 
> > > > In the case of power sequences I'd expect the sequences to perform
> > > > operations on named supplies - the core shouldn't know what the supplies
> > > > are but the thing specifying the sequence should.
> > > 
> > > Hm, so maybe passing names like:
> > > 
> > > usb3503@08 {
> > >   reset-gpios = < 5 GPIO_ACTIVE_HIGH>;
> > >   initial-mode = <1>;
> > >   vdd-supply = <_reg>;
> > >   foo-supply = <_reg>;
> > > 
> > > power-sequence;
> > >   power-sequence-supplies = "vdd", "foo";
> > 
> > This alone would be fine as it is just one property, but then what's 
> > next? power-sequence-delay, power-sequence-clocks, etc. What if you 
> > need to express ordering relationship of supplies, clocks, gpios? We end 
> > up with a scripting language in DT and we don't want to have that.
> > 
> 
> Can we do things like below:
> 
> - DT describes hardware elements (clock, gpios, etc) for power sequence, and 
> we
> need a node for power sequence.
> - Power sequence framework handles getting hardware elements.

Framework may do few things, since hardware elements are also different
for devices.

> - Power sequence platform driver handles special sequence for devices,
> and we can create some generic drivers for generic devices.
> 

So, my suggestion is to do it the way mmc does (as this patch set does). The
reasons are as follows:

- This piece of power sequence code needs to work like device driver, not
library, it is easy to manage resources using device driver.
- The device on the bus has still not been found, so this piece of code
can't be in device driver on each subsystem.
- We need to have a place for these power sequences drivers

Ideally, I hope it can work like the regulator class, but it seems hard to
make it compatible with the current mmc-pwrseq DT node.

-- 

Best Regards,
Peter Chen


Re: [PATCH] mm/zsmalloc: add trace events for zs_compact

2016-06-12 Thread Minchan Kim
On Tue, Jun 07, 2016 at 04:56:44PM +0800, Ganesh Mahendran wrote:
> Currently zsmalloc is widely used in android device.
> Sometimes, we want to see how frequently zs_compact is
> triggered or how many pages are freed by zs_compact(), or which
> zsmalloc pool is compacted.
> 
> Most of the time, user can get the brief information from
> trace_mm_shrink_slab_[start | end], but in some scenario,
> they do not use zsmalloc shrinker, but trigger compaction manually.
> So add some trace events in zs_compact is convenient. Also we
> can add some zsmalloc specific information(pool name, total compact
> pages, etc) in zsmalloc trace.
> 
> This patch add two trace events for zs_compact(), below the trace log:
> -
> root@land:/ # cat /d/tracing/trace
>  kswapd0-125   [007] ...1   174.176979: zsmalloc_compact_start: pool 
> zram0
>  kswapd0-125   [007] ...1   174.181967: zsmalloc_compact_end: pool 
> zram0: 608 pages compacted(total 1794)
>  kswapd0-125   [000] ...1   184.134475: zsmalloc_compact_start: pool 
> zram0
>  kswapd0-125   [000] ...1   184.135010: zsmalloc_compact_end: pool 
> zram0: 62 pages compacted(total 1856)
>  kswapd0-125   [003] ...1   226.927221: zsmalloc_compact_start: pool 
> zram0
>  kswapd0-125   [003] ...1   226.928575: zsmalloc_compact_end: pool 
> zram0: 250 pages compacted(total 2106)
> -
> 
> Signed-off-by: Ganesh Mahendran 
> ---
>  include/trace/events/zsmalloc.h | 56 
> +
>  mm/zsmalloc.c   | 10 
>  2 files changed, 66 insertions(+)
>  create mode 100644 include/trace/events/zsmalloc.h
> 
> diff --git a/include/trace/events/zsmalloc.h b/include/trace/events/zsmalloc.h
> new file mode 100644
> index 000..3b6f14e
> --- /dev/null
> +++ b/include/trace/events/zsmalloc.h
> @@ -0,0 +1,56 @@
> +#undef TRACE_SYSTEM
> +#define TRACE_SYSTEM zsmalloc
> +
> +#if !defined(_TRACE_ZSMALLOC_H) || defined(TRACE_HEADER_MULTI_READ)
> +#define _TRACE_ZSMALLOC_H
> +
> +#include 
> +#include 
> +
> +TRACE_EVENT(zsmalloc_compact_start,

I prefer zs_compact_start.

> +
> + TP_PROTO(const char *pool_name),
> +
> + TP_ARGS(pool_name),
> +
> + TP_STRUCT__entry(
> + __field(const char *, pool_name)
> + ),
> +
> + TP_fast_assign(
> + __entry->pool_name = pool_name;
> + ),
> +
> + TP_printk("pool %s",
> +   __entry->pool_name)
> +);
> +
> +TRACE_EVENT(zsmalloc_compact_end,
> +
> + TP_PROTO(const char *pool_name, unsigned long pages_compacted,
> + unsigned long pages_total_compacted),

Hmm, do we really need pages_total_compacted?

> +
> + TP_ARGS(pool_name, pages_compacted, pages_total_compacted),
> +
> + TP_STRUCT__entry(
> + __field(const char *, pool_name)
> + __field(unsigned long, pages_compacted)
> + __field(unsigned long, pages_total_compacted)
> + ),
> +
> + TP_fast_assign(
> + __entry->pool_name = pool_name;
> + __entry->pages_compacted = pages_compacted;
> + __entry->pages_total_compacted = pages_total_compacted;
> + ),
> +
> + TP_printk("pool %s: %ld pages compacted(total %ld)",
> +   __entry->pool_name,
> +   __entry->pages_compacted,
> +   __entry->pages_total_compacted)
> +);
> +
> +#endif /* _TRACE_ZSMALLOC_H */
> +
> +/* This part must be outside protection */
> +#include <trace/define_trace.h>
> diff --git a/mm/zsmalloc.c b/mm/zsmalloc.c
> index 213d0e1..441b9f7 100644
> --- a/mm/zsmalloc.c
> +++ b/mm/zsmalloc.c
> @@ -30,6 +30,8 @@
>  
>  #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
>  
> +#define CREATE_TRACE_POINTS
> +
>  #include 
>  #include 
>  #include 
> @@ -52,6 +54,7 @@
>  #include 
>  #include 
>  #include 
> +#include <trace/events/zsmalloc.h>
>  
>  #define ZSPAGE_MAGIC 0x58
>  
> @@ -2330,6 +2333,9 @@ unsigned long zs_compact(struct zs_pool *pool)
>  {
>   int i;
>   struct size_class *class;
> + unsigned long pages_compacted_before = pool->stats.pages_compacted;
> +
> + trace_zsmalloc_compact_start(pool->name);

How about moving it into __zs_compact with size_class information?
It would be more useful, I think.

>  
>   for (i = zs_size_classes - 1; i >= 0; i--) {
>   class = pool->size_class[i];
> @@ -2340,6 +2346,10 @@ unsigned long zs_compact(struct zs_pool *pool)
>   __zs_compact(pool, class);
>   }
>  
> + trace_zsmalloc_compact_end(pool->name,
> + pool->stats.pages_compacted - pages_compacted_before,
> + pool->stats.pages_compacted);
> +
>   return pool->stats.pages_compacted;
>  }
>  EXPORT_SYMBOL_GPL(zs_compact);
> -- 
> 1.9.1
> 


Re: [PATCH] mm/zsmalloc: add trace events for zs_compact

2016-06-12 Thread Minchan Kim
On Tue, Jun 07, 2016 at 04:56:44PM +0800, Ganesh Mahendran wrote:
> Currently zsmalloc is widely used in android device.
> Sometimes, we want to see how frequently zs_compact is
> triggered or how many pages are freed by zs_compact(), or which
> zsmalloc pool is compacted.
> 
> Most of the time, user can get the brief information from
> trace_mm_shrink_slab_[start | end], but in some scenario,
> they do not use zsmalloc shrinker, but trigger compaction manually.
> So add some trace events in zs_compact is convenient. Also we
> can add some zsmalloc specific information(pool name, total compact
> pages, etc) in zsmalloc trace.
> 
> This patch add two trace events for zs_compact(), below the trace log:
> -
> root@land:/ # cat /d/tracing/trace
>  kswapd0-125   [007] ...1   174.176979: zsmalloc_compact_start: pool 
> zram0
>  kswapd0-125   [007] ...1   174.181967: zsmalloc_compact_end: pool 
> zram0: 608 pages compacted(total 1794)
>  kswapd0-125   [000] ...1   184.134475: zsmalloc_compact_start: pool 
> zram0
>  kswapd0-125   [000] ...1   184.135010: zsmalloc_compact_end: pool 
> zram0: 62 pages compacted(total 1856)
>  kswapd0-125   [003] ...1   226.927221: zsmalloc_compact_start: pool 
> zram0
>  kswapd0-125   [003] ...1   226.928575: zsmalloc_compact_end: pool 
> zram0: 250 pages compacted(total 2106)
> -
> 
> Signed-off-by: Ganesh Mahendran 
> ---
>  include/trace/events/zsmalloc.h | 56 
> +
>  mm/zsmalloc.c   | 10 
>  2 files changed, 66 insertions(+)
>  create mode 100644 include/trace/events/zsmalloc.h
> 
> diff --git a/include/trace/events/zsmalloc.h b/include/trace/events/zsmalloc.h
> new file mode 100644
> index 000..3b6f14e
> --- /dev/null
> +++ b/include/trace/events/zsmalloc.h
> @@ -0,0 +1,56 @@
> +#undef TRACE_SYSTEM
> +#define TRACE_SYSTEM zsmalloc
> +
> +#if !defined(_TRACE_ZSMALLOC_H) || defined(TRACE_HEADER_MULTI_READ)
> +#define _TRACE_ZSMALLOC_H
> +
> +#include 
> +#include 
> +
> +TRACE_EVENT(zsmalloc_compact_start,

I prefer zs_compact_start.

> +
> + TP_PROTO(const char *pool_name),
> +
> + TP_ARGS(pool_name),
> +
> + TP_STRUCT__entry(
> + __field(const char *, pool_name)
> + ),
> +
> + TP_fast_assign(
> + __entry->pool_name = pool_name;
> + ),
> +
> + TP_printk("pool %s",
> +   __entry->pool_name)
> +);
> +
> +TRACE_EVENT(zsmalloc_compact_end,
> +
> + TP_PROTO(const char *pool_name, unsigned long pages_compacted,
> + unsigned long pages_total_compacted),

Hmm, do we really need pages_total_compacted?

> +
> + TP_ARGS(pool_name, pages_compacted, pages_total_compacted),
> +
> + TP_STRUCT__entry(
> + __field(const char *, pool_name)
> + __field(unsigned long, pages_compacted)
> + __field(unsigned long, pages_total_compacted)
> + ),
> +
> + TP_fast_assign(
> + __entry->pool_name = pool_name;
> + __entry->pages_compacted = pages_compacted;
> + __entry->pages_total_compacted = pages_total_compacted;
> + ),
> +
> + TP_printk("pool %s: %ld pages compacted(total %ld)",
> +   __entry->pool_name,
> +   __entry->pages_compacted,
> +   __entry->pages_total_compacted)
> +);
> +
> +#endif /* _TRACE_ZSMALLOC_H */
> +
> +/* This part must be outside protection */
> +#include <trace/define_trace.h>
> diff --git a/mm/zsmalloc.c b/mm/zsmalloc.c
> index 213d0e1..441b9f7 100644
> --- a/mm/zsmalloc.c
> +++ b/mm/zsmalloc.c
> @@ -30,6 +30,8 @@
>  
>  #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
>  
> +#define CREATE_TRACE_POINTS
> +
>  #include 
>  #include 
>  #include 
> @@ -52,6 +54,7 @@
>  #include 
>  #include 
>  #include 
> +#include <trace/events/zsmalloc.h>
>  
>  #define ZSPAGE_MAGIC 0x58
>  
> @@ -2330,6 +2333,9 @@ unsigned long zs_compact(struct zs_pool *pool)
>  {
>   int i;
>   struct size_class *class;
> + unsigned long pages_compacted_before = pool->stats.pages_compacted;
> +
> + trace_zsmalloc_compact_start(pool->name);

How about moving it into __zs_compact with size_class information?
It would be more useful, I think.

>  
>   for (i = zs_size_classes - 1; i >= 0; i--) {
>   class = pool->size_class[i];
> @@ -2340,6 +2346,10 @@ unsigned long zs_compact(struct zs_pool *pool)
>   __zs_compact(pool, class);
>   }
>  
> + trace_zsmalloc_compact_end(pool->name,
> + pool->stats.pages_compacted - pages_compacted_before,
> + pool->stats.pages_compacted);
> +
>   return pool->stats.pages_compacted;
>  }
>  EXPORT_SYMBOL_GPL(zs_compact);
> -- 
> 1.9.1
> 


Re: [PATCH V6 5/8] vfio: platform: call _RST method when using ACPI

2016-06-12 Thread Sinan Kaya
On 6/8/2016 6:31 PM, Rafael J. Wysocki wrote:
> On Sun, May 29, 2016 at 12:01 AM, Sinan Kaya  wrote:
>> The device tree code checks for the presence of a reset driver and calls
>> the of_reset function pointer by looking up the reset driver as a module.
>>
>> ACPI defines _RST method to perform device level reset. After the _RST
>> method is executed, the OS can resume using the device. _RST method is
>> expected to stop DMA transfers and IRQs.
>>
>> This patch checks the presence of _RST method and calls the _RST
>> method when reset is requested.
> 

A little bit of misinformation here. The current code is checking the presence
during probe time. If the presence of _RST method is required then probe is
aborted. Otherwise, probe will complete execution.

When reset call is to be executed, presence of _RST method is no longer checked.
Instead, the method is directly called. 

I was talking about the contribution of this patch as both here. I'll clarify
the commit message. 

> You could check if _RST is present at probe time and store the ACPI
> handle of it instead of the HID pointer.
> 
> This way you wouldn't need to repeat that check every time reset is used.
> 

Based on the requirement that the code can be executed without the presence
of _RST method for development purposes, I'm hesitant to use the handle of the
reset method as a gating factor.

-- 
Sinan Kaya
Qualcomm Technologies, Inc. on behalf of Qualcomm Innovation Center, Inc.
Qualcomm Innovation Center, Inc. is a member of Code Aurora Forum, a Linux 
Foundation Collaborative Project


Re: [PATCH V6 5/8] vfio: platform: call _RST method when using ACPI

2016-06-12 Thread Sinan Kaya
On 6/8/2016 6:31 PM, Rafael J. Wysocki wrote:
> On Sun, May 29, 2016 at 12:01 AM, Sinan Kaya  wrote:
>> The device tree code checks for the presence of a reset driver and calls
>> the of_reset function pointer by looking up the reset driver as a module.
>>
>> ACPI defines _RST method to perform device level reset. After the _RST
>> method is executed, the OS can resume using the device. _RST method is
>> expected to stop DMA transfers and IRQs.
>>
>> This patch checks the presence of _RST method and calls the _RST
>> method when reset is requested.
> 

A little bit of misinformation here. The current code is checking the presence
during probe time. If the presence of _RST method is required then probe is
aborted. Otherwise, probe will complete execution.

When reset call is to be executed, presence of _RST method is no longer checked.
Instead, the method is directly called. 

I was talking about the contribution of this patch as both here. I'll clarify
the commit message. 

> You could check if _RST is present at probe time and store the ACPI
> handle of it instead of the HID pointer.
> 
> This way you wouldn't need to repeat that check every time reset is used.
> 

Based on the requirement that the code can be executed without the presence
of _RST method for development purposes, I'm hesitant to use the handle of the
reset method as a gating factor.

-- 
Sinan Kaya
Qualcomm Technologies, Inc. on behalf of Qualcomm Innovation Center, Inc.
Qualcomm Innovation Center, Inc. is a member of Code Aurora Forum, a Linux 
Foundation Collaborative Project


Re: [PATCH v5 3/3] sched/cputime: Add steal time support to full dynticks CPU time accounting

2016-06-12 Thread Wanpeng Li
2016-06-08 18:14 GMT+08:00 Paolo Bonzini :
>
>
> On 08/06/2016 05:05, Wanpeng Li wrote:
>> From: Wanpeng Li 
>>
>> This patch adds guest steal-time support to full dynticks CPU
>> time accounting. After the following commit:
>>
>> ff9a9b4c4334 ("sched, time: Switch VIRT_CPU_ACCOUNTING_GEN to jiffy 
>> granularity")
>>
>> ... time sampling became jiffy based, even if it's still listened
>> to ring boundaries, so steal_account_process_tick() is reused
>> to account how many 'ticks' are stolen-time, after the last accumulation.
>>
>> Suggested-by: Rik van Riel 
>> Cc: Ingo Molnar 
>> Cc: Peter Zijlstra (Intel) 
>> Cc: Rik van Riel 
>> Cc: Thomas Gleixner 
>> Cc: Frederic Weisbecker 
>> Cc: Paolo Bonzini 
>> Cc: Radim Krčmář 
>> Signed-off-by: Wanpeng Li 
>> ---
>> v4 -> v5:
>>  * apply same logic to account_idle_time, so change get_vtime_delta instead
>> v3 -> v4:
>>  * fix grammar errors, thanks Ingo
>>  * cleanup fragile codes, thanks Ingo
>> v2 -> v3:
>>  * convert steal time jiffies to cputime
>> v1 -> v2:
>>  * fix divide zero bug, thanks Rik
>>
>>  kernel/sched/cputime.c | 13 +
>>  1 file changed, 9 insertions(+), 4 deletions(-)
>>
>> diff --git a/kernel/sched/cputime.c b/kernel/sched/cputime.c
>> index 75f98c5..b62f9f8 100644
>> --- a/kernel/sched/cputime.c
>> +++ b/kernel/sched/cputime.c
>> @@ -257,7 +257,7 @@ void account_idle_time(cputime_t cputime)
>>   cpustat[CPUTIME_IDLE] += (__force u64) cputime;
>>  }
>>
>> -static __always_inline bool steal_account_process_tick(void)
>> +static __always_inline unsigned long steal_account_process_tick(void)
>>  {
>>  #ifdef CONFIG_PARAVIRT
>>   if (static_key_false(&paravirt_steal_enabled)) {
>> @@ -279,7 +279,7 @@ static __always_inline bool 
>> steal_account_process_tick(void)
>>   return steal_jiffies;
>>   }
>>  #endif
>> - return false;
>> + return 0;
>>  }
>>
>>  /*
>> @@ -681,12 +681,17 @@ static cputime_t vtime_delta(struct task_struct *tsk)
>>  static cputime_t get_vtime_delta(struct task_struct *tsk)
>>  {
>>   unsigned long now = READ_ONCE(jiffies);
>> - unsigned long delta = now - tsk->vtime_snap;
>> + cputime_t delta_time, steal_time;
>>
>> + steal_time = jiffies_to_cputime(steal_account_process_tick());
>> + delta_time = jiffies_to_cputime(now - tsk->vtime_snap);
>>   WARN_ON_ONCE(tsk->vtime_snap_whence == VTIME_INACTIVE);
>>   tsk->vtime_snap = now;
>>
>> - return jiffies_to_cputime(delta);
>> + if (steal_time < delta_time)
>> + delta_time -= steal_time;
>> +
>> + return delta_time;
>
> I think this is wrong.  If you get more steal time than delta time
> (which as Rik noticed can happen due to partial jiffies), you will end
> up accounting things twice, once in steal_account_process_tick and once
> here.  In other words you'll get the exact bug you're trying to fix.
>
> The right thing is to add a max_jiffies argument to
> steal_account_process_tick.  steal_account_process_tick will not attempt
> to remove more than max_jiffies.  Here you pass delta_jiffies (i.e. now
> - tsk->vtime_snap) to steal_account_process_tick, existing callers can
> pass ULONG_MAX.  You can then
>
> return jiffies_to_cputime(delta_jiffies - steal_jiffies);
>
> in get_vtime_delta and not worry about underflow.

Do you mean something like below? Actually, I see delta_jiffies <
steal_jiffies sometimes.

diff --git a/kernel/sched/cputime.c b/kernel/sched/cputime.c
index 75f98c5..a7606a9 100644
--- a/kernel/sched/cputime.c
+++ b/kernel/sched/cputime.c
@@ -257,7 +257,7 @@ void account_idle_time(cputime_t cputime)
  cpustat[CPUTIME_IDLE] += (__force u64) cputime;
 }

-static __always_inline bool steal_account_process_tick(void)
+static __always_inline unsigned long
steal_account_process_tick(unsigned long max_jiffies)
 {
 #ifdef CONFIG_PARAVIRT
  if (static_key_false(&paravirt_steal_enabled)) {
@@ -272,14 +272,14 @@ static __always_inline bool
steal_account_process_tick(void)
  * time in jiffies. Lets cast the result to jiffies
  * granularity and account the rest on the next rounds.
  */
- steal_jiffies = nsecs_to_jiffies(steal);
+ steal_jiffies = min(nsecs_to_jiffies(steal), max_jiffies);
  this_rq()->prev_steal_time += jiffies_to_nsecs(steal_jiffies);

  account_steal_time(jiffies_to_cputime(steal_jiffies));
  return steal_jiffies;
  }
 #endif
- return false;
+ return 0;
 }

 /*
@@ -346,7 +346,7 @@ static void irqtime_account_process_tick(struct
task_struct *p, int user_tick,
  u64 cputime = (__force u64) cputime_one_jiffy;
  u64 *cpustat = kcpustat_this_cpu->cpustat;

- if (steal_account_process_tick())
+ if (steal_account_process_tick(ULONG_MAX))
  return;

  cputime *= ticks;
@@ -477,7 +477,7 @@ void account_process_tick(struct task_struct *p,
int user_tick)
  

Re: [PATCH v5 3/3] sched/cputime: Add steal time support to full dynticks CPU time accounting

2016-06-12 Thread Wanpeng Li
2016-06-08 18:14 GMT+08:00 Paolo Bonzini :
>
>
> On 08/06/2016 05:05, Wanpeng Li wrote:
>> From: Wanpeng Li 
>>
>> This patch adds guest steal-time support to full dynticks CPU
>> time accounting. After the following commit:
>>
>> ff9a9b4c4334 ("sched, time: Switch VIRT_CPU_ACCOUNTING_GEN to jiffy 
>> granularity")
>>
>> ... time sampling became jiffy based, even if it's still listened
>> to ring boundaries, so steal_account_process_tick() is reused
>> to account how many 'ticks' are stolen-time, after the last accumulation.
>>
>> Suggested-by: Rik van Riel 
>> Cc: Ingo Molnar 
>> Cc: Peter Zijlstra (Intel) 
>> Cc: Rik van Riel 
>> Cc: Thomas Gleixner 
>> Cc: Frederic Weisbecker 
>> Cc: Paolo Bonzini 
>> Cc: Radim Krčmář 
>> Signed-off-by: Wanpeng Li 
>> ---
>> v4 -> v5:
>>  * apply same logic to account_idle_time, so change get_vtime_delta instead
>> v3 -> v4:
>>  * fix grammar errors, thanks Ingo
>>  * cleanup fragile codes, thanks Ingo
>> v2 -> v3:
>>  * convert steal time jiffies to cputime
>> v1 -> v2:
>>  * fix divide zero bug, thanks Rik
>>
>>  kernel/sched/cputime.c | 13 +
>>  1 file changed, 9 insertions(+), 4 deletions(-)
>>
>> diff --git a/kernel/sched/cputime.c b/kernel/sched/cputime.c
>> index 75f98c5..b62f9f8 100644
>> --- a/kernel/sched/cputime.c
>> +++ b/kernel/sched/cputime.c
>> @@ -257,7 +257,7 @@ void account_idle_time(cputime_t cputime)
>>   cpustat[CPUTIME_IDLE] += (__force u64) cputime;
>>  }
>>
>> -static __always_inline bool steal_account_process_tick(void)
>> +static __always_inline unsigned long steal_account_process_tick(void)
>>  {
>>  #ifdef CONFIG_PARAVIRT
>>   if (static_key_false(_steal_enabled)) {
>> @@ -279,7 +279,7 @@ static __always_inline bool 
>> steal_account_process_tick(void)
>>   return steal_jiffies;
>>   }
>>  #endif
>> - return false;
>> + return 0;
>>  }
>>
>>  /*
>> @@ -681,12 +681,17 @@ static cputime_t vtime_delta(struct task_struct *tsk)
>>  static cputime_t get_vtime_delta(struct task_struct *tsk)
>>  {
>>   unsigned long now = READ_ONCE(jiffies);
>> - unsigned long delta = now - tsk->vtime_snap;
>> + cputime_t delta_time, steal_time;
>>
>> + steal_time = jiffies_to_cputime(steal_account_process_tick());
>> + delta_time = jiffies_to_cputime(now - tsk->vtime_snap);
>>   WARN_ON_ONCE(tsk->vtime_snap_whence == VTIME_INACTIVE);
>>   tsk->vtime_snap = now;
>>
>> - return jiffies_to_cputime(delta);
>> + if (steal_time < delta_time)
>> + delta_time -= steal_time;
>> +
>> + return delta_time;
>
> I think this is wrong.  If you get more steal time than delta time
> (which as Rik noticed can happen due to partial jiffies), you will end
> up accounting things twice, once in steal_account_process_tick and once
> here.  In other words you'll get the exact bug you're trying to fix.
>
> The right thing is to add a max_jiffies argument to
> steal_account_process_tick.  steal_account_process_tick will not attempt
> to remove more than max_jiffies.  Here you pass delta_jiffies (i.e. now
> - tsk->vtime_snap) to steal_account_process_tick, existing callers can
> pass ULONG_MAX.  You can then
>
> return jiffies_to_cputime(delta_jiffies - steal_jiffies);
>
> in get_vtime_delta and not worry about underflow.

Do you mean something like the patch below? Actually, I do see delta_jiffies <
steal_jiffies sometimes.

diff --git a/kernel/sched/cputime.c b/kernel/sched/cputime.c
index 75f98c5..a7606a9 100644
--- a/kernel/sched/cputime.c
+++ b/kernel/sched/cputime.c
@@ -257,7 +257,7 @@ void account_idle_time(cputime_t cputime)
  cpustat[CPUTIME_IDLE] += (__force u64) cputime;
 }

-static __always_inline bool steal_account_process_tick(void)
+static __always_inline unsigned long
steal_account_process_tick(unsigned long max_jiffies)
 {
 #ifdef CONFIG_PARAVIRT
  if (static_key_false(_steal_enabled)) {
@@ -272,14 +272,14 @@ static __always_inline bool
steal_account_process_tick(void)
  * time in jiffies. Lets cast the result to jiffies
  * granularity and account the rest on the next rounds.
  */
- steal_jiffies = nsecs_to_jiffies(steal);
+ steal_jiffies = min(nsecs_to_jiffies(steal), max_jiffies);
  this_rq()->prev_steal_time += jiffies_to_nsecs(steal_jiffies);

  account_steal_time(jiffies_to_cputime(steal_jiffies));
  return steal_jiffies;
  }
 #endif
- return false;
+ return 0;
 }

 /*
@@ -346,7 +346,7 @@ static void irqtime_account_process_tick(struct
task_struct *p, int user_tick,
  u64 cputime = (__force u64) cputime_one_jiffy;
  u64 *cpustat = kcpustat_this_cpu->cpustat;

- if (steal_account_process_tick())
+ if (steal_account_process_tick(ULONG_MAX))
  return;

  cputime *= ticks;
@@ -477,7 +477,7 @@ void account_process_tick(struct task_struct *p,
int user_tick)
  return;
  }

- if (steal_account_process_tick())
+ if (steal_account_process_tick(ULONG_MAX))
  return;

  if (user_tick)
@@ -681,12 +681,14 @@ static cputime_t vtime_delta(struct task_struct *tsk)
 static cputime_t 

Re: [PATCH] zsmalloc: keep first object offset in struct page

2016-06-12 Thread Minchan Kim
Andrew,

Please fold this into "zsmalloc: page migration support".

Thanks.

On Mon, Jun 13, 2016 at 12:20:15PM +0900, Minchan Kim wrote:
> In early draft of zspage migration, we couldn't use page._mapcount
> because it was used for storing movable flag so we added runtime
> calculation to get first object offset in a page but it causes rather
> many instruction and even bug.
> 
> Since then, we don't use page._mapcount as page flag any more so now
> there is no problem to use the field to store first object offset.
> 
> Cc: Sergey Senozhatsky 
> Signed-off-by: Minchan Kim 
> ---
>  mm/zsmalloc.c | 44 
>  1 file changed, 16 insertions(+), 28 deletions(-)
> 
> diff --git a/mm/zsmalloc.c b/mm/zsmalloc.c
> index 6a58edc9a015..4b70fcbfb69b 100644
> --- a/mm/zsmalloc.c
> +++ b/mm/zsmalloc.c
> @@ -512,6 +512,16 @@ static inline struct page *get_first_page(struct zspage 
> *zspage)
>   return first_page;
>  }
>  
> +static inline int get_first_obj_offset(struct page *page)
> +{
> + return page->units;
> +}
> +
> +static inline void set_first_obj_offset(struct page *page, int offset)
> +{
> + page->units = offset;
> +}
> +
>  static inline unsigned int get_freeobj(struct zspage *zspage)
>  {
>   return zspage->freeobj;
> @@ -872,31 +882,6 @@ static struct page *get_next_page(struct page *page)
>   return page->freelist;
>  }
>  
> -/* Get byte offset of first object in the @page */
> -static int get_first_obj_offset(struct size_class *class,
> - struct page *first_page, struct page *page)
> -{
> - int pos;
> - int page_idx = 0;
> - int ofs = 0;
> - struct page *cursor = first_page;
> -
> - if (first_page == page)
> - goto out;
> -
> - while (page != cursor) {
> - page_idx++;
> - cursor = get_next_page(cursor);
> - }
> -
> - pos = class->objs_per_zspage * class->size *
> - page_idx / class->pages_per_zspage;
> -
> - ofs = (pos + class->size) % PAGE_SIZE;
> -out:
> - return ofs;
> -}
> -
>  /**
>   * obj_to_location - get (, ) from encoded object value
>   * @page: page object resides in zspage
> @@ -966,6 +951,7 @@ static void reset_page(struct page *page)
>   clear_bit(PG_private, >flags);
>   clear_bit(PG_private_2, >flags);
>   set_page_private(page, 0);
> + page_mapcount_reset(page);
>   ClearPageHugeObject(page);
>   page->freelist = NULL;
>  }
> @@ -1064,6 +1050,8 @@ static void init_zspage(struct size_class *class, 
> struct zspage *zspage)
>   struct link_free *link;
>   void *vaddr;
>  
> + set_first_obj_offset(page, off);
> +
>   vaddr = kmap_atomic(page);
>   link = (struct link_free *)vaddr + off / sizeof(*link);
>  
> @@ -1762,9 +1750,8 @@ static unsigned long find_alloced_obj(struct size_class 
> *class,
>   int offset = 0;
>   unsigned long handle = 0;
>   void *addr = kmap_atomic(page);
> - struct zspage *zspage = get_zspage(page);
>  
> - offset = get_first_obj_offset(class, get_first_page(zspage), page);
> + offset = get_first_obj_offset(page);
>   offset += class->size * index;
>  
>   while (offset < PAGE_SIZE) {
> @@ -1976,6 +1963,7 @@ static void replace_sub_page(struct size_class *class, 
> struct zspage *zspage,
>   } while ((page = get_next_page(page)) != NULL);
>  
>   create_page_chain(class, zspage, pages);
> + set_first_obj_offset(newpage, get_first_obj_offset(oldpage));
>   if (unlikely(PageHugeObject(oldpage)))
>   newpage->index = oldpage->index;
>   __SetPageMovable(newpage, page_mapping(oldpage));
> @@ -2062,7 +2050,7 @@ int zs_page_migrate(struct address_space *mapping, 
> struct page *newpage,
>   get_zspage_mapping(zspage, _idx, );
>   pool = mapping->private_data;
>   class = pool->size_class[class_idx];
> - offset = get_first_obj_offset(class, get_first_page(zspage), page);
> + offset = get_first_obj_offset(page);
>  
>   spin_lock(>lock);
>   if (!get_zspage_inuse(zspage)) {
> -- 
> 1.9.1
> 


Re: [PATCH] zsmalloc: keep first object offset in struct page

2016-06-12 Thread Minchan Kim
Andrew,

Please fold this into "zsmalloc: page migration support".

Thanks.

On Mon, Jun 13, 2016 at 12:20:15PM +0900, Minchan Kim wrote:
> In early draft of zspage migration, we couldn't use page._mapcount
> because it was used for storing movable flag so we added runtime
> calculation to get first object offset in a page but it causes rather
> many instruction and even bug.
> 
> Since then, we don't use page._mapcount as page flag any more so now
> there is no problem to use the field to store first object offset.
> 
> Cc: Sergey Senozhatsky 
> Signed-off-by: Minchan Kim 
> ---
>  mm/zsmalloc.c | 44 
>  1 file changed, 16 insertions(+), 28 deletions(-)
> 
> diff --git a/mm/zsmalloc.c b/mm/zsmalloc.c
> index 6a58edc9a015..4b70fcbfb69b 100644
> --- a/mm/zsmalloc.c
> +++ b/mm/zsmalloc.c
> @@ -512,6 +512,16 @@ static inline struct page *get_first_page(struct zspage 
> *zspage)
>   return first_page;
>  }
>  
> +static inline int get_first_obj_offset(struct page *page)
> +{
> + return page->units;
> +}
> +
> +static inline void set_first_obj_offset(struct page *page, int offset)
> +{
> + page->units = offset;
> +}
> +
>  static inline unsigned int get_freeobj(struct zspage *zspage)
>  {
>   return zspage->freeobj;
> @@ -872,31 +882,6 @@ static struct page *get_next_page(struct page *page)
>   return page->freelist;
>  }
>  
> -/* Get byte offset of first object in the @page */
> -static int get_first_obj_offset(struct size_class *class,
> - struct page *first_page, struct page *page)
> -{
> - int pos;
> - int page_idx = 0;
> - int ofs = 0;
> - struct page *cursor = first_page;
> -
> - if (first_page == page)
> - goto out;
> -
> - while (page != cursor) {
> - page_idx++;
> - cursor = get_next_page(cursor);
> - }
> -
> - pos = class->objs_per_zspage * class->size *
> - page_idx / class->pages_per_zspage;
> -
> - ofs = (pos + class->size) % PAGE_SIZE;
> -out:
> - return ofs;
> -}
> -
>  /**
>   * obj_to_location - get (, ) from encoded object value
>   * @page: page object resides in zspage
> @@ -966,6 +951,7 @@ static void reset_page(struct page *page)
>   clear_bit(PG_private, >flags);
>   clear_bit(PG_private_2, >flags);
>   set_page_private(page, 0);
> + page_mapcount_reset(page);
>   ClearPageHugeObject(page);
>   page->freelist = NULL;
>  }
> @@ -1064,6 +1050,8 @@ static void init_zspage(struct size_class *class, 
> struct zspage *zspage)
>   struct link_free *link;
>   void *vaddr;
>  
> + set_first_obj_offset(page, off);
> +
>   vaddr = kmap_atomic(page);
>   link = (struct link_free *)vaddr + off / sizeof(*link);
>  
> @@ -1762,9 +1750,8 @@ static unsigned long find_alloced_obj(struct size_class 
> *class,
>   int offset = 0;
>   unsigned long handle = 0;
>   void *addr = kmap_atomic(page);
> - struct zspage *zspage = get_zspage(page);
>  
> - offset = get_first_obj_offset(class, get_first_page(zspage), page);
> + offset = get_first_obj_offset(page);
>   offset += class->size * index;
>  
>   while (offset < PAGE_SIZE) {
> @@ -1976,6 +1963,7 @@ static void replace_sub_page(struct size_class *class, 
> struct zspage *zspage,
>   } while ((page = get_next_page(page)) != NULL);
>  
>   create_page_chain(class, zspage, pages);
> + set_first_obj_offset(newpage, get_first_obj_offset(oldpage));
>   if (unlikely(PageHugeObject(oldpage)))
>   newpage->index = oldpage->index;
>   __SetPageMovable(newpage, page_mapping(oldpage));
> @@ -2062,7 +2050,7 @@ int zs_page_migrate(struct address_space *mapping, 
> struct page *newpage,
>   get_zspage_mapping(zspage, _idx, );
>   pool = mapping->private_data;
>   class = pool->size_class[class_idx];
> - offset = get_first_obj_offset(class, get_first_page(zspage), page);
> + offset = get_first_obj_offset(page);
>  
>   spin_lock(>lock);
>   if (!get_zspage_inuse(zspage)) {
> -- 
> 1.9.1
> 


Re: [PATCH] clk: rockchip: add pclk_vio_grf to critical clock on the RK3399

2016-06-12 Thread Xing Zheng

Hi Doug,

On 2016年06月13日 11:10, Xing Zheng wrote:

Hi Doug,

On 2016年06月13日 05:32, Doug Anderson wrote:

Xing,

On Sun, Jun 12, 2016 at 2:48 AM, Xing 
Zheng  wrote:

The pclk_vio_grf supplies power for the GRF IOs; if it is disabled, it will
cause abnormal operation of the GRF.

The clock tree of the pclk_vio looks like this:
  | --> pclk_vio_grf
... pclk_vio | --> pclk_mipi_dsi1
  | --> pclk_mipi_dsi0

and the pclk_mipi_dsi0 and pclk_mipi_dsi1 don't have the flag
CLK_IGNORE_UNUSED, and they will be disabled by clk_disable_unused
when startup:
clk_disable_unused
   --> clk_disable_unprepare
 --> clk_disable
   --> clk_core_disable(core->parent)

then the pclk_vio_grf is also disabled. Therefore, we need to add
pclk_vio_grf to the critical clocks to avoid disabling pclk_vio and
pclk_vio_grf.

Tested-by: Yakir Yang
Signed-off-by: Yakir Yang
Signed-off-by: Brian Norris
Signed-off-by: Xing Zheng
---

  drivers/clk/rockchip/clk-rk3399.c |1 +
  1 file changed, 1 insertion(+)

diff --git a/drivers/clk/rockchip/clk-rk3399.c 
b/drivers/clk/rockchip/clk-rk3399.c

index b6742fa..7ecb12c3 100644
--- a/drivers/clk/rockchip/clk-rk3399.c
+++ b/drivers/clk/rockchip/clk-rk3399.c
@@ -1485,6 +1485,7 @@ static const char *const 
rk3399_cru_critical_clocks[] __initconst = {

 "gpll_hclk_perilp1_src",
 "gpll_aclk_perilp0_src",
 "gpll_aclk_perihp_src",
+   "pclk_vio_grf",

This clock is only needed when doing video output (like eDP), right?
That means it is not really a critical clock.  Critical clocks are
supposed to be ones that are needed for the basic functioning of the
system and that can never be turned off in any circumstances. In this
case, if someone were running a rk3399 device and didn't have any
video output they would want this clock off.

Can you figure out in exactly which circumstances this clock needs to
be on and then add a proper consumer of this clock?  For instance, if
this clock is needed whenever the VOP is outputting data, then the VOP
should be a client and should turn this clock on and off when video is
being output.  If this clock is needed whenever you access VOP
registers, then the VOP should be a client and turn this clock on
around register accesses.


Additionally, we are discussing that we should turn the "pclk_vio" on and 
off in

video drivers when the video consumer needs this clock.

Thanks.




Yes, the pclk_vio_grf is needed for doing video output.
and pclk_vio_grf is the supply for: grf_soc_con9, 20~26, grf_hdcp

According to our design folks, we have many GRF registers in different power 
domains,
and these GRF gates should always be enabled. That way, we can 
avoid some
of the exception problems when operating on GRF registers, and it costs only a 
very small

increase in power consumption (about <=1mA).

I will refer to the latest TRM and send a new patch to always enable 
these GRFs.


Please drop this patch.


--
- Xing Zheng




Re: [PATCH] clk: rockchip: add pclk_vio_grf to critical clock on the RK3399

2016-06-12 Thread Xing Zheng

Hi Doug,

On 2016年06月13日 11:10, Xing Zheng wrote:

Hi Doug,

On 2016年06月13日 05:32, Doug Anderson wrote:

Xing,

On Sun, Jun 12, 2016 at 2:48 AM, Xing 
Zheng  wrote:

The pclk_vio_grf supplies power for the GRF IOs; if it is disabled, it will
cause abnormal operation of the GRF.

The clock tree of the pclk_vio looks like this:
  | --> pclk_vio_grf
... pclk_vio | --> pclk_mipi_dsi1
  | --> pclk_mipi_dsi0

and the pclk_mipi_dsi0 and pclk_mipi_dsi1 don't have the flag
CLK_IGNORE_UNUSED, and they will be disabled by clk_disable_unused
when startup:
clk_disable_unused
   --> clk_disable_unprepare
 --> clk_disable
   --> clk_core_disable(core->parent)

then the pclk_vio_grf is also disabled. Therefore, we need to add
pclk_vio_grf to the critical clocks to avoid disabling pclk_vio and
pclk_vio_grf.

Tested-by: Yakir Yang
Signed-off-by: Yakir Yang
Signed-off-by: Brian Norris
Signed-off-by: Xing Zheng
---

  drivers/clk/rockchip/clk-rk3399.c |1 +
  1 file changed, 1 insertion(+)

diff --git a/drivers/clk/rockchip/clk-rk3399.c 
b/drivers/clk/rockchip/clk-rk3399.c

index b6742fa..7ecb12c3 100644
--- a/drivers/clk/rockchip/clk-rk3399.c
+++ b/drivers/clk/rockchip/clk-rk3399.c
@@ -1485,6 +1485,7 @@ static const char *const 
rk3399_cru_critical_clocks[] __initconst = {

 "gpll_hclk_perilp1_src",
 "gpll_aclk_perilp0_src",
 "gpll_aclk_perihp_src",
+   "pclk_vio_grf",

This clock is only needed when doing video output (like eDP), right?
That means it is not really a critical clock.  Critical clocks are
supposed to be ones that are needed for the basic functioning of the
system and that can never be turned off in any circumstances. In this
case, if someone were running a rk3399 device and didn't have any
video output they would want this clock off.

Can you figure out in exactly which circumstances this clock needs to
be on and then add a proper consumer of this clock?  For instance, if
this clock is needed whenever the VOP is outputting data, then the VOP
should be a client and should turn this clock on and off when video is
being output.  If this clock is needed whenever you access VOP
registers, then the VOP should be a client and turn this clock on
around register accesses.


Additionally, we are discussing that we should turn the "pclk_vio" on and 
off in

video drivers when the video consumer needs this clock.

Thanks.




Yes, the pclk_vio_grf is needed for doing video output.
and pclk_vio_grf is the supply for: grf_soc_con9, 20~26, grf_hdcp

According to our design folks, we have many GRF registers in different power 
domains,
and these GRF gates should always be enabled. That way, we can 
avoid some
of the exception problems when operating on GRF registers, and it costs only a 
very small

increase in power consumption (about <=1mA).

I will refer to the latest TRM and send a new patch to always enable 
these GRFs.


Please drop this patch.


--
- Xing Zheng




[PATCH] zsmalloc: keep first object offset in struct page

2016-06-12 Thread Minchan Kim
In the early draft of zspage migration, we couldn't use page._mapcount
because it was used for storing the movable flag, so we added a runtime
calculation to get the first object offset in a page, but it costs rather
many instructions and even caused a bug.

Since then, we no longer use page._mapcount as a page flag, so now
there is no problem using the field to store the first object offset.

Cc: Sergey Senozhatsky 
Signed-off-by: Minchan Kim 
---
 mm/zsmalloc.c | 44 
 1 file changed, 16 insertions(+), 28 deletions(-)

diff --git a/mm/zsmalloc.c b/mm/zsmalloc.c
index 6a58edc9a015..4b70fcbfb69b 100644
--- a/mm/zsmalloc.c
+++ b/mm/zsmalloc.c
@@ -512,6 +512,16 @@ static inline struct page *get_first_page(struct zspage 
*zspage)
return first_page;
 }
 
+static inline int get_first_obj_offset(struct page *page)
+{
+   return page->units;
+}
+
+static inline void set_first_obj_offset(struct page *page, int offset)
+{
+   page->units = offset;
+}
+
 static inline unsigned int get_freeobj(struct zspage *zspage)
 {
return zspage->freeobj;
@@ -872,31 +882,6 @@ static struct page *get_next_page(struct page *page)
return page->freelist;
 }
 
-/* Get byte offset of first object in the @page */
-static int get_first_obj_offset(struct size_class *class,
-   struct page *first_page, struct page *page)
-{
-   int pos;
-   int page_idx = 0;
-   int ofs = 0;
-   struct page *cursor = first_page;
-
-   if (first_page == page)
-   goto out;
-
-   while (page != cursor) {
-   page_idx++;
-   cursor = get_next_page(cursor);
-   }
-
-   pos = class->objs_per_zspage * class->size *
-   page_idx / class->pages_per_zspage;
-
-   ofs = (pos + class->size) % PAGE_SIZE;
-out:
-   return ofs;
-}
-
 /**
  * obj_to_location - get (, ) from encoded object value
  * @page: page object resides in zspage
@@ -966,6 +951,7 @@ static void reset_page(struct page *page)
clear_bit(PG_private, >flags);
clear_bit(PG_private_2, >flags);
set_page_private(page, 0);
+   page_mapcount_reset(page);
ClearPageHugeObject(page);
page->freelist = NULL;
 }
@@ -1064,6 +1050,8 @@ static void init_zspage(struct size_class *class, struct 
zspage *zspage)
struct link_free *link;
void *vaddr;
 
+   set_first_obj_offset(page, off);
+
vaddr = kmap_atomic(page);
link = (struct link_free *)vaddr + off / sizeof(*link);
 
@@ -1762,9 +1750,8 @@ static unsigned long find_alloced_obj(struct size_class 
*class,
int offset = 0;
unsigned long handle = 0;
void *addr = kmap_atomic(page);
-   struct zspage *zspage = get_zspage(page);
 
-   offset = get_first_obj_offset(class, get_first_page(zspage), page);
+   offset = get_first_obj_offset(page);
offset += class->size * index;
 
while (offset < PAGE_SIZE) {
@@ -1976,6 +1963,7 @@ static void replace_sub_page(struct size_class *class, 
struct zspage *zspage,
} while ((page = get_next_page(page)) != NULL);
 
create_page_chain(class, zspage, pages);
+   set_first_obj_offset(newpage, get_first_obj_offset(oldpage));
if (unlikely(PageHugeObject(oldpage)))
newpage->index = oldpage->index;
__SetPageMovable(newpage, page_mapping(oldpage));
@@ -2062,7 +2050,7 @@ int zs_page_migrate(struct address_space *mapping, struct 
page *newpage,
get_zspage_mapping(zspage, _idx, );
pool = mapping->private_data;
class = pool->size_class[class_idx];
-   offset = get_first_obj_offset(class, get_first_page(zspage), page);
+   offset = get_first_obj_offset(page);
 
spin_lock(>lock);
if (!get_zspage_inuse(zspage)) {
-- 
1.9.1



[PATCH v2] vfs: add simple direct-mapped dcache lookup front-end

2016-06-12 Thread George Spelvin
This is an old patch by Linus that he asked if I could fix the race
conditions in.  Posted for comment on the RCU abuse (search for "Evil
RCU Hack") and performance in general.

[Linus speaking, Thu May 31, 2012]

I've pushed __d_lookup_rcu() just about as far as I could, and it still
had some problems.

The problems were mainly due to:

- the complexity of the slow-case handling causes register spills,
- the hash chain lookup loop causes not only register pressure, but
  also the extra magic "mask off lock bit from the hash chain head
  pointer" etc. logic, and
- the hash list needs to be dynamically sized (we want *big* caches, but
  you can't use the same size for big and small machines), which causes
  the initial hash lookup itself to be more complex.

This looks like a viable solution to all three problems, and it is
actually surprisingly simple: make a trivial fixed-size direct-mapped L1
dentry cache.  No chains, no locking, no nothing.

This gives measurable improvement on my microbenchmark, and gets good
hit-rates on both kernel compiles and even on something like "updatedb",
which I'd have expected to be one of the worst possible cases.
Apparently updatedb still ends up looking up the same files (/etc/fstab
etc) a lot.  So those good hit-rates seem to often be due to really
stupid programming, but hey, I think we all agree that "stupid
programming" is likely the common case that we generally do need to also
optimize for ;)

For my kernel compile benchmark ("make -j" on a fully built tree), the
profile shows (this is kernel-only profile, so user space overhead
removed):

8.19%  [k] link_path_walk
7.74%  [k] __d_lookup_rcu
5.66%  [k] selinux_inode_permission
3.73%  [k] do_lookup
2.86%  [k] path_lookupat
2.72%  [k] avc_has_perm_noaudit
2.71%  [k] inode_has_perm.isra.49.constprop.71
2.68%  [k] avc_lookup
2.51%  [k] generic_permission
...
0.78%  [k] __d_lookup_rcu_slow
...

where "__d_lookup_rcu_slow()" is the exact same old __d_lookup_rcu(), so
it's not really "slow", but it's quite noticeably slower than the new
streamlined __d_lookup_rcu().  And as you can tell, that means that we
must have a 90%+ hitrate in the new L1 dcache lookup, since we only see
10% as much time in the slow routine as in the L1 front-end.

[George Spelvin speaking]

I fixed the race conditions in Linus's code, added Kconfig support,
and a number of comments.

I have two concerns about the performance of this code:

1) Since it was first written, RCU dcache lookup has gone completely
   lockless and is even faster.
2) By adding a shared data structure that is written randomly by multiple
   CPUs, this patch undoes a lot of that optimization.  Even if it's a
   win on a single-socket system, it may be a net loss on a larger one.

Thanks to Fengguang Wu's 0day build bot for catching a race that
triggered a WARN_ON in v1 of this patch.  dentry_delay_free is now
much more careful about what it writes over.

Cc: Al Viro 
Cc: Nick Piggin 
Cc: Miklos Szeredi 
Cc: Paul E. McKenney 
Cc: Randy Dunlap 
Signed-off-by: Linus Torvalds 
Signed-off-by: George Spelvin 
---
The 0day build bot caught a race condition I added: dentry_delay_free
was too eager to set the callback pointer, tripping the
WARN_ON(!hlist_unhashed) in dentry_free().  Updated to be more
conservative, with analysis.

That actually slightly reduced the overhead.  v1 had to have
dentry_free adjust d_name.name in unusual cases to enable the
delayed callback to know which final callback to invoke.

v2 doesn't need that; it inspects the function pointer instead.

 fs/Kconfig  |  28 
 fs/dcache.c | 226 ++--
 2 files changed, 250 insertions(+), 4 deletions(-)

diff --git a/fs/Kconfig b/fs/Kconfig
index b8fcb416..f0c8ed44 100644
--- a/fs/Kconfig
+++ b/fs/Kconfig
@@ -8,6 +8,34 @@ menu "File systems"
 config DCACHE_WORD_ACCESS
bool
 
+config L1_DCACHE_BITS
+   int "Dcache level-1 cache size (bits)"
+   range 0 20
+   default 0 if !EXPERT
+   default 0 if NUMA
+   default 10 if BASE_SMALL
+   default 13
+   help
+ The Linux kernel maintains a large cache of "dentries"
+ (directory entries) for the performance-critical task of
+ converting file names to inodes.  This option enables a smaller
+ direct-mapped "level-1 cache" in front of the main dcache.
+
+ (This software "dcache" is quite different from the CPU's data
+ cache, or "D-cache".  Sorry for the confusingly similar names.)
+
+ This option specifies the size of this cache, as a power of 2.
+ For example, 13 means 2^13 = 8192 entries in the L1 dcache.
+ Specify 0 to turn off the L1 dcache entirely.
+
+ The cost of enabling this is one pointer per entry, plus a
+ small 

[PATCH] zsmalloc: keep first object offset in struct page

2016-06-12 Thread Minchan Kim
In the early draft of zspage migration, we couldn't use page._mapcount
because it was used for storing the movable flag, so we added a runtime
calculation to get the first object offset in a page, but it costs rather
many instructions and even caused a bug.

Since then, we no longer use page._mapcount as a page flag, so now
there is no problem using the field to store the first object offset.

Cc: Sergey Senozhatsky 
Signed-off-by: Minchan Kim 
---
 mm/zsmalloc.c | 44 
 1 file changed, 16 insertions(+), 28 deletions(-)

diff --git a/mm/zsmalloc.c b/mm/zsmalloc.c
index 6a58edc9a015..4b70fcbfb69b 100644
--- a/mm/zsmalloc.c
+++ b/mm/zsmalloc.c
@@ -512,6 +512,16 @@ static inline struct page *get_first_page(struct zspage 
*zspage)
return first_page;
 }
 
+static inline int get_first_obj_offset(struct page *page)
+{
+   return page->units;
+}
+
+static inline void set_first_obj_offset(struct page *page, int offset)
+{
+   page->units = offset;
+}
+
 static inline unsigned int get_freeobj(struct zspage *zspage)
 {
return zspage->freeobj;
@@ -872,31 +882,6 @@ static struct page *get_next_page(struct page *page)
return page->freelist;
 }
 
-/* Get byte offset of first object in the @page */
-static int get_first_obj_offset(struct size_class *class,
-   struct page *first_page, struct page *page)
-{
-   int pos;
-   int page_idx = 0;
-   int ofs = 0;
-   struct page *cursor = first_page;
-
-   if (first_page == page)
-   goto out;
-
-   while (page != cursor) {
-   page_idx++;
-   cursor = get_next_page(cursor);
-   }
-
-   pos = class->objs_per_zspage * class->size *
-   page_idx / class->pages_per_zspage;
-
-   ofs = (pos + class->size) % PAGE_SIZE;
-out:
-   return ofs;
-}
-
 /**
  * obj_to_location - get (, ) from encoded object value
  * @page: page object resides in zspage
@@ -966,6 +951,7 @@ static void reset_page(struct page *page)
clear_bit(PG_private, >flags);
clear_bit(PG_private_2, >flags);
set_page_private(page, 0);
+   page_mapcount_reset(page);
ClearPageHugeObject(page);
page->freelist = NULL;
 }
@@ -1064,6 +1050,8 @@ static void init_zspage(struct size_class *class, struct 
zspage *zspage)
struct link_free *link;
void *vaddr;
 
+   set_first_obj_offset(page, off);
+
vaddr = kmap_atomic(page);
link = (struct link_free *)vaddr + off / sizeof(*link);
 
@@ -1762,9 +1750,8 @@ static unsigned long find_alloced_obj(struct size_class 
*class,
int offset = 0;
unsigned long handle = 0;
void *addr = kmap_atomic(page);
-   struct zspage *zspage = get_zspage(page);
 
-   offset = get_first_obj_offset(class, get_first_page(zspage), page);
+   offset = get_first_obj_offset(page);
offset += class->size * index;
 
while (offset < PAGE_SIZE) {
@@ -1976,6 +1963,7 @@ static void replace_sub_page(struct size_class *class, 
struct zspage *zspage,
} while ((page = get_next_page(page)) != NULL);
 
create_page_chain(class, zspage, pages);
+   set_first_obj_offset(newpage, get_first_obj_offset(oldpage));
if (unlikely(PageHugeObject(oldpage)))
newpage->index = oldpage->index;
__SetPageMovable(newpage, page_mapping(oldpage));
@@ -2062,7 +2050,7 @@ int zs_page_migrate(struct address_space *mapping, struct 
page *newpage,
get_zspage_mapping(zspage, _idx, );
pool = mapping->private_data;
class = pool->size_class[class_idx];
-   offset = get_first_obj_offset(class, get_first_page(zspage), page);
+   offset = get_first_obj_offset(page);
 
spin_lock(>lock);
if (!get_zspage_inuse(zspage)) {
-- 
1.9.1



[PATCH v2] vfs: add simple direct-mapped dcache lookup front-end

2016-06-12 Thread George Spelvin
This is an old patch by Linus that he asked if I could fix the race
conditions in.  Posted for comment on the RCU abuse (search for "Evil
RCU Hack") and performance in general.

[Linus speaking, Thu May 31, 2012]

I've pushed __d_lookup_rcu() just about as far as I could, and it still
had some problems.

The problems were mainly due to:

- the complexity of the slow-case handling causes register spills,
- the hash chain lookup loop causes not only register pressure, but
  also the extra magic "mask off lock bit from the hash chain head
  pointer" etc. logic, and
- the hash list needs to be dynamically sized (we want *big* caches, but
  you can't use the same size for big and small machines), which causes
  the initial hash lookup itself to be more complex.

This looks like a viable solution to all three problems, and it is
actually surprisingly simple: make a trivial fixed-size direct-mapped L1
dentry cache.  No chains, no locking, no nothing.

This gives measurable improvement on my microbenchmark, and gets good
hit-rates on both kernel compiles and even on something like "updatedb",
which I'd have expected to be one of the worst possible cases.
Apparently updatedb still ends up looking up the same files (/etc/fstab
etc) a lot.  So those good hit-rates seem to often be due to really
stupid programming, but hey, I think we all agree that "stupid
programming" is likely the common case that we generally do need to also
optimize for ;)

For my kernel compile benchmark ("make -j" on a fully built tree), the
profile shows (this is kernel-only profile, so user space overhead
removed):

8.19%  [k] link_path_walk
7.74%  [k] __d_lookup_rcu
5.66%  [k] selinux_inode_permission
3.73%  [k] do_lookup
2.86%  [k] path_lookupat
2.72%  [k] avc_has_perm_noaudit
2.71%  [k] inode_has_perm.isra.49.constprop.71
2.68%  [k] avc_lookup
2.51%  [k] generic_permission
...
0.78%  [k] __d_lookup_rcu_slow
...

where "__d_lookup_rcu_slow()" is the exact same old __d_lookup_rcu(), so
it's not really "slow", but it's quite noticeably slower than the new
streamlined __d_lookup_rcu().  And as you can tell, that means that we
must have a 90%+ hitrate in the new L1 dcache lookup, since we only see
10% as much time in the slow routine as in the L1 front-end.

[George Spelvin speaking]

I fixed the race conditions in Linus's code, added Kconfig support,
and a number of comments.

I have two concerns about the performance of this code:

1) Since it was first written, RCU dcache lookup has gone completely
   lockless and is even faster.
2) By adding a shared data structure that is written randomly by multiple
   CPUs, this patch undoes a lot of that optimization.  Even if it's a
   win on a single-socket system, it may be a net loss on a larger one.

Thanks to Fengguang Wu's 0day build bot for catching a race that
triggered a WARN_ON in v1 of this patch.  dentry_delay_free is now
much more careful about what it writes over.

Cc: Al Viro 
Cc: Nick Piggin 
Cc: Miklos Szeredi 
Cc: Paul E. McKenney 
Cc: Randy Dunlap 
Signed-off-by: Linus Torvalds 
Signed-off-by: George Spelvin 
---
The 0day build bot caught a race condition I added: dentry_delay_free
was too eager to set the callback pointer, tripping the
WARN_ON(!hlist_unhashed) in dentry_free().  Updated to be more
conservative, with analysis.

That actually slightly reduced the overhead.  v1 had to have
dentry_free adjust d_name.name in unusual cases to enable the
delayed callback to know which final callback to invoke.

v2 doesn't need that; it inspects the function pointer instead.

 fs/Kconfig  |  28 
 fs/dcache.c | 226 ++--
 2 files changed, 250 insertions(+), 4 deletions(-)

diff --git a/fs/Kconfig b/fs/Kconfig
index b8fcb416..f0c8ed44 100644
--- a/fs/Kconfig
+++ b/fs/Kconfig
@@ -8,6 +8,34 @@ menu "File systems"
 config DCACHE_WORD_ACCESS
bool
 
+config L1_DCACHE_BITS
+   int "Dcache level-1 cache size (bits)"
+   range 0 20
+   default 0 if !EXPERT
+   default 0 if NUMA
+   default 10 if BASE_SMALL
+   default 13
+   help
+ The Linux kernel maintains a large cache of "dentries"
+ (directory entries) for the performance-critical task of
+ converting file names to inodes.  This option enables a smaller
+ direct-mapped "level-1 cache" in front of the main dcache.
+
+ (This software "dcache" is quite different from the CPU's data
+ cache, or "D-cache".  Sorry for the confusingly similar names.)
+
+ This option specifies the size of this cache, as a power of 2.
+ For example, 13 means 2^13 = 8192 entries in the L1 dcache.
+ Specify 0 to turn off the L1 dcache entirely.
+
+ The cost of enabling this is one pointer per entry, plus a
+ small amount of code.
+
+ This is an experimental feature which hopes to speed up
+ single-socket machines.  On larger systems, the extra updates
+ generated 

Re: [PATCH v4] udp reuseport: fix packet of same flow hashed to different socket

2016-06-12 Thread Eric Dumazet
On Mon, 2016-06-13 at 11:02 +0800, Su Xuemin wrote:
> From: "Su, Xuemin" 
> 
> There is a corner case in which udp packets belonging to a same
> flow are hashed to different socket when hslot->count changes from 10
> to 11:
...
> Signed-off-by: Su, Xuemin 
> Signed-off-by: Eric Dumazet 
> ---
> I use this tree to generate the patch:
>   git://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
> 
>  net/ipv4/udp.c | 73 
> +-
>  net/ipv6/udp.c | 71 +---
>  2 files changed, 32 insertions(+), 112 deletions(-)

Very nice simplification of UDP stack, thanks a lot for finalizing this.





Re: [PATCH v4] udp reuseport: fix packet of same flow hashed to different socket

2016-06-12 Thread Eric Dumazet
On Mon, 2016-06-13 at 11:02 +0800, Su Xuemin wrote:
> From: "Su, Xuemin" 
> 
> There is a corner case in which udp packets belonging to a same
> flow are hashed to different socket when hslot->count changes from 10
> to 11:
...
> Signed-off-by: Su, Xuemin 
> Signed-off-by: Eric Dumazet 
> ---
> I use this tree to generate the patch:
>   git://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
> 
>  net/ipv4/udp.c | 73 
> +-
>  net/ipv6/udp.c | 71 +---
>  2 files changed, 32 insertions(+), 112 deletions(-)

Very nice simplification of UDP stack, thanks a lot for finalizing this.





Re: [PATCH V6 7/8] vfio: platform: check reset call return code during open

2016-06-12 Thread Sinan Kaya
On 6/7/2016 4:21 PM, Auger Eric wrote:
>> -vfio_platform_call_reset(vdev, NULL);
>> > +  ret = vfio_platform_call_reset(vdev, _dbg);
>> > +  if (ret && vdev->reset_required) {
>> > +  dev_warn(vdev->device, "reset driver is required and 
>> > reset call failed in open (%d) %s\n",
>> > +   ret, extra_dbg ? extra_dbg : "");
>> > +  goto err_irq;
> I am afraid you need to tear down the resources allocated by 
> vfio_platform_irq_init. 
> 
> Best Regards
> 
> Eric

I added this to the error path and replaced the goto above with err_rst.

+err_rst:
+vfio_platform_irq_cleanup(vdev);
err_irq:
vfio_platform_regions_cleanup(vdev);

-- 
Sinan Kaya
Qualcomm Technologies, Inc. on behalf of Qualcomm Innovation Center, Inc.
Qualcomm Innovation Center, Inc. is a member of Code Aurora Forum, a Linux 
Foundation Collaborative Project


Re: [PATCH V6 7/8] vfio: platform: check reset call return code during open

2016-06-12 Thread Sinan Kaya
On 6/7/2016 4:21 PM, Auger Eric wrote:
>> -vfio_platform_call_reset(vdev, NULL);
>> > +  ret = vfio_platform_call_reset(vdev, _dbg);
>> > +  if (ret && vdev->reset_required) {
>> > +  dev_warn(vdev->device, "reset driver is required and 
>> > reset call failed in open (%d) %s\n",
>> > +   ret, extra_dbg ? extra_dbg : "");
>> > +  goto err_irq;
> I am afraid you need to tear down the resources allocated by 
> vfio_platform_irq_init. 
> 
> Best Regards
> 
> Eric

I added this to the error path and replaced the goto above with err_rst.

+err_rst:
+vfio_platform_irq_cleanup(vdev);
err_irq:
vfio_platform_regions_cleanup(vdev);

-- 
Sinan Kaya
Qualcomm Technologies, Inc. on behalf of Qualcomm Innovation Center, Inc.
Qualcomm Innovation Center, Inc. is a member of Code Aurora Forum, a Linux 
Foundation Collaborative Project


Re: [PATCH] clk: rockchip: add pclk_vio_grf to critical clock on the RK3399

2016-06-12 Thread Xing Zheng

Hi Doug,

On 2016年06月13日 05:32, Doug Anderson wrote:

Xing,

On Sun, Jun 12, 2016 at 2:48 AM, Xing Zheng  wrote:

The pclk_vio_grf supply power for GRF IOs, if it is disabled, will
cause abnormal operation of the GRF.

The clock tree of the pclk_vio like this:
  | --> pclk_vio_grf
... pclk_vio | --> pclk_mipi_dsi1
  | --> pclk_mipi_dsi0

and the pclk_mipi_dsi0 and pclk_mipi_dsi1 don't have the flag
CLK_IGNORE_UNUSED, and they will be disabled by clk_disable_unused
when startup:
clk_disable_unused
   --> clk_disable_unprepare
 --> clk_disable
   --> clk_core_disable(core->parent)

then, the pclk_vio_grf also is disabled. Therefore, we need to add
pclk_vio_grf to critical clock and avoid to disable pclk_vio and
pclk_vio_grf.

Tested-by: Yakir Yang
Signed-off-by: Yakir Yang
Signed-off-by: Brian Norris
Signed-off-by: Xing Zheng
---

  drivers/clk/rockchip/clk-rk3399.c |1 +
  1 file changed, 1 insertion(+)

diff --git a/drivers/clk/rockchip/clk-rk3399.c 
b/drivers/clk/rockchip/clk-rk3399.c
index b6742fa..7ecb12c3 100644
--- a/drivers/clk/rockchip/clk-rk3399.c
+++ b/drivers/clk/rockchip/clk-rk3399.c
@@ -1485,6 +1485,7 @@ static const char *const rk3399_cru_critical_clocks[] 
__initconst = {
 "gpll_hclk_perilp1_src",
 "gpll_aclk_perilp0_src",
 "gpll_aclk_perihp_src",
+   "pclk_vio_grf",

This clock is only needed when doing video output (like eDP), right?
That means it is not really a critical clock.  Critical clocks are
supposed to be ones that are needed for the basic functioning of the
system and that can never be turned off in any circumstances.  In this
case, if someone were running a rk3399 device and didn't have any
video output they would want this clock off.

Can you figure out in exactly which circumstances this clock needs to
be on and then add a proper consumer of this clock?  For instance, if
this clock is needed whenever the VOP is outputting data, then the VOP
should be a client and should turn this clock on and off when video is
being output.  If this clock is needed whenever you access VOP
registers, then the VOP should be a client and turn this clock on
around register accesses.

-Doug


Yes, the pclk_vio_grf is needed for doing video output.
and pclk_vio_grf supplies: grf_soc_con9, 20~26, grf_hdcp

According to our design folks, we have many GRF registers in different
power domains, and these GRF gates should always be enabled. That way we
can avoid some exception problems when operating on GRF registers, and it
costs only a very small increase in power consumption (about <=1mA).

I will refer to the latest TRM and send a new patch that always enables
these GRFs.


Please drop this patch.

Thanks.

--
- Xing Zheng




Re: [PATCH] clk: rockchip: add pclk_vio_grf to critical clock on the RK3399

2016-06-12 Thread Xing Zheng

Hi Doug,

On 2016年06月13日 05:32, Doug Anderson wrote:

Xing,

On Sun, Jun 12, 2016 at 2:48 AM, Xing Zheng  wrote:

The pclk_vio_grf supply power for GRF IOs, if it is disabled, will
cause abnormal operation of the GRF.

The clock tree of the pclk_vio like this:
  | --> pclk_vio_grf
... pclk_vio | --> pclk_mipi_dsi1
  | --> pclk_mipi_dsi0

and the pclk_mipi_dsi0 and pclk_mipi_dsi1 don't have the flag
CLK_IGNORE_UNUSED, and they will be disabled by clk_disable_unused
when startup:
clk_disable_unused
   --> clk_disable_unprepare
 --> clk_disable
   --> clk_core_disable(core->parent)

then, the pclk_vio_grf also is disabled. Therefore, we need to add
pclk_vio_grf to critical clock and avoid to disable pclk_vio and
pclk_vio_grf.

Tested-by: Yakir Yang
Signed-off-by: Yakir Yang
Signed-off-by: Brian Norris
Signed-off-by: Xing Zheng
---

  drivers/clk/rockchip/clk-rk3399.c |1 +
  1 file changed, 1 insertion(+)

diff --git a/drivers/clk/rockchip/clk-rk3399.c 
b/drivers/clk/rockchip/clk-rk3399.c
index b6742fa..7ecb12c3 100644
--- a/drivers/clk/rockchip/clk-rk3399.c
+++ b/drivers/clk/rockchip/clk-rk3399.c
@@ -1485,6 +1485,7 @@ static const char *const rk3399_cru_critical_clocks[] 
__initconst = {
 "gpll_hclk_perilp1_src",
 "gpll_aclk_perilp0_src",
 "gpll_aclk_perihp_src",
+   "pclk_vio_grf",

This clock is only needed when doing video output (like eDP), right?
That means it is not really a critical clock.  Critical clocks are
supposed to be ones that are needed for the basic functioning of the
system and that can never be turned off in any circumstances.  In this
case, if someone were running a rk3399 device and didn't have any
video output they would want this clock off.

Can you figure out in exactly which circumstances this clock needs to
be on and then add a proper consumer of this clock?  For instance, if
this clock is needed whenever the VOP is outputting data, then the VOP
should be a client and should turn this clock on and off when video is
being output.  If this clock is needed whenever you access VOP
registers, then the VOP should be a client and turn this clock on
around register accesses.

-Doug


Yes, the pclk_vio_grf is needed for doing video output.
and pclk_vio_grf supplies: grf_soc_con9, 20~26, grf_hdcp

According to our design folks, we have many GRF registers in different
power domains, and these GRF gates should always be enabled. That way we
can avoid some exception problems when operating on GRF registers, and it
costs only a very small increase in power consumption (about <=1mA).

I will refer to the latest TRM and send a new patch that always enables
these GRFs.


Please drop this patch.

Thanks.

--
- Xing Zheng




Re: [PATCH 16/23] arm64: ilp32: introduce binfmt_ilp32.c

2016-06-12 Thread Zhangjian (Bamvor)

Hi, Yury

On 2016/5/24 8:04, Yury Norov wrote:

to handle ILP32 binaries

Signed-off-by: Yury Norov 
---
  arch/arm64/kernel/Makefile   |  1 +
  arch/arm64/kernel/binfmt_ilp32.c | 91 
  2 files changed, 92 insertions(+)
  create mode 100644 arch/arm64/kernel/binfmt_ilp32.c

diff --git a/arch/arm64/kernel/Makefile b/arch/arm64/kernel/Makefile
index 6bc9738..9dfdf86 100644
--- a/arch/arm64/kernel/Makefile
+++ b/arch/arm64/kernel/Makefile
@@ -28,6 +28,7 @@ $(obj)/%.stub.o: $(obj)/%.o FORCE
  arm64-obj-$(CONFIG_AARCH32_EL0)   += sys32.o kuser32.o signal32.o 
\
   sys_compat.o entry32.o   
\
   ../../arm/kernel/opcodes.o 
binfmt_elf32.o
+arm64-obj-$(CONFIG_ARM64_ILP32)+= binfmt_ilp32.o
  arm64-obj-$(CONFIG_FUNCTION_TRACER)   += ftrace.o entry-ftrace.o
  arm64-obj-$(CONFIG_MODULES)   += arm64ksyms.o module.o
  arm64-obj-$(CONFIG_ARM64_MODULE_PLTS) += module-plts.o
diff --git a/arch/arm64/kernel/binfmt_ilp32.c b/arch/arm64/kernel/binfmt_ilp32.c
new file mode 100644
index 000..a934fd4
--- /dev/null
+++ b/arch/arm64/kernel/binfmt_ilp32.c
@@ -0,0 +1,91 @@
+/*
+ * Support for ILP32 Linux/aarch64 ELF binaries.
+ */
+
+#include 
+#include 
+
+#undef ELF_CLASS
+#define ELF_CLASS  ELFCLASS32
+
+#undef elfhdr
+#undef elf_phdr
+#undef elf_shdr
+#undef elf_note
+#undef elf_addr_t
+#define elfhdr elf32_hdr
+#define elf_phdr   elf32_phdr
+#define elf_shdr   elf32_shdr
+#define elf_note   elf32_note
+#define elf_addr_t Elf32_Addr
+
+/*
+ * Some data types as stored in coredump.
+ */
+#define user_long_tcompat_long_t
+#define user_siginfo_t compat_siginfo_t
+#define copy_siginfo_to_user   copy_siginfo_to_user32
+
+/*
+ * The machine-dependent core note format types are defined in 
elfcore-compat.h,
+ * which requires asm/elf.h to define compat_elf_gregset_t et al.
+ */
+#define elf_prstatus   compat_elf_prstatus
+#define elf_prpsinfo   compat_elf_prpsinfo
+
+/*
+ * Compat version of cputime_to_compat_timeval, perhaps this
+ * should be an inline in .
+ */
+static void cputime_to_compat_timeval(const cputime_t cputime,
+ struct compat_timeval *value)
+{
+   struct timeval tv;
+   cputime_to_timeval(cputime, );
+   value->tv_sec = tv.tv_sec;
+   value->tv_usec = tv.tv_usec;
+}
+
+#undef cputime_to_timeval
+#define cputime_to_timeval cputime_to_compat_timeval
+
+/* AARCH64 ILP32 EABI. */
+#undef elf_check_arch
+#define elf_check_arch(x)  (((x)->e_machine == EM_AARCH64)  \
+   && (x)->e_ident[EI_CLASS] == ELFCLASS32)
+
+#undef SET_PERSONALITY
+#define SET_PERSONALITY(ex)\
+do {   \
+   set_thread_flag(TIF_32BIT_AARCH64); \
+   clear_thread_flag(TIF_32BIT);   \
+} while (0)
+
+#undef ARCH_DLINFO
+#define ARCH_DLINFO\
+do {   \
+   NEW_AUX_ENT(AT_SYSINFO_EHDR,\
+   (elf_addr_t)(long)current->mm->context.vdso); \
+} while (0)
+
+#ifdef __AARCH64EB__
+#define COMPAT_ELF_PLATFORM("aarch64_be:ilp32")
+#else
+#define COMPAT_ELF_PLATFORM("aarch64:ilp32")
+#endif

fs/binfmt_elf.c uses ELF_PLATFORM instead of the COMPAT one. Should we define
ELF_PLATFORM directly?
#undef ELF_PLATFORM
#ifdef __AARCH64EB__
#define ELF_PLATFORM("aarch64_be:ilp32")
#else
#define ELF_PLATFORM("aarch64:ilp32")
#endif

Regards

Bamvor

+
+#undef ELF_HWCAP
+#undef ELF_HWCAP2
+#define ELF_HWCAP  ((u32) elf_hwcap)
+#define ELF_HWCAP2 ((u32) (elf_hwcap >> 32))
+
+/*
+ * Rename a few of the symbols that binfmt_elf.c will define.
+ * These are all local so the names don't really matter, but it
+ * might make some debugging less confusing not to duplicate them.
+ */
+#define elf_format compat_elf_format
+#define init_elf_binfmtinit_compat_elf_binfmt
+#define exit_elf_binfmtexit_compat_elf_binfmt
+
+#include "../../../fs/binfmt_elf.c"





Re: [PATCH 16/23] arm64: ilp32: introduce binfmt_ilp32.c

2016-06-12 Thread Zhangjian (Bamvor)

Hi, Yury

On 2016/5/24 8:04, Yury Norov wrote:

to handle ILP32 binaries

Signed-off-by: Yury Norov 
---
  arch/arm64/kernel/Makefile   |  1 +
  arch/arm64/kernel/binfmt_ilp32.c | 91 
  2 files changed, 92 insertions(+)
  create mode 100644 arch/arm64/kernel/binfmt_ilp32.c

diff --git a/arch/arm64/kernel/Makefile b/arch/arm64/kernel/Makefile
index 6bc9738..9dfdf86 100644
--- a/arch/arm64/kernel/Makefile
+++ b/arch/arm64/kernel/Makefile
@@ -28,6 +28,7 @@ $(obj)/%.stub.o: $(obj)/%.o FORCE
  arm64-obj-$(CONFIG_AARCH32_EL0)   += sys32.o kuser32.o signal32.o 
\
   sys_compat.o entry32.o   
\
   ../../arm/kernel/opcodes.o 
binfmt_elf32.o
+arm64-obj-$(CONFIG_ARM64_ILP32)+= binfmt_ilp32.o
  arm64-obj-$(CONFIG_FUNCTION_TRACER)   += ftrace.o entry-ftrace.o
  arm64-obj-$(CONFIG_MODULES)   += arm64ksyms.o module.o
  arm64-obj-$(CONFIG_ARM64_MODULE_PLTS) += module-plts.o
diff --git a/arch/arm64/kernel/binfmt_ilp32.c b/arch/arm64/kernel/binfmt_ilp32.c
new file mode 100644
index 000..a934fd4
--- /dev/null
+++ b/arch/arm64/kernel/binfmt_ilp32.c
@@ -0,0 +1,91 @@
+/*
+ * Support for ILP32 Linux/aarch64 ELF binaries.
+ */
+
+#include 
+#include 
+
+#undef ELF_CLASS
+#define ELF_CLASS  ELFCLASS32
+
+#undef elfhdr
+#undef elf_phdr
+#undef elf_shdr
+#undef elf_note
+#undef elf_addr_t
+#define elfhdr elf32_hdr
+#define elf_phdr   elf32_phdr
+#define elf_shdr   elf32_shdr
+#define elf_note   elf32_note
+#define elf_addr_t Elf32_Addr
+
+/*
+ * Some data types as stored in coredump.
+ */
+#define user_long_tcompat_long_t
+#define user_siginfo_t compat_siginfo_t
+#define copy_siginfo_to_user   copy_siginfo_to_user32
+
+/*
+ * The machine-dependent core note format types are defined in 
elfcore-compat.h,
+ * which requires asm/elf.h to define compat_elf_gregset_t et al.
+ */
+#define elf_prstatus   compat_elf_prstatus
+#define elf_prpsinfo   compat_elf_prpsinfo
+
+/*
+ * Compat version of cputime_to_compat_timeval, perhaps this
+ * should be an inline in .
+ */
+static void cputime_to_compat_timeval(const cputime_t cputime,
+ struct compat_timeval *value)
+{
+   struct timeval tv;
+   cputime_to_timeval(cputime, );
+   value->tv_sec = tv.tv_sec;
+   value->tv_usec = tv.tv_usec;
+}
+
+#undef cputime_to_timeval
+#define cputime_to_timeval cputime_to_compat_timeval
+
+/* AARCH64 ILP32 EABI. */
+#undef elf_check_arch
+#define elf_check_arch(x)  (((x)->e_machine == EM_AARCH64)  \
+   && (x)->e_ident[EI_CLASS] == ELFCLASS32)
+
+#undef SET_PERSONALITY
+#define SET_PERSONALITY(ex)\
+do {   \
+   set_thread_flag(TIF_32BIT_AARCH64); \
+   clear_thread_flag(TIF_32BIT);   \
+} while (0)
+
+#undef ARCH_DLINFO
+#define ARCH_DLINFO\
+do {   \
+   NEW_AUX_ENT(AT_SYSINFO_EHDR,\
+   (elf_addr_t)(long)current->mm->context.vdso); \
+} while (0)
+
+#ifdef __AARCH64EB__
+#define COMPAT_ELF_PLATFORM("aarch64_be:ilp32")
+#else
+#define COMPAT_ELF_PLATFORM("aarch64:ilp32")
+#endif

fs/binfmt_elf.c uses ELF_PLATFORM instead of the COMPAT one. Should we define
ELF_PLATFORM directly?
#undef ELF_PLATFORM
#ifdef __AARCH64EB__
#define ELF_PLATFORM("aarch64_be:ilp32")
#else
#define ELF_PLATFORM("aarch64:ilp32")
#endif

Regards

Bamvor

+
+#undef ELF_HWCAP
+#undef ELF_HWCAP2
+#define ELF_HWCAP  ((u32) elf_hwcap)
+#define ELF_HWCAP2 ((u32) (elf_hwcap >> 32))
+
+/*
+ * Rename a few of the symbols that binfmt_elf.c will define.
+ * These are all local so the names don't really matter, but it
+ * might make some debugging less confusing not to duplicate them.
+ */
+#define elf_format compat_elf_format
+#define init_elf_binfmtinit_compat_elf_binfmt
+#define exit_elf_binfmtexit_compat_elf_binfmt
+
+#include "../../../fs/binfmt_elf.c"





[PATCH v4] udp reuseport: fix packet of same flow hashed to different socket

2016-06-12 Thread Su Xuemin
From: "Su, Xuemin" 

There is a corner case in which udp packets belonging to the same
flow are hashed to different sockets when hslot->count changes from 10
to 11:

1) When hslot->count <= 10, __udp_lib_lookup() searches udp_table->hash,
and always passes 'daddr' to udp_ehashfn().

2) When hslot->count > 10, __udp_lib_lookup() searches udp_table->hash2,
but may pass 'INADDR_ANY' to udp_ehashfn() if the sockets are bound to
INADDR_ANY instead of some specific addr.

That means when hslot->count changes from 10 to 11, the hash calculated by
udp_ehashfn() is also changed, and the udp packets belonging to the same
flow will be hashed to different sockets.

This is easily reproduced:
1) Create 10 udp sockets and bind all of them to 0.0.0.0:40000.
2) From the same host send udp packets to 127.0.0.1:40000, record the
socket index which receives the packets.
3) Create 1 more udp socket and bind it to 0.0.0.0:44096. The number 44096
is 40000 + UDP_HASH_SIZE(4096), this makes the new socket put into the
same hslot as the aforementioned 10 sockets, and makes the hslot->count
change from 10 to 11.
4) From the same host send udp packets to 127.0.0.1:40000, and the socket
index which receives the packets will be different from the one received
in step 2.
This should not happen as the socket bound to 0.0.0.0:44096 should not
change the behavior of the sockets bound to 0.0.0.0:40000.

It's the same case for IPv6, and this patch also fixes that.

Signed-off-by: Su, Xuemin 
Signed-off-by: Eric Dumazet 
---
I use this tree to generate the patch:
  git://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git

 net/ipv4/udp.c | 73 +-
 net/ipv6/udp.c | 71 +---
 2 files changed, 32 insertions(+), 112 deletions(-)

diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index 0ff31d9..55ec77c 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -391,9 +391,9 @@ int udp_v4_get_port(struct sock *sk, unsigned short snum)
return udp_lib_get_port(sk, snum, ipv4_rcv_saddr_equal, hash2_nulladdr);
 }
 
-static inline int compute_score(struct sock *sk, struct net *net,
-   __be32 saddr, unsigned short hnum, __be16 sport,
-   __be32 daddr, __be16 dport, int dif)
+static int compute_score(struct sock *sk, struct net *net,
+__be32 saddr, __be16 sport,
+__be32 daddr, unsigned short hnum, int dif)
 {
int score;
struct inet_sock *inet;
@@ -434,52 +434,6 @@ static inline int compute_score(struct sock *sk, struct 
net *net,
return score;
 }
 
-/*
- * In this second variant, we check (daddr, dport) matches (inet_rcv_sadd, 
inet_num)
- */
-static inline int compute_score2(struct sock *sk, struct net *net,
-__be32 saddr, __be16 sport,
-__be32 daddr, unsigned int hnum, int dif)
-{
-   int score;
-   struct inet_sock *inet;
-
-   if (!net_eq(sock_net(sk), net) ||
-   ipv6_only_sock(sk))
-   return -1;
-
-   inet = inet_sk(sk);
-
-   if (inet->inet_rcv_saddr != daddr ||
-   inet->inet_num != hnum)
-   return -1;
-
-   score = (sk->sk_family == PF_INET) ? 2 : 1;
-
-   if (inet->inet_daddr) {
-   if (inet->inet_daddr != saddr)
-   return -1;
-   score += 4;
-   }
-
-   if (inet->inet_dport) {
-   if (inet->inet_dport != sport)
-   return -1;
-   score += 4;
-   }
-
-   if (sk->sk_bound_dev_if) {
-   if (sk->sk_bound_dev_if != dif)
-   return -1;
-   score += 4;
-   }
-
-   if (sk->sk_incoming_cpu == raw_smp_processor_id())
-   score++;
-
-   return score;
-}
-
 static u32 udp_ehashfn(const struct net *net, const __be32 laddr,
   const __u16 lport, const __be32 faddr,
   const __be16 fport)
@@ -492,11 +446,11 @@ static u32 udp_ehashfn(const struct net *net, const 
__be32 laddr,
  udp_ehash_secret + net_hash_mix(net));
 }
 
-/* called with read_rcu_lock() */
+/* called with rcu_read_lock() */
 static struct sock *udp4_lib_lookup2(struct net *net,
__be32 saddr, __be16 sport,
__be32 daddr, unsigned int hnum, int dif,
-   struct udp_hslot *hslot2, unsigned int slot2,
+   struct udp_hslot *hslot2,
struct sk_buff *skb)
 {
struct sock *sk, *result;
@@ -506,7 +460,7 @@ static struct sock *udp4_lib_lookup2(struct net *net,
result = NULL;
badness = 0;
udp_portaddr_for_each_entry_rcu(sk, >head) {
-   score = compute_score2(sk, net, saddr, sport,
+   score = compute_score(sk, net, 

[PATCH v4] udp reuseport: fix packet of same flow hashed to different socket

2016-06-12 Thread Su Xuemin
From: "Su, Xuemin" 

There is a corner case in which udp packets belonging to the same
flow are hashed to different sockets when hslot->count changes from 10
to 11:

1) When hslot->count <= 10, __udp_lib_lookup() searches udp_table->hash,
and always passes 'daddr' to udp_ehashfn().

2) When hslot->count > 10, __udp_lib_lookup() searches udp_table->hash2,
but may pass 'INADDR_ANY' to udp_ehashfn() if the sockets are bound to
INADDR_ANY instead of some specific addr.

That means when hslot->count changes from 10 to 11, the hash calculated by
udp_ehashfn() is also changed, and the udp packets belonging to the same
flow will be hashed to different sockets.

This is easily reproduced:
1) Create 10 udp sockets and bind all of them to 0.0.0.0:40000.
2) From the same host send udp packets to 127.0.0.1:40000, record the
socket index which receives the packets.
3) Create 1 more udp socket and bind it to 0.0.0.0:44096. The number 44096
is 40000 + UDP_HASH_SIZE(4096), this makes the new socket put into the
same hslot as the aforementioned 10 sockets, and makes the hslot->count
change from 10 to 11.
4) From the same host send udp packets to 127.0.0.1:40000, and the socket
index which receives the packets will be different from the one received
in step 2.
This should not happen as the socket bound to 0.0.0.0:44096 should not
change the behavior of the sockets bound to 0.0.0.0:40000.

It's the same case for IPv6, and this patch also fixes that.

Signed-off-by: Su, Xuemin 
Signed-off-by: Eric Dumazet 
---
I use this tree to generate the patch:
  git://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git

 net/ipv4/udp.c | 73 +-
 net/ipv6/udp.c | 71 +---
 2 files changed, 32 insertions(+), 112 deletions(-)

diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index 0ff31d9..55ec77c 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -391,9 +391,9 @@ int udp_v4_get_port(struct sock *sk, unsigned short snum)
return udp_lib_get_port(sk, snum, ipv4_rcv_saddr_equal, hash2_nulladdr);
 }
 
-static inline int compute_score(struct sock *sk, struct net *net,
-   __be32 saddr, unsigned short hnum, __be16 sport,
-   __be32 daddr, __be16 dport, int dif)
+static int compute_score(struct sock *sk, struct net *net,
+__be32 saddr, __be16 sport,
+__be32 daddr, unsigned short hnum, int dif)
 {
int score;
struct inet_sock *inet;
@@ -434,52 +434,6 @@ static inline int compute_score(struct sock *sk, struct 
net *net,
return score;
 }
 
-/*
- * In this second variant, we check (daddr, dport) matches (inet_rcv_sadd, 
inet_num)
- */
-static inline int compute_score2(struct sock *sk, struct net *net,
-__be32 saddr, __be16 sport,
-__be32 daddr, unsigned int hnum, int dif)
-{
-   int score;
-   struct inet_sock *inet;
-
-   if (!net_eq(sock_net(sk), net) ||
-   ipv6_only_sock(sk))
-   return -1;
-
-   inet = inet_sk(sk);
-
-   if (inet->inet_rcv_saddr != daddr ||
-   inet->inet_num != hnum)
-   return -1;
-
-   score = (sk->sk_family == PF_INET) ? 2 : 1;
-
-   if (inet->inet_daddr) {
-   if (inet->inet_daddr != saddr)
-   return -1;
-   score += 4;
-   }
-
-   if (inet->inet_dport) {
-   if (inet->inet_dport != sport)
-   return -1;
-   score += 4;
-   }
-
-   if (sk->sk_bound_dev_if) {
-   if (sk->sk_bound_dev_if != dif)
-   return -1;
-   score += 4;
-   }
-
-   if (sk->sk_incoming_cpu == raw_smp_processor_id())
-   score++;
-
-   return score;
-}
-
 static u32 udp_ehashfn(const struct net *net, const __be32 laddr,
   const __u16 lport, const __be32 faddr,
   const __be16 fport)
@@ -492,11 +446,11 @@ static u32 udp_ehashfn(const struct net *net, const 
__be32 laddr,
  udp_ehash_secret + net_hash_mix(net));
 }
 
-/* called with read_rcu_lock() */
+/* called with rcu_read_lock() */
 static struct sock *udp4_lib_lookup2(struct net *net,
__be32 saddr, __be16 sport,
__be32 daddr, unsigned int hnum, int dif,
-   struct udp_hslot *hslot2, unsigned int slot2,
+   struct udp_hslot *hslot2,
struct sk_buff *skb)
 {
struct sock *sk, *result;
@@ -506,7 +460,7 @@ static struct sock *udp4_lib_lookup2(struct net *net,
result = NULL;
badness = 0;
udp_portaddr_for_each_entry_rcu(sk, &hslot2->head) {
-   score = compute_score2(sk, net, saddr, sport,
+   score = compute_score(sk, net, saddr, sport,
  daddr, hnum, dif);

Re: [linux-sunxi] [PATCH] [V2] ARM: dts: sun7i: Add dts file for Bananapi M1 Plus board

2016-06-12 Thread Chen-Yu Tsai
On Fri, Jun 10, 2016 at 5:38 PM, Maxime Ripard
 wrote:
> On Thu, Jun 02, 2016 at 11:15:55AM +0200, Bernhard Nortmann wrote:
>> Am 02.06.2016 um 10:16 schrieb Maxime Ripard:
>> >[...]
>> >Yes, everything that is shared with the banana-pro (which, judging
>> >from Bernhard, is pretty much everything but a GPIO) should be merged
>> >in the banapro DT.
>> >
>> >Maxime
>> >
>>
>> Don't take my word for granted, as I do not own this hardware or know it
>> particularly well. There is no doubt that "BPi-M1+" and "Banana Pro" are
>> very similar, but if in doubt the information from the wiki should be
>> verified.
>
> Hmm, ok. Chen-Yu, any input on this? You know the banana-pis much
> more than I do.

I did a comparison of the "Banana Pro" vs the "BPi-M1+".

The differences are similar to what we have with any other development
board, say the Cubietruck:

  - A different WiFi chip is used, and the BT part is not hooked up.
  - Different GPIOs for external power regulator/switches
  - Different GPIOs for LEDs
  - Different peripherals exposed on the headers.

IMO, there's no need to merge or have a common .dtsi for the two boards.
They (and all the other development boards) look similar because
everyone is following the basics set by Allwinner's reference design,
like which MMC controller and pins are used for SD/MMC, which ones
are used for SDIO-based WiFi, and so on.

I did a version completely from scratch using just the schematics:

  https://github.com/wens/linux/commits/bpi-m1-plus

Please ignore the last commit. It's just me playing with NTP support
for GPS time.


Regards
ChenYu


Re: [linux-sunxi] [PATCH] [V2] ARM: dts: sun7i: Add dts file for Bananapi M1 Plus board

2016-06-12 Thread Chen-Yu Tsai
On Fri, Jun 10, 2016 at 5:38 PM, Maxime Ripard
 wrote:
> On Thu, Jun 02, 2016 at 11:15:55AM +0200, Bernhard Nortmann wrote:
>> Am 02.06.2016 um 10:16 schrieb Maxime Ripard:
>> >[...]
>> >Yes, everything that is shared with the banana-pro (which, judging
>> >from Bernhard, is pretty much everything but a GPIO) should be merged
>> >in the banapro DT.
>> >
>> >Maxime
>> >
>>
>> Don't take my word for granted, as I do not own this hardware or know it
>> particularly well. There is no doubt that "BPi-M1+" and "Banana Pro" are
>> very similar, but if in doubt the information from the wiki should be
>> verified.
>
> Hmm, ok. Chen-Yu, any input on this? You know the banana-pis much
> more than I do.

I did a comparison of the "Banana Pro" vs the "BPi-M1+".

The differences are similar to what we have with any other development
board, say the Cubietruck:

  - A different WiFi chip is used, and the BT part is not hooked up.
  - Different GPIOs for external power regulator/switches
  - Different GPIOs for LEDs
  - Different peripherals exposed on the headers.

IMO, there's no need to merge or have a common .dtsi for the two boards.
They (and all the other development boards) look similar because
everyone is following the basics set by Allwinner's reference design,
like which MMC controller and pins are used for SD/MMC, which ones
are used for SDIO-based WiFi, and so on.

I did a version completely from scratch using just the schematics:

  https://github.com/wens/linux/commits/bpi-m1-plus

Please ignore the last commit. It's just me playing with NTP support
for GPS time.


Regards
ChenYu


Re: [PATCH 01/11] clk: imx: clk-pllv3: fix incorrect handle of enet powerdown bit

2016-06-12 Thread Shawn Guo
On Sun, Jun 12, 2016 at 10:56:38PM +0800, Dong Aisheng wrote:
> Hi Shawn,
> 
> On Wed, Jun 8, 2016 at 10:33 PM, Dong Aisheng  wrote:
> > After commit f53947456f98 ("ARM: clk: imx: update pllv3 to support imx7"),
> > the former used BM_PLL_POWER bit is not correct anymore for IMX7 ENET.
> > Instead, pll->powerdown holds the correct bit, so using powerdown bit
> > in clk_pllv3_{prepare | unprepare} functions.
> >
> > Fixes: f53947456f98 ("ARM: clk: imx: update pllv3 to support imx7")
> > Signed-off-by: Dong Aisheng 
> 
> Any comments about this one?

Sorry.  I thought I had applied it.  Applied it now, thanks.

Shawn


Re: [PATCH 01/11] clk: imx: clk-pllv3: fix incorrect handle of enet powerdown bit

2016-06-12 Thread Shawn Guo
On Sun, Jun 12, 2016 at 10:56:38PM +0800, Dong Aisheng wrote:
> Hi Shawn,
> 
> On Wed, Jun 8, 2016 at 10:33 PM, Dong Aisheng  wrote:
> > After commit f53947456f98 ("ARM: clk: imx: update pllv3 to support imx7"),
> > the former used BM_PLL_POWER bit is not correct anymore for IMX7 ENET.
> > Instead, pll->powerdown holds the correct bit, so using powerdown bit
> > in clk_pllv3_{prepare | unprepare} functions.
> >
> > Fixes: f53947456f98 ("ARM: clk: imx: update pllv3 to support imx7")
> > Signed-off-by: Dong Aisheng 
> 
> Any comments about this one?

Sorry.  I thought I had applied it.  Applied it now, thanks.

Shawn


Re: [PATCH V6 6/8] vfio, platform: make reset driver a requirement by default

2016-06-12 Thread Sinan Kaya
On 6/7/2016 3:59 PM, Auger Eric wrote:
>> -vfio_platform_get_reset(vdev);
>> > +  ret = vfio_platform_get_reset(vdev);
>> > +  if (ret && vdev->reset_required) {
>> > +  pr_err("vfio: no reset function found for device %s\n",
>> > + vdev->name);
>> > +  iommu_group_put(group);
>> > +  return ret;
> nit: in case you respin you can factorize the group put and return ret in a 
> goto label
> (since also used above).
> 
> Besides Reviewed-by: Eric Auger 

thanks, done. I'll respin with your request.

-- 
Sinan Kaya
Qualcomm Technologies, Inc. on behalf of Qualcomm Innovation Center, Inc.
Qualcomm Innovation Center, Inc. is a member of Code Aurora Forum, a Linux 
Foundation Collaborative Project


Re: [PATCH V6 6/8] vfio, platform: make reset driver a requirement by default

2016-06-12 Thread Sinan Kaya
On 6/7/2016 3:59 PM, Auger Eric wrote:
>> -vfio_platform_get_reset(vdev);
>> > +  ret = vfio_platform_get_reset(vdev);
>> > +  if (ret && vdev->reset_required) {
>> > +  pr_err("vfio: no reset function found for device %s\n",
>> > + vdev->name);
>> > +  iommu_group_put(group);
>> > +  return ret;
> nit: in case you respin you can factorize the group put and return ret in a 
> goto label
> (since also used above).
> 
> Besides Reviewed-by: Eric Auger 

thanks, done. I'll respin with your request.

-- 
Sinan Kaya
Qualcomm Technologies, Inc. on behalf of Qualcomm Innovation Center, Inc.
Qualcomm Innovation Center, Inc. is a member of Code Aurora Forum, a Linux 
Foundation Collaborative Project


Re: [very-RFC 6/8] Add TSN event-tracing

2016-06-12 Thread Steven Rostedt
On Sun, 12 Jun 2016 23:25:10 +0200
Henrik Austad  wrote:

> > > +#include 
> > > +#include 
> > > +/* #include  */
> > > +
> > > +/* FIXME: update to TRACE_CLASS to reduce overhead */  
> > 
> > I'm curious as to why I didn't do this now. A class would make less
> > duplication of typing too ;-)  
> 
> Yeah, I found this in a really great article written by some tracing-dude, 
> I hear he talks really, really fast!

I plead the 5th!

> 
> https://lwn.net/Articles/381064/
> 
> > > +TRACE_EVENT(tsn_buffer_write,
> > > +
> > > + TP_PROTO(struct tsn_link *link,
> > > + size_t bytes),
> > > +
> > > + TP_ARGS(link, bytes),
> > > +
> > > + TP_STRUCT__entry(
> > > + __field(u64, stream_id)
> > > + __field(size_t, size)
> > > + __field(size_t, bsize)
> > > + __field(size_t, size_left)
> > > + __field(void *, buffer)
> > > + __field(void *, head)
> > > + __field(void *, tail)
> > > + __field(void *, end)
> > > + ),
> > > +
> > > + TP_fast_assign(
> > > + __entry->stream_id = link->stream_id;
> > > + __entry->size = bytes;
> > > + __entry->bsize = link->used_buffer_size;
> > > + __entry->size_left = (link->head - link->tail) % 
> > > link->used_buffer_size;  
> > 
> > Move this logic into the print statement, since you save head and tail.  
> 
> Ok, any particular reason?

Because it removes calculations during the trace. The calculations done
in TP_printk() are done at the time of reading the trace, and
calculations done in TP_fast_assign() are done during the recording and
hence add more overhead to the trace itself.


> 
> > > + __entry->buffer = link->buffer;
> > > + __entry->head = link->head;
> > > + __entry->tail = link->tail;
> > > + __entry->end = link->end;
> > > + ),
> > > +
> > > + TP_printk("stream_id=%llu, copy=%zd, buffer: %zd, avail=%zd, 
> > > [buffer=%p, head=%p, tail=%p, end=%p]",
> > > + __entry->stream_id, __entry->size, __entry->bsize, 
> > > __entry->size_left,  
> > 
> >  __entry->stream_id, __entry->size, __entry->bsize,
> >  (__entry->head - __entry->tail) % __entry->bsize,
> >   
> 
> Ok, so is this about saving space by dropping one intermediate value, or is 
> it some other point I'm missing here?

Nope, just moving the overhead from the recording of the trace to the
reading of the trace.

> 
> > > + __entry->buffer,__entry->head, __entry->tail,  __entry->end)
> > > +
> > > + );
> > > +


> > > +
> > > + TP_fast_assign(
> > > + __entry->stream_id = link->stream_id;
> > > + __entry->vlan_tag = (skb_vlan_tag_present(skb) ? 
> > > skb_vlan_tag_get(skb) : 0);
> > > + __entry->bytes = bytes;
> > > + __entry->data_len = skb->data_len;
> > > + __entry->headlen = skb_headlen(skb);
> > > + __entry->protocol = ntohs(vlan_get_protocol(skb));  
> > 
> > Maybe it would be better to do the ntohs() in the TP_printk() as well.
> >   
> > > + __entry->prot_native = ntohs(skb->protocol);  
> > 
> > here too.
> >   
> > > + __entry->tx_idx = skb_get_queue_mapping(skb);
> > > +
> > > + __entry->mac_len = skb->mac_len;
> > > + __entry->hdr_len = skb->hdr_len;
> > > + __entry->vlan_tci = skb->vlan_tci;
> > > + __entry->mac_header = skb->mac_header;
> > > + __entry->tail = (unsigned int)skb->tail;
> > > + __entry->end  = (unsigned int)skb->end;
> > > + __entry->truesize = skb->truesize;
> > > + ),
> > > +
> > > + 
> > > TP_printk("stream_id=%llu,vlan_tag=0x%04x,data_size=%zd,data_len=%zd,headlen=%u,proto=0x%04x
> > >  
> > > (0x%04x),tx_idx=%d,mac_len=%u,hdr_len=%u,vlan_tci=0x%02x,mac_header=0x%02x,tail=%u,end=%u,truesize=%u",
> > > + __entry->stream_id,
> > > + __entry->vlan_tag,
> > > + __entry->bytes,
> > > + __entry->data_len,
> > > + __entry->headlen,
> > > + __entry->protocol,
> > > + __entry->prot_native, __entry->tx_idx,
> > > + __entry->mac_len,
> > > + __entry->hdr_len,
> > > + __entry->vlan_tci,
> > > + __entry->mac_header,  
> > 
> > Is this an ether mac header? If so we support %M. But as it's defined
> > as only u16, it doesn't seem like it can be.  
> 
> Actually, looking at the output, I'm not quite sure what it is that I 
> wanted to grab with that, the skb->mac_header should give an offset into 
> the header-area of skb, so it should be a constant offset from skb->head 
> (that is an actual pointer).
> 
> I *think* I wanted to make sure I updated things correctly so that the 
> offset didn't suddenly change, but the fact that I'm no longer sure 
> indicates that I should just drop that one. That whole printout is too long 
> anyway..
> 
> Thanks for pointing a finger at this!
> 
> 
> I'm still a bit stymied as to why logic should be in TP_printk() and not 
> TP_fast_assign(). Not that I really 

Re: [very-RFC 6/8] Add TSN event-tracing

2016-06-12 Thread Steven Rostedt
On Sun, 12 Jun 2016 23:25:10 +0200
Henrik Austad  wrote:

> > > +#include 
> > > +#include 
> > > +/* #include  */
> > > +
> > > +/* FIXME: update to TRACE_CLASS to reduce overhead */  
> > 
> > I'm curious as to why I didn't do this now. A class would make less
> > duplication of typing too ;-)  
> 
> Yeah, I found this in a really great article written by some tracing-dude, 
> I hear he talks really, really fast!

I plead the 5th!

> 
> https://lwn.net/Articles/381064/
> 
> > > +TRACE_EVENT(tsn_buffer_write,
> > > +
> > > + TP_PROTO(struct tsn_link *link,
> > > + size_t bytes),
> > > +
> > > + TP_ARGS(link, bytes),
> > > +
> > > + TP_STRUCT__entry(
> > > + __field(u64, stream_id)
> > > + __field(size_t, size)
> > > + __field(size_t, bsize)
> > > + __field(size_t, size_left)
> > > + __field(void *, buffer)
> > > + __field(void *, head)
> > > + __field(void *, tail)
> > > + __field(void *, end)
> > > + ),
> > > +
> > > + TP_fast_assign(
> > > + __entry->stream_id = link->stream_id;
> > > + __entry->size = bytes;
> > > + __entry->bsize = link->used_buffer_size;
> > > + __entry->size_left = (link->head - link->tail) % 
> > > link->used_buffer_size;  
> > 
> > Move this logic into the print statement, since you save head and tail.  
> 
> Ok, any particular reason?

Because it removes calculations during the trace. The calculations done
in TP_printk() are done at the time of reading the trace, and
calculations done in TP_fast_assign() are done during the recording and
hence add more overhead to the trace itself.


> 
> > > + __entry->buffer = link->buffer;
> > > + __entry->head = link->head;
> > > + __entry->tail = link->tail;
> > > + __entry->end = link->end;
> > > + ),
> > > +
> > > + TP_printk("stream_id=%llu, copy=%zd, buffer: %zd, avail=%zd, 
> > > [buffer=%p, head=%p, tail=%p, end=%p]",
> > > + __entry->stream_id, __entry->size, __entry->bsize, 
> > > __entry->size_left,  
> > 
> >  __entry->stream_id, __entry->size, __entry->bsize,
> >  (__entry->head - __entry->tail) % __entry->bsize,
> >   
> 
> Ok, so is this about saving space by dropping one intermediate value, or is 
> it some other point I'm missing here?

Nope, just moving the overhead from the recording of the trace to the
reading of the trace.

> 
> > > + __entry->buffer,__entry->head, __entry->tail,  __entry->end)
> > > +
> > > + );
> > > +


> > > +
> > > + TP_fast_assign(
> > > + __entry->stream_id = link->stream_id;
> > > + __entry->vlan_tag = (skb_vlan_tag_present(skb) ? 
> > > skb_vlan_tag_get(skb) : 0);
> > > + __entry->bytes = bytes;
> > > + __entry->data_len = skb->data_len;
> > > + __entry->headlen = skb_headlen(skb);
> > > + __entry->protocol = ntohs(vlan_get_protocol(skb));  
> > 
> > Maybe it would be better to do the ntohs() in the TP_printk() as well.
> >   
> > > + __entry->prot_native = ntohs(skb->protocol);  
> > 
> > here too.
> >   
> > > + __entry->tx_idx = skb_get_queue_mapping(skb);
> > > +
> > > + __entry->mac_len = skb->mac_len;
> > > + __entry->hdr_len = skb->hdr_len;
> > > + __entry->vlan_tci = skb->vlan_tci;
> > > + __entry->mac_header = skb->mac_header;
> > > + __entry->tail = (unsigned int)skb->tail;
> > > + __entry->end  = (unsigned int)skb->end;
> > > + __entry->truesize = skb->truesize;
> > > + ),
> > > +
> > > + 
> > > TP_printk("stream_id=%llu,vlan_tag=0x%04x,data_size=%zd,data_len=%zd,headlen=%u,proto=0x%04x
> > >  
> > > (0x%04x),tx_idx=%d,mac_len=%u,hdr_len=%u,vlan_tci=0x%02x,mac_header=0x%02x,tail=%u,end=%u,truesize=%u",
> > > + __entry->stream_id,
> > > + __entry->vlan_tag,
> > > + __entry->bytes,
> > > + __entry->data_len,
> > > + __entry->headlen,
> > > + __entry->protocol,
> > > + __entry->prot_native, __entry->tx_idx,
> > > + __entry->mac_len,
> > > + __entry->hdr_len,
> > > + __entry->vlan_tci,
> > > + __entry->mac_header,  
> > 
> > Is this an ether mac header? If so we support %M. But as it's defined
> > as only u16, it doesn't seem like it can be.  
> 
> Actually, looking at the output, I'm not quite sure what it is that I 
> wanted to grab with that, the skb->mac_header should give an offset into 
> the header-area of skb, so it should be a constant offset from skb->head 
> (that is an actual pointer).
> 
> I *think* I wanted to make sure I updated things correctly so that the 
> offset didn't suddenly change, but the fact that I'm no longer sure 
> indicates that I should just drop that one. That whole printout is too long 
> anyway..
> 
> Thanks for pointing a finger at this!
> 
> 
> I'm still a bit stymied as to why logic should be in TP_printk() and not 
> TP_fast_assign(). Not that I really have any 

  1   2   3   4   5   6   7   >