Re: [PATCH v4 1/2] fork: extend clone3() to support CLONE_SET_TID

2019-08-09 Thread Adrian Reber
On Sat, Aug 10, 2019 at 03:10:34AM +0200, Christian Brauner wrote:
> On Thu, Aug 08, 2019 at 11:22:21PM +0200, Adrian Reber wrote:
> > The main motivation to add set_tid to clone3() is CRIU.
> > 
> > To restore a process with the same PID/TID CRIU currently uses
> > /proc/sys/kernel/ns_last_pid. It writes the desired (PID - 1) to
> > ns_last_pid and then (quickly) does a clone(). This works most of the
> > time, but it is racy. It is also slow as it requires multiple syscalls.
> > 
> > Extending clone3() to support set_tid makes it possible to restore a
> > process using CRIU without accessing /proc/sys/kernel/ns_last_pid and
> > race free (as long as the desired PID/TID is available).
> > 
> > This clone3() extension places the same restrictions (CAP_SYS_ADMIN)
> > on clone3() with set_tid as they are currently in place for ns_last_pid.
> > 
> > Signed-off-by: Adrian Reber 
> > ---
> > v2:
> >  - Removed (size < sizeof(struct clone_args)) as discussed with
> >Christian and Dmitry
> >  - Added comment to ((set_tid != 1) && idr_get_cursor() <= 1) (Oleg)
> >  - Use idr_alloc() instead of idr_alloc_cyclic() (Oleg)
> > 
> > v3:
> >  - Return EEXIST if PID is already in use (Christian)
> >  - Drop CLONE_SET_TID (Christian and Oleg)
> >  - Use idr_is_empty() instead of idr_get_cursor() (Oleg)
> >  - Handle different `struct clone_args` sizes (Dmitry)
> > 
> > v4:
> >  - Rework struct size check with defines (Christian)
> >  - Reduce number of set_tid checks (Oleg)
> >  - Less parentheses and more robust code (Oleg)
> >  - Do ns_capable() on correct user_ns (Oleg, Christian)
> > ---
> >  include/linux/pid.h|  2 +-
> >  include/linux/sched/task.h |  1 +
> >  include/uapi/linux/sched.h |  1 +
> >  kernel/fork.c  | 25 +++--
> >  kernel/pid.c   | 34 +++---
> >  5 files changed, 53 insertions(+), 10 deletions(-)
> > 
> > diff --git a/include/linux/pid.h b/include/linux/pid.h
> > index 2a83e434db9d..052000db0ced 100644
> > --- a/include/linux/pid.h
> > +++ b/include/linux/pid.h
> > @@ -116,7 +116,7 @@ extern struct pid *find_vpid(int nr);
> >  extern struct pid *find_get_pid(int nr);
> >  extern struct pid *find_ge_pid(int nr, struct pid_namespace *);
> >  
> > -extern struct pid *alloc_pid(struct pid_namespace *ns);
> > +extern struct pid *alloc_pid(struct pid_namespace *ns, pid_t set_tid);
> >  extern void free_pid(struct pid *pid);
> >  extern void disable_pid_allocation(struct pid_namespace *ns);
> >  
> > diff --git a/include/linux/sched/task.h b/include/linux/sched/task.h
> > index 0497091e40c1..4f2a80564332 100644
> > --- a/include/linux/sched/task.h
> > +++ b/include/linux/sched/task.h
> > @@ -26,6 +26,7 @@ struct kernel_clone_args {
> > unsigned long stack;
> > unsigned long stack_size;
> > unsigned long tls;
> > +   pid_t set_tid;
> >  };
> >  
> >  /*
> > diff --git a/include/uapi/linux/sched.h b/include/uapi/linux/sched.h
> > index b3105ac1381a..e1ce103a2c47 100644
> > --- a/include/uapi/linux/sched.h
> > +++ b/include/uapi/linux/sched.h
> > @@ -45,6 +45,7 @@ struct clone_args {
> > __aligned_u64 stack;
> > __aligned_u64 stack_size;
> > __aligned_u64 tls;
> > +   __aligned_u64 set_tid;
> >  };
> >  
> >  /*
> > diff --git a/kernel/fork.c b/kernel/fork.c
> > index 2852d0e76ea3..2a03f0e201e9 100644
> > --- a/kernel/fork.c
> > +++ b/kernel/fork.c
> > @@ -117,6 +117,13 @@
> >   */
> >  #define MAX_THREADS FUTEX_TID_MASK
> >  
> > +/*
> > + * Different sizes of struct clone_args
> > + */
> > +#define CLONE3_ARGS_SIZE_V0 64
> > +/* V1 includes set_tid */
> > +#define CLONE3_ARGS_SIZE_V1 72
> > +
> >  /*
> >   * Protected counters by write_lock_irq(_lock)
> >   */
> > @@ -2031,7 +2038,13 @@ static __latent_entropy struct task_struct 
> > *copy_process(
> > stackleak_task_init(p);
> >  
> > if (pid != _struct_pid) {
> > -   pid = alloc_pid(p->nsproxy->pid_ns_for_children);
> > +   if (args->set_tid && !ns_capable(
> > +   p->nsproxy->pid_ns_for_children->user_ns,
> > +   CAP_SYS_ADMIN)) {
> > +   retval = -EPERM;
> > +   goto bad_fork_cleanup_thread;
> > +   }
> > +   pid = alloc_pid(p->nsproxy->pid_ns_for_children, args->set_tid);
> > if (IS_ERR(pid)) {
> > retval = PTR_ERR(pid);
> > goto bad_fork_cleanup_thread;
> > @@ -2535,9 +2548,14 @@ noinline static int copy_clone_args_from_user(struct 
> > kernel_clone_args *kargs,
> > if (unlikely(size > PAGE_SIZE))
> > return -E2BIG;
> >  
> > -   if (unlikely(size < sizeof(struct clone_args)))
> > +   /* The struct needs to be at least the size of the original struct. */
> 
> I don't think you need that comment. I think the macro is pretty
> self-explanatory. If you want it to be even clearer you could even make
> it CLONE3_ARGS_SIZE_MIN but V0 is good enough. :)

Will remove the 

Re: [RFC PATCH v4 9/9] printk: use a new ringbuffer implementation

2019-08-09 Thread Thomas Gleixner
On Fri, 9 Aug 2019, Linus Torvalds wrote:
> On Thu, Aug 8, 2019 at 11:14 PM Peter Zijlstra  wrote:
> > Note that you can hook this into printk as a fake early serial device;
> > just have the serial device write to the DRAM buffer.
> 
> No, you really really can't.
...
> Even the "early console" stuff tries to honor serialization by
> console_lock and console_suspended etc. Or things like the "I'm in the
> middle of the scheduler, so I won't be doing any real logging".

If you think of it as the classic console you are right. What Peter has in
mind is the extra stuff on top of this buffer patchset, which implements
emergency write to consoles. That's an extra callback in the console
struct, which can be invoked in such situations ignoring context and console
lock completely.

Right now we have an implementation for serial only, but that already is
useful. I nicely got (minimally garbled) crash dumps out of an NMI
handler. With the current mainline console code the machine just hung.

So with this scheme we actually could hook your smart buffer into the
console stuff and still achieve what you want.

Thanks,

tglx


[PATCH v5 13/18] thermal: sun8i: add thermal driver for A64

2019-08-09 Thread Yangtao Li
From: Vasily Khoruzhick 

Thermal sensor controller in A64 is similar to H3, but it has 3 sensors.
Extend H3 functions to add support for multiple sensors.

Signed-off-by: Vasily Khoruzhick 
---
 drivers/thermal/sun8i_thermal.c | 12 
 1 file changed, 12 insertions(+)

diff --git a/drivers/thermal/sun8i_thermal.c b/drivers/thermal/sun8i_thermal.c
index 41ce8cdc0546..3259081da841 100644
--- a/drivers/thermal/sun8i_thermal.c
+++ b/drivers/thermal/sun8i_thermal.c
@@ -515,6 +515,17 @@ static const struct ths_thermal_chip sun8i_h3_ths = {
.irq_ack = sun8i_h3_irq_ack,
 };
 
+static const struct ths_thermal_chip sun50i_a64_ths = {
+   .sensor_num = 3,
+   .offset = -2170,
+   .scale = -117,
+   .has_mod_clk = true,
+   .temp_data_base = SUN8I_THS_TEMP_DATA,
+   .calibrate = sun8i_h3_ths_calibrate,
+   .init = sun8i_h3_thermal_init,
+   .irq_ack = sun8i_h3_irq_ack,
+};
+
 static const struct ths_thermal_chip sun50i_h6_ths = {
.sensor_num = 2,
.offset = -2794,
@@ -528,6 +539,7 @@ static const struct ths_thermal_chip sun50i_h6_ths = {
 
 static const struct of_device_id of_ths_match[] = {
{ .compatible = "allwinner,sun8i-h3-ths", .data = _h3_ths },
+   { .compatible = "allwinner,sun50i-a64-ths", .data = _a64_ths },
{ .compatible = "allwinner,sun50i-h6-ths", .data = _h6_ths },
{ /* sentinel */ },
 };
-- 
2.17.1



[PATCH v5 18/18] thermal: sun8i: add support for Allwinner R40 thermal sensor

2019-08-09 Thread Yangtao Li
From: Icenowy Zheng 

The thermal sensor in Allwinner R40 SoC is quite similar to the one in
Allwinner A64 SoC, with only slightly different temperature calculation
formula.

Signed-off-by: Icenowy Zheng 
---
 drivers/thermal/sun8i_thermal.c | 12 
 1 file changed, 12 insertions(+)

diff --git a/drivers/thermal/sun8i_thermal.c b/drivers/thermal/sun8i_thermal.c
index 78a888d85cba..0de9a56c3775 100644
--- a/drivers/thermal/sun8i_thermal.c
+++ b/drivers/thermal/sun8i_thermal.c
@@ -529,6 +529,17 @@ static const struct ths_thermal_chip sun8i_h3_ths = {
.irq_ack = sun8i_h3_irq_ack,
 };
 
+static const struct ths_thermal_chip sun8i_r40_ths = {
+   .sensor_num = 3,
+   .offset = -,
+   .scale = -113,
+   .has_mod_clk = true,
+   .temp_data_base = SUN8I_THS_TEMP_DATA,
+   .calibrate = sun8i_h3_ths_calibrate,
+   .init = sun8i_h3_thermal_init,
+   .irq_ack = sun8i_h3_irq_ack,
+};
+
 static const struct ths_thermal_chip sun50i_a64_ths = {
.sensor_num = 3,
.offset = -2170,
@@ -563,6 +574,7 @@ static const struct ths_thermal_chip sun50i_h6_ths = {
 
 static const struct of_device_id of_ths_match[] = {
{ .compatible = "allwinner,sun8i-h3-ths", .data = _h3_ths },
+   { .compatible = "allwinner,sun8i-r40-ths", .data = _r40_ths },
{ .compatible = "allwinner,sun50i-a64-ths", .data = _a64_ths },
{ .compatible = "allwinner,sun50i-h5-ths", .data = _h5_ths },
{ .compatible = "allwinner,sun50i-h6-ths", .data = _h6_ths },
-- 
2.17.1



[PATCH v5 17/18] dt-bindings: thermal: add binding document for r40 thermal controller

2019-08-09 Thread Yangtao Li
This patch adds binding document for allwinner r40 thermal controller.

Signed-off-by: Yangtao Li 
---
 Documentation/devicetree/bindings/thermal/sun8i-thermal.yaml | 1 +
 1 file changed, 1 insertion(+)

diff --git a/Documentation/devicetree/bindings/thermal/sun8i-thermal.yaml 
b/Documentation/devicetree/bindings/thermal/sun8i-thermal.yaml
index 2e28f5b33d33..28c438d3bfea 100644
--- a/Documentation/devicetree/bindings/thermal/sun8i-thermal.yaml
+++ b/Documentation/devicetree/bindings/thermal/sun8i-thermal.yaml
@@ -17,6 +17,7 @@ properties:
   compatible:
 enum:
   - allwinner,sun8i-h3-ths
+  - allwinner,sun8i-r40-ths
   - allwinner,sun50i-a64-ths
   - allwinner,sun50i-h5-ths
   - allwinner,sun50i-h6-ths
-- 
2.17.1



[PATCH v5 16/18] thermal: sun8i: add support for Allwinner H5 thermal sensor

2019-08-09 Thread Yangtao Li
From: Icenowy Zheng 

The thermal sensor in Allwinner H5 has 2 sensors, and they have a
special segmented temperature calculation formula.

Add support for this thermal sensor.

Signed-off-by: Icenowy Zheng 
---
 drivers/thermal/sun8i_thermal.c | 21 +
 1 file changed, 21 insertions(+)

diff --git a/drivers/thermal/sun8i_thermal.c b/drivers/thermal/sun8i_thermal.c
index a761e2afda08..78a888d85cba 100644
--- a/drivers/thermal/sun8i_thermal.c
+++ b/drivers/thermal/sun8i_thermal.c
@@ -99,6 +99,16 @@ static int sun8i_ths_reg2temp(struct ths_device *tmdev,
return (reg + tmdev->chip->offset) * tmdev->chip->scale;
 }
 
+static int sun50i_h5_calc_temp(int id, int reg)
+{
+   if (reg >= 0x500)
+   return -1191 * reg / 10 + 223000;
+   else if (!id)
+   return -1452 * reg / 10 + 259000;
+   else
+   return -1590 * reg / 10 + 276000;
+}
+
 static int sun8i_ths_get_temp(void *data, int *temp)
 {
struct tsensor *s = data;
@@ -530,6 +540,16 @@ static const struct ths_thermal_chip sun50i_a64_ths = {
.irq_ack = sun8i_h3_irq_ack,
 };
 
+static const struct ths_thermal_chip sun50i_h5_ths = {
+   .sensor_num = 2,
+   .has_mod_clk = true,
+   .temp_data_base = SUN8I_THS_TEMP_DATA,
+   .calibrate = sun8i_h3_ths_calibrate,
+   .init = sun8i_h3_thermal_init,
+   .irq_ack = sun8i_h3_irq_ack,
+   .calc_temp = sun50i_h5_calc_temp,
+};
+
 static const struct ths_thermal_chip sun50i_h6_ths = {
.sensor_num = 2,
.offset = -2794,
@@ -544,6 +564,7 @@ static const struct ths_thermal_chip sun50i_h6_ths = {
 static const struct of_device_id of_ths_match[] = {
{ .compatible = "allwinner,sun8i-h3-ths", .data = _h3_ths },
{ .compatible = "allwinner,sun50i-a64-ths", .data = _a64_ths },
+   { .compatible = "allwinner,sun50i-h5-ths", .data = _h5_ths },
{ .compatible = "allwinner,sun50i-h6-ths", .data = _h6_ths },
{ /* sentinel */ },
 };
-- 
2.17.1



[PATCH v5 03/18] thermal: fix indentation in makefile

2019-08-09 Thread Yangtao Li
To unify code style.

Signed-off-by: Yangtao Li 
---
 drivers/thermal/Makefile | 8 
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/drivers/thermal/Makefile b/drivers/thermal/Makefile
index fa6f8b206281..d7eafb5ef8ef 100644
--- a/drivers/thermal/Makefile
+++ b/drivers/thermal/Makefile
@@ -5,7 +5,7 @@
 
 obj-$(CONFIG_THERMAL)  += thermal_sys.o
 thermal_sys-y  += thermal_core.o thermal_sysfs.o \
-   thermal_helpers.o
+  thermal_helpers.o
 
 # interface to/from other layers providing sensors
 thermal_sys-$(CONFIG_THERMAL_HWMON)+= thermal_hwmon.o
@@ -25,11 +25,11 @@ thermal_sys-$(CONFIG_CPU_THERMAL)   += cpu_cooling.o
 thermal_sys-$(CONFIG_CLOCK_THERMAL)+= clock_cooling.o
 
 # devfreq cooling
-thermal_sys-$(CONFIG_DEVFREQ_THERMAL) += devfreq_cooling.o
+thermal_sys-$(CONFIG_DEVFREQ_THERMAL)  += devfreq_cooling.o
 
 # platform thermal drivers
 obj-y  += broadcom/
-obj-$(CONFIG_THERMAL_MMIO) += thermal_mmio.o
+obj-$(CONFIG_THERMAL_MMIO) += thermal_mmio.o
 obj-$(CONFIG_SPEAR_THERMAL)+= spear_thermal.o
 obj-$(CONFIG_SUN8I_THERMAL) += sun8i_thermal.o
 obj-$(CONFIG_ROCKCHIP_THERMAL) += rockchip_thermal.o
@@ -50,7 +50,7 @@ obj-$(CONFIG_TI_SOC_THERMAL)  += ti-soc-thermal/
 obj-y  += st/
 obj-$(CONFIG_QCOM_TSENS)   += qcom/
 obj-y  += tegra/
-obj-$(CONFIG_HISI_THERMAL) += hisi_thermal.o
+obj-$(CONFIG_HISI_THERMAL) += hisi_thermal.o
 obj-$(CONFIG_MTK_THERMAL)  += mtk_thermal.o
 obj-$(CONFIG_GENERIC_ADC_THERMAL)  += thermal-generic-adc.o
 obj-$(CONFIG_ZX2967_THERMAL)   += zx2967_thermal.o
-- 
2.17.1



[PATCH v5 12/18] dt-bindings: thermal: add binding document for a64 thermal controller

2019-08-09 Thread Yangtao Li
This patch adds binding document for allwinner a64 thermal controller.

Signed-off-by: Yangtao Li 
---
 Documentation/devicetree/bindings/thermal/sun8i-thermal.yaml | 1 +
 1 file changed, 1 insertion(+)

diff --git a/Documentation/devicetree/bindings/thermal/sun8i-thermal.yaml 
b/Documentation/devicetree/bindings/thermal/sun8i-thermal.yaml
index 6624cf6b1ce8..f935b4fab8ec 100644
--- a/Documentation/devicetree/bindings/thermal/sun8i-thermal.yaml
+++ b/Documentation/devicetree/bindings/thermal/sun8i-thermal.yaml
@@ -17,6 +17,7 @@ properties:
   compatible:
 enum:
   - allwinner,sun8i-h3-ths
+  - allwinner,sun50i-a64-ths
   - allwinner,sun50i-h6-ths
 
   reg:
-- 
2.17.1



[PATCH v5 08/18] thermal: sun8i: support mod clocks

2019-08-09 Thread Yangtao Li
H3 has an extra clock, so introduce has_mod_clk in ths_thermal_chip and
mod_clk in ths_device, and add handling of that clock.

This is preparatory work for supporting it.

Signed-off-by: Yangtao Li 
---
 drivers/thermal/sun8i_thermal.c | 17 -
 1 file changed, 16 insertions(+), 1 deletion(-)

diff --git a/drivers/thermal/sun8i_thermal.c b/drivers/thermal/sun8i_thermal.c
index b934bc81eba7..6f4294c2aba7 100644
--- a/drivers/thermal/sun8i_thermal.c
+++ b/drivers/thermal/sun8i_thermal.c
@@ -54,6 +54,7 @@ struct tsensor {
 };
 
 struct ths_thermal_chip {
+   boolhas_mod_clk;
int sensor_num;
int offset;
int scale;
@@ -69,6 +70,7 @@ struct ths_device {
struct regmap   *regmap;
struct reset_control*reset;
struct clk  *bus_clk;
+   struct clk  *mod_clk;
struct tsensor  sensor[MAX_SENSOR_NUM];
 };
 
@@ -274,6 +276,12 @@ static int sun8i_ths_resource_init(struct ths_device 
*tmdev)
if (IS_ERR(tmdev->bus_clk))
return PTR_ERR(tmdev->bus_clk);
 
+   if (tmdev->chip->has_mod_clk) {
+   tmdev->mod_clk = devm_clk_get(>dev, "mod");
+   if (IS_ERR(tmdev->mod_clk))
+   return PTR_ERR(tmdev->mod_clk);
+   }
+
ret = reset_control_deassert(tmdev->reset);
if (ret)
return ret;
@@ -282,12 +290,18 @@ static int sun8i_ths_resource_init(struct ths_device 
*tmdev)
if (ret)
goto assert_reset;
 
-   ret = sun50i_ths_calibrate(tmdev);
+   ret = clk_prepare_enable(tmdev->mod_clk);
if (ret)
goto bus_disable;
 
+   ret = sun50i_ths_calibrate(tmdev);
+   if (ret)
+   goto mod_disable;
+
return 0;
 
+mod_disable:
+   clk_disable_unprepare(tmdev->mod_clk);
 bus_disable:
clk_disable_unprepare(tmdev->bus_clk);
 assert_reset:
@@ -395,6 +409,7 @@ static int sun8i_ths_remove(struct platform_device *pdev)
 {
struct ths_device *tmdev = platform_get_drvdata(pdev);
 
+   clk_disable_unprepare(tmdev->mod_clk);
clk_disable_unprepare(tmdev->bus_clk);
reset_control_assert(tmdev->reset);
 
-- 
2.17.1



[PATCH v5 09/18] thermal: sun8i: rework for ths calibrate func

2019-08-09 Thread Yangtao Li
Here, we do something to prepare for the subsequent
support of multiple platforms.

1) rename sun50i_ths_calibrate to sun8i_ths_calibrate, because
   this function should be suitable for all platforms now.

2) introduce calibrate callback to mask calibration method
   differences.

Signed-off-by: Yangtao Li 
---
 drivers/thermal/sun8i_thermal.c | 86 ++---
 1 file changed, 48 insertions(+), 38 deletions(-)

diff --git a/drivers/thermal/sun8i_thermal.c b/drivers/thermal/sun8i_thermal.c
index 6f4294c2aba7..47c20c4c69e7 100644
--- a/drivers/thermal/sun8i_thermal.c
+++ b/drivers/thermal/sun8i_thermal.c
@@ -60,6 +60,8 @@ struct ths_thermal_chip {
int scale;
int ft_deviation;
int temp_data_base;
+   int (*calibrate)(struct ths_device *tmdev,
+u16 *caldata, int callen);
int (*init)(struct ths_device *tmdev);
int (*irq_ack)(struct ths_device *tmdev);
 };
@@ -152,45 +154,14 @@ static irqreturn_t sun8i_irq_thread(int irq, void *data)
return IRQ_HANDLED;
 }
 
-static int sun50i_ths_calibrate(struct ths_device *tmdev)
+static int sun50i_h6_ths_calibrate(struct ths_device *tmdev,
+  u16 *caldata, int callen)
 {
-   struct nvmem_cell *calcell;
struct device *dev = tmdev->dev;
-   u16 *caldata;
-   size_t callen;
-   int ft_temp;
-   int i, ret = 0;
-
-   calcell = devm_nvmem_cell_get(dev, "calib");
-   if (IS_ERR(calcell)) {
-   if (PTR_ERR(calcell) == -EPROBE_DEFER)
-   return -EPROBE_DEFER;
-   /*
-* Even if the external calibration data stored in sid is
-* not accessible, the THS hardware can still work, although
-* the data won't be so accurate.
-*
-* The default value of calibration register is 0x800 for
-* every sensor, and the calibration value is usually 0x7xx
-* or 0x8xx, so they won't be away from the default value
-* for a lot.
-*
-* So here we do not return error if the calibartion data is
-* not available, except the probe needs deferring.
-*/
-   goto out;
-   }
+   int i, ft_temp;
 
-   caldata = nvmem_cell_read(calcell, );
-   if (IS_ERR(caldata)) {
-   ret = PTR_ERR(caldata);
-   goto out;
-   }
-
-   if (!caldata[0] || callen < 2 + 2 * tmdev->chip->sensor_num) {
-   ret = -EINVAL;
-   goto out_free;
-   }
+   if (!caldata[0] || callen < 2 + 2 * tmdev->chip->sensor_num)
+   return -EINVAL;
 
/*
 * efuse layout:
@@ -245,7 +216,45 @@ static int sun50i_ths_calibrate(struct ths_device *tmdev)
   cdata << offset);
}
 
-out_free:
+   return 0;
+}
+
+static int sun8i_ths_calibrate(struct ths_device *tmdev)
+{
+   struct nvmem_cell *calcell;
+   struct device *dev = tmdev->dev;
+   u16 *caldata;
+   size_t callen;
+   int ret = 0;
+
+   calcell = devm_nvmem_cell_get(dev, "calib");
+   if (IS_ERR(calcell)) {
+   if (PTR_ERR(calcell) == -EPROBE_DEFER)
+   return -EPROBE_DEFER;
+   /*
+* Even if the external calibration data stored in sid is
+* not accessible, the THS hardware can still work, although
+* the data won't be so accurate.
+*
+* The default value of calibration register is 0x800 for
+* every sensor, and the calibration value is usually 0x7xx
+* or 0x8xx, so they won't be away from the default value
+* for a lot.
+*
+* So here we do not return error if the calibartion data is
+* not available, except the probe needs deferring.
+*/
+   goto out;
+   }
+
+   caldata = nvmem_cell_read(calcell, );
+   if (IS_ERR(caldata)) {
+   ret = PTR_ERR(caldata);
+   goto out;
+   }
+
+   tmdev->chip->calibrate(tmdev, caldata, callen);
+
kfree(caldata);
 out:
return ret;
@@ -294,7 +303,7 @@ static int sun8i_ths_resource_init(struct ths_device *tmdev)
if (ret)
goto bus_disable;
 
-   ret = sun50i_ths_calibrate(tmdev);
+   ret = sun8i_ths_calibrate(tmdev);
if (ret)
goto mod_disable;
 
@@ -422,6 +431,7 @@ static const struct ths_thermal_chip sun50i_h6_ths = {
.scale = -67,
.ft_deviation = SUN50I_H6_FT_DEVIATION,
.temp_data_base = SUN50I_H6_THS_TEMP_DATA,
+   .calibrate = sun50i_h6_ths_calibrate,
.init = sun50i_h6_thermal_init,
.irq_ack = 

[PATCH v5 10/18] dt-bindings: thermal: add binding document for h3 thermal controller

2019-08-09 Thread Yangtao Li
This patch adds binding document for allwinner h3 thermal controller.

Signed-off-by: Yangtao Li 
---
 .../bindings/thermal/sun8i-thermal.yaml   | 81 ++-
 1 file changed, 78 insertions(+), 3 deletions(-)

diff --git a/Documentation/devicetree/bindings/thermal/sun8i-thermal.yaml 
b/Documentation/devicetree/bindings/thermal/sun8i-thermal.yaml
index e0973199ba3c..6624cf6b1ce8 100644
--- a/Documentation/devicetree/bindings/thermal/sun8i-thermal.yaml
+++ b/Documentation/devicetree/bindings/thermal/sun8i-thermal.yaml
@@ -16,6 +16,7 @@ description: |-
 properties:
   compatible:
 enum:
+  - allwinner,sun8i-h3-ths
   - allwinner,sun50i-h6-ths
 
   reg:
@@ -28,13 +29,21 @@ properties:
 maxItems: 1
 
   clocks:
-maxItems: 1
+minItems: 1
+maxItems: 2
+items:
+  - description: ths bus clock
+  - description: ths mod clock
 
   clock-names:
-const: bus
+minItems: 1
+maxItems: 2
+items:
+  - const: bus
+  - const: mod
 
   "#thermal-sensor-cells":
-const: 1
+enum: [ 0, 1 ]
 
   nvmem-cells:
 description: ths calibrate data
@@ -51,9 +60,75 @@ required:
   - interrupts
   - "#thermal-sensor-cells"
 
+allOf:
+  - if:
+  properties:
+compatible:
+  contains:
+const: allwinner,sun50i-h6-ths
+
+then:
+  properties:
+clocks:
+  minItems: 1
+  maxItems: 1
+
+clock-names:
+  minItems: 1
+  maxItems: 1
+
+else:
+  properties:
+clocks:
+  minItems: 2
+  maxItems: 2
+
+clock-names:
+  minItems: 2
+  maxItems: 2
+
+  - if:
+  properties:
+compatible:
+  contains:
+const: allwinner,sun8i-h3-ths
+
+then:
+  properties:
+"#thermal-sensor-cells":
+  const: 0
+
+else:
+  properties:
+"#thermal-sensor-cells":
+  const: 1
+
 additionalProperties: false
 
 examples:
+  - |
+ths: ths@1c25000 {
+compatible = "allwinner,sun8i-h3-ths";
+reg = <0x01c25000 0x400>;
+clocks = < CLK_BUS_THS>, < CLK_THS>;
+clock-names = "bus", "mod";
+resets = < RST_BUS_THS>;
+interrupts = ;
+nvmem-cells = <_calib>;
+nvmem-cell-names = "calib";
+#thermal-sensor-cells = <0>;
+};
+
+sid: sid@1c14000 {
+compatible = "allwinner,sun8i-h3-sid";
+reg = <0x1c14000 0x400>;
+#address-cells = <1>;
+#size-cells = <1>;
+tsen_calib: calib@34 {
+reg = <0x34 2>;
+};
+};
+
   - |
 ths: ths@5070400 {
 compatible = "allwinner,sun50i-h6-ths";
-- 
2.17.1



[PATCH v5 06/18] thermal: sun8i: get ths init func from device compatible

2019-08-09 Thread Yangtao Li
There are some differences in register initialization for
different socs. So we get different initialization functions
from device compatible.

Signed-off-by: Yangtao Li 
---
 drivers/thermal/sun8i_thermal.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/drivers/thermal/sun8i_thermal.c b/drivers/thermal/sun8i_thermal.c
index f338fa25b98e..ad877b54f58e 100644
--- a/drivers/thermal/sun8i_thermal.c
+++ b/drivers/thermal/sun8i_thermal.c
@@ -59,6 +59,7 @@ struct ths_thermal_chip {
int scale;
int ft_deviation;
int temp_data_base;
+   int (*init)(struct ths_device *tmdev);
 };
 
 struct ths_device {
@@ -356,7 +357,7 @@ static int sun8i_ths_probe(struct platform_device *pdev)
if (irq < 0)
return irq;
 
-   ret = sun50i_h6_thermal_init(tmdev);
+   ret = tmdev->chip->init(tmdev);
if (ret)
return ret;
 
@@ -394,6 +395,7 @@ static const struct ths_thermal_chip sun50i_h6_ths = {
.scale = -67,
.ft_deviation = SUN50I_H6_FT_DEVIATION,
.temp_data_base = SUN50I_H6_THS_TEMP_DATA,
+   .init = sun50i_h6_thermal_init,
 };
 
 static const struct of_device_id of_ths_match[] = {
-- 
2.17.1



[PATCH v5 15/18] thermal: sun8i: allow to use custom temperature calculation function

2019-08-09 Thread Yangtao Li
From: Icenowy Zheng 

The H5 temperature calculation function is strange. Firstly, it's
segmented. Secondly, the formulas of the two sensors are different in the
second segment.

Allow the use of a custom temperature calculation function, in case
the function is complex.

Signed-off-by: Icenowy Zheng 
---
 drivers/thermal/sun8i_thermal.c | 12 
 1 file changed, 8 insertions(+), 4 deletions(-)

diff --git a/drivers/thermal/sun8i_thermal.c b/drivers/thermal/sun8i_thermal.c
index 3259081da841..a761e2afda08 100644
--- a/drivers/thermal/sun8i_thermal.c
+++ b/drivers/thermal/sun8i_thermal.c
@@ -76,6 +76,7 @@ struct ths_thermal_chip {
 u16 *caldata, int callen);
int (*init)(struct ths_device *tmdev);
int (*irq_ack)(struct ths_device *tmdev);
+   int (*calc_temp)(int id, int reg);
 };
 
 struct ths_device {
@@ -90,9 +91,12 @@ struct ths_device {
 
 /* Temp Unit: millidegree Celsius */
 static int sun8i_ths_reg2temp(struct ths_device *tmdev,
- int reg)
+ int id, int reg)
 {
-   return (reg + tmdev->chip->offset) * tmdev->chip->scale;
+   if (tmdev->chip->calc_temp)
+   return tmdev->chip->calc_temp(id, reg);
+   else
+   return (reg + tmdev->chip->offset) * tmdev->chip->scale;
 }
 
 static int sun8i_ths_get_temp(void *data, int *temp)
@@ -108,7 +112,7 @@ static int sun8i_ths_get_temp(void *data, int *temp)
if (!val)
return -EAGAIN;
 
-   *temp = sun8i_ths_reg2temp(tmdev, val);
+   *temp = sun8i_ths_reg2temp(tmdev, s->id, val);
/*
 * XX - According to the original sdk, there are some platforms(rarely)
 * that add a fixed offset value after calculating the temperature
@@ -232,7 +236,7 @@ static int sun50i_h6_ths_calibrate(struct ths_device *tmdev,
 
for (i = 0; i < tmdev->chip->sensor_num; i++) {
int reg = (int)caldata[i + 1];
-   int sensor_temp = sun8i_ths_reg2temp(tmdev, reg);
+   int sensor_temp = sun8i_ths_reg2temp(tmdev, i, reg);
int delta, cdata, offset;
 
/*
-- 
2.17.1



[PATCH v5 04/18] thermal: sun8i: get ths sensor number from device compatible

2019-08-09 Thread Yangtao Li
For different socs, the number of ths sensors is different.
So we need to do some work in order to support more soc.

Signed-off-by: Yangtao Li 
---
 drivers/thermal/sun8i_thermal.c | 28 
 1 file changed, 20 insertions(+), 8 deletions(-)

diff --git a/drivers/thermal/sun8i_thermal.c b/drivers/thermal/sun8i_thermal.c
index 2ce36fa3fec3..e9c2acbaac74 100644
--- a/drivers/thermal/sun8i_thermal.c
+++ b/drivers/thermal/sun8i_thermal.c
@@ -22,7 +22,6 @@
 
 #define MAX_SENSOR_NUM 4
 
-#define SUN50I_H6_SENSOR_NUM   2
 #define SUN50I_H6_OFFSET   -2794
 #define SUN50I_H6_SCALE-67
 
@@ -57,7 +56,12 @@ struct tsensor {
int id;
 };
 
+struct ths_thermal_chip {
+   int sensor_num;
+};
+
 struct ths_device {
+   const struct ths_thermal_chip   *chip;
struct device   *dev;
struct regmap   *regmap;
struct reset_control*reset;
@@ -117,7 +121,7 @@ static irqreturn_t sun50i_h6_irq_thread(int irq, void *data)
 
regmap_read(tmdev->regmap, SUN50I_H6_THS_DIS, );
 
-   for (i = 0; i < SUN50I_H6_SENSOR_NUM; i++) {
+   for (i = 0; i < tmdev->chip->sensor_num; i++) {
 
if (state & SUN50I_H6_THS_DATA_IRQ_STS(i)) {
/* clear data irq pending */
@@ -167,7 +171,7 @@ static int sun50i_ths_calibrate(struct ths_device *tmdev)
goto out;
}
 
-   if (!caldata[0] || callen < 2 + 2 * SUN50I_H6_SENSOR_NUM) {
+   if (!caldata[0] || callen < 2 + 2 * tmdev->chip->sensor_num) {
ret = -EINVAL;
goto out_free;
}
@@ -190,7 +194,7 @@ static int sun50i_ths_calibrate(struct ths_device *tmdev)
 */
ft_temp = caldata[0] & FT_TEMP_MASK;
 
-   for (i = 0; i < SUN50I_H6_SENSOR_NUM; i++) {
+   for (i = 0; i < tmdev->chip->sensor_num; i++) {
int reg = (int)caldata[i + 1];
int sensor_temp = sun8i_ths_reg2temp(tmdev, reg);
int delta, cdata, offset;
@@ -297,10 +301,10 @@ static int sun50i_h6_thermal_init(struct ths_device 
*tmdev)
regmap_write(tmdev->regmap, SUN50I_H6_THS_PC,
 SUN50I_H6_THS_PC_TEMP_PERIOD(58));
/* enable sensor */
-   val = GENMASK(SUN50I_H6_SENSOR_NUM - 1, 0);
+   val = GENMASK(tmdev->chip->sensor_num - 1, 0);
regmap_write(tmdev->regmap, SUN50I_H6_THS_ENABLE, val);
/* thermal data interrupt enable */
-   val = GENMASK(SUN50I_H6_SENSOR_NUM - 1, 0);
+   val = GENMASK(tmdev->chip->sensor_num - 1, 0);
regmap_write(tmdev->regmap, SUN50I_H6_THS_DIC, val);
 
return 0;
@@ -311,7 +315,7 @@ static int sun8i_ths_register(struct ths_device *tmdev)
struct thermal_zone_device *tzd;
int i;
 
-   for (i = 0; i < SUN50I_H6_SENSOR_NUM; i++) {
+   for (i = 0; i < tmdev->chip->sensor_num; i++) {
tmdev->sensor[i].tmdev = tmdev;
tmdev->sensor[i].id = i;
tmdev->sensor[i].tzd =
@@ -337,6 +341,10 @@ static int sun8i_ths_probe(struct platform_device *pdev)
return -ENOMEM;
 
tmdev->dev = dev;
+   tmdev->chip = of_device_get_match_data(>dev);
+   if (!tmdev->chip)
+   return -EINVAL;
+
platform_set_drvdata(pdev, tmdev);
 
ret = sun8i_ths_resource_init(tmdev);
@@ -379,8 +387,12 @@ static int sun8i_ths_remove(struct platform_device *pdev)
return 0;
 }
 
+static const struct ths_thermal_chip sun50i_h6_ths = {
+   .sensor_num = 2,
+};
+
 static const struct of_device_id of_ths_match[] = {
-   { .compatible = "allwinner,sun50i-h6-ths"},
+   { .compatible = "allwinner,sun50i-h6-ths", .data = _h6_ths },
{ /* sentinel */ },
 };
 MODULE_DEVICE_TABLE(of, of_ths_match);
-- 
2.17.1



[PATCH v5 14/18] dt-bindings: thermal: add binding document for h5 thermal controller

2019-08-09 Thread Yangtao Li
This patch adds binding document for allwinner h5 thermal controller.

Signed-off-by: Yangtao Li 
---
 Documentation/devicetree/bindings/thermal/sun8i-thermal.yaml | 1 +
 1 file changed, 1 insertion(+)

diff --git a/Documentation/devicetree/bindings/thermal/sun8i-thermal.yaml 
b/Documentation/devicetree/bindings/thermal/sun8i-thermal.yaml
index f935b4fab8ec..2e28f5b33d33 100644
--- a/Documentation/devicetree/bindings/thermal/sun8i-thermal.yaml
+++ b/Documentation/devicetree/bindings/thermal/sun8i-thermal.yaml
@@ -18,6 +18,7 @@ properties:
 enum:
   - allwinner,sun8i-h3-ths
   - allwinner,sun50i-a64-ths
+  - allwinner,sun50i-h5-ths
   - allwinner,sun50i-h6-ths
 
   reg:
-- 
2.17.1



[PATCH v5 13/18] thermal: sun8i: add thermal driver for A64

2019-08-09 Thread Yangtao Li
From: Vasily Khoruzhick 

Thermal sensor controller in A64 is similar to H3, but it has 3 sensors.
Extend H3 functions to add support for multiple sensors.

Signed-off-by: Vasily Khoruzhick 
---
 drivers/thermal/sun8i_thermal.c | 12 
 1 file changed, 12 insertions(+)

diff --git a/drivers/thermal/sun8i_thermal.c b/drivers/thermal/sun8i_thermal.c
index 41ce8cdc0546..3259081da841 100644
--- a/drivers/thermal/sun8i_thermal.c
+++ b/drivers/thermal/sun8i_thermal.c
@@ -515,6 +515,17 @@ static const struct ths_thermal_chip sun8i_h3_ths = {
.irq_ack = sun8i_h3_irq_ack,
 };
 
+static const struct ths_thermal_chip sun50i_a64_ths = {
+   .sensor_num = 3,
+   .offset = -2170,
+   .scale = -117,
+   .has_mod_clk = true,
+   .temp_data_base = SUN8I_THS_TEMP_DATA,
+   .calibrate = sun8i_h3_ths_calibrate,
+   .init = sun8i_h3_thermal_init,
+   .irq_ack = sun8i_h3_irq_ack,
+};
+
 static const struct ths_thermal_chip sun50i_h6_ths = {
.sensor_num = 2,
.offset = -2794,
@@ -528,6 +539,7 @@ static const struct ths_thermal_chip sun50i_h6_ths = {
 
 static const struct of_device_id of_ths_match[] = {
{ .compatible = "allwinner,sun8i-h3-ths", .data = _h3_ths },
+   { .compatible = "allwinner,sun50i-a64-ths", .data = _a64_ths },
{ .compatible = "allwinner,sun50i-h6-ths", .data = _h6_ths },
{ /* sentinel */ },
 };
-- 
2.17.1



[PATCH v5 11/18] thermal: sun8i: add thermal driver for h3

2019-08-09 Thread Yangtao Li
This patch adds the support for allwinner h3 thermal sensor.

Signed-off-by: Yangtao Li 
---
 drivers/thermal/sun8i_thermal.c | 91 +
 1 file changed, 91 insertions(+)

diff --git a/drivers/thermal/sun8i_thermal.c b/drivers/thermal/sun8i_thermal.c
index 47c20c4c69e7..41ce8cdc0546 100644
--- a/drivers/thermal/sun8i_thermal.c
+++ b/drivers/thermal/sun8i_thermal.c
@@ -27,6 +27,14 @@
 #define TEMP_TO_REG672
 #define CALIBRATE_DEFAULT  0x800
 
+#define SUN8I_THS_CTRL00x00
+#define SUN8I_THS_CTRL20x40
+#define SUN8I_THS_IC   0x44
+#define SUN8I_THS_IS   0x48
+#define SUN8I_THS_MFC  0x70
+#define SUN8I_THS_TEMP_CALIB   0x74
+#define SUN8I_THS_TEMP_DATA0x80
+
 #define SUN50I_THS_CTRL0   0x00
 #define SUN50I_H6_THS_ENABLE   0x04
 #define SUN50I_H6_THS_PC   0x08
@@ -36,6 +44,10 @@
 #define SUN50I_H6_THS_TEMP_CALIB   0xa0
 #define SUN50I_H6_THS_TEMP_DATA0xc0
 
+#define SUN8I_THS_CTRL0_T_ACQ0(x)  (GENMASK(15, 0) & (x))
+#define SUN8I_THS_CTRL2_T_ACQ1(x)  ((GENMASK(15, 0) & (x)) << 16)
+#define SUN8I_THS_DATA_IRQ_STS(x)  BIT(x + 8)
+
 #define SUN50I_THS_CTRL0_T_ACQ(x)  ((GENMASK(15, 0) & (x)) << 16)
 #define SUN50I_THS_FILTER_EN   BIT(2)
 #define SUN50I_THS_FILTER_TYPE(x)  (GENMASK(1, 0) & (x))
@@ -121,6 +133,23 @@ static const struct regmap_config config = {
.fast_io = true,
 };
 
+static int sun8i_h3_irq_ack(struct ths_device *tmdev)
+{
+   int i, state, ret = 0;
+
+   regmap_read(tmdev->regmap, SUN8I_THS_IS, &state);
+
+   for (i = 0; i < tmdev->chip->sensor_num; i++) {
+   if (state & SUN8I_THS_DATA_IRQ_STS(i)) {
+   regmap_write(tmdev->regmap, SUN8I_THS_IS,
+SUN8I_THS_DATA_IRQ_STS(i));
+   ret |= BIT(i);
+   }
+   }
+
+   return ret;
+}
+
 static int sun50i_h6_irq_ack(struct ths_device *tmdev)
 {
int i, state, ret = 0;
@@ -154,6 +183,26 @@ static irqreturn_t sun8i_irq_thread(int irq, void *data)
return IRQ_HANDLED;
 }
 
+static int sun8i_h3_ths_calibrate(struct ths_device *tmdev,
+ u16 *caldata, int callen)
+{
+   int i;
+
+   if (!caldata[0] || callen < 2 * tmdev->chip->sensor_num)
+   return -EINVAL;
+
+   for (i = 0; i < tmdev->chip->sensor_num; i++) {
+   int offset = (i % 2) << 4;
+
+   regmap_update_bits(tmdev->regmap,
+  SUN8I_THS_TEMP_CALIB + (4 * (i >> 1)),
+  0xfff << offset,
+  caldata[i] << offset);
+   }
+
+   return 0;
+}
+
 static int sun50i_h6_ths_calibrate(struct ths_device *tmdev,
   u16 *caldata, int callen)
 {
@@ -319,6 +368,36 @@ static int sun8i_ths_resource_init(struct ths_device 
*tmdev)
return ret;
 }
 
+static int sun8i_h3_thermal_init(struct ths_device *tmdev)
+{
+   int val;
+
+   /* average over 4 samples */
+   regmap_write(tmdev->regmap, SUN8I_THS_MFC,
+SUN50I_THS_FILTER_EN |
+SUN50I_THS_FILTER_TYPE(1));
+   /*
+* period = (x + 1) * 4096 / clkin; ~10ms
+* enable data interrupt
+*/
+   val = GENMASK(7 + tmdev->chip->sensor_num, 8);
+   regmap_write(tmdev->regmap, SUN8I_THS_IC,
+SUN50I_H6_THS_PC_TEMP_PERIOD(58) | val);
+   /*
+* clkin = 24MHz
+* T acquire = clkin / (x + 1)
+*   = 20us
+* enable sensor
+*/
+   regmap_write(tmdev->regmap, SUN8I_THS_CTRL0,
+SUN8I_THS_CTRL0_T_ACQ0(479));
+   val = GENMASK(tmdev->chip->sensor_num - 1, 0);
+   regmap_write(tmdev->regmap, SUN8I_THS_CTRL2,
+SUN8I_THS_CTRL2_T_ACQ1(479) | val);
+
+   return 0;
+}
+
 static int sun50i_h6_thermal_init(struct ths_device *tmdev)
 {
int val;
@@ -425,6 +504,17 @@ static int sun8i_ths_remove(struct platform_device *pdev)
return 0;
 }
 
+static const struct ths_thermal_chip sun8i_h3_ths = {
+   .sensor_num = 1,
+   .offset = -1794,
+   .scale = -121,
+   .has_mod_clk = true,
+   .temp_data_base = SUN8I_THS_TEMP_DATA,
+   .calibrate = sun8i_h3_ths_calibrate,
+   .init = sun8i_h3_thermal_init,
+   .irq_ack = sun8i_h3_irq_ack,
+};
+
 static const struct ths_thermal_chip sun50i_h6_ths = {
.sensor_num = 2,
.offset = -2794,
@@ -437,6 +527,7 @@ static const struct ths_thermal_chip sun50i_h6_ths = {
 };
 
 static const struct of_device_id of_ths_match[] = {
+   { .compatible = 

[PATCH v5 07/18] thermal: sun8i: rework for ths irq handler func

2019-08-09 Thread Yangtao Li
Here, we do something to prepare for the subsequent
support of multiple platforms.

1) rename sun50i_h6_irq_thread to sun8i_irq_thread, because
   this function should be suitable for all platforms.

2) introduce irq_ack callback to mask interrupt register
   differences.

Signed-off-by: Yangtao Li 
---
 drivers/thermal/sun8i_thermal.c | 27 ---
 1 file changed, 20 insertions(+), 7 deletions(-)

diff --git a/drivers/thermal/sun8i_thermal.c b/drivers/thermal/sun8i_thermal.c
index ad877b54f58e..b934bc81eba7 100644
--- a/drivers/thermal/sun8i_thermal.c
+++ b/drivers/thermal/sun8i_thermal.c
@@ -60,6 +60,7 @@ struct ths_thermal_chip {
int ft_deviation;
int temp_data_base;
int (*init)(struct ths_device *tmdev);
+   int (*irq_ack)(struct ths_device *tmdev);
 };
 
 struct ths_device {
@@ -116,23 +117,34 @@ static const struct regmap_config config = {
.fast_io = true,
 };
 
-static irqreturn_t sun50i_h6_irq_thread(int irq, void *data)
+static int sun50i_h6_irq_ack(struct ths_device *tmdev)
 {
-   struct ths_device *tmdev = data;
-   int i, state;
+   int i, state, ret = 0;
 
regmap_read(tmdev->regmap, SUN50I_H6_THS_DIS, &state);
 
for (i = 0; i < tmdev->chip->sensor_num; i++) {
-
if (state & SUN50I_H6_THS_DATA_IRQ_STS(i)) {
-   /* clear data irq pending */
regmap_write(tmdev->regmap, SUN50I_H6_THS_DIS,
 SUN50I_H6_THS_DATA_IRQ_STS(i));
+   ret |= BIT(i);
+   }
+   }
+
+   return ret;
+}
 
+static irqreturn_t sun8i_irq_thread(int irq, void *data)
+{
+   struct ths_device *tmdev = data;
+   int i, state;
+
+   state = tmdev->chip->irq_ack(tmdev);
+
+   for (i = 0; i < tmdev->chip->sensor_num; i++) {
+   if (state & BIT(i))
thermal_zone_device_update(tmdev->sensor[i].tzd,
   THERMAL_EVENT_UNSPECIFIED);
-   }
}
 
return IRQ_HANDLED;
@@ -371,7 +383,7 @@ static int sun8i_ths_probe(struct platform_device *pdev)
 * the end.
 */
ret = devm_request_threaded_irq(dev, irq, NULL,
-   sun50i_h6_irq_thread,
+   sun8i_irq_thread,
IRQF_ONESHOT, "ths", tmdev);
if (ret)
return ret;
@@ -396,6 +408,7 @@ static const struct ths_thermal_chip sun50i_h6_ths = {
.ft_deviation = SUN50I_H6_FT_DEVIATION,
.temp_data_base = SUN50I_H6_THS_TEMP_DATA,
.init = sun50i_h6_thermal_init,
+   .irq_ack = sun50i_h6_irq_ack,
 };
 
 static const struct of_device_id of_ths_match[] = {
-- 
2.17.1



[PATCH v5 05/18] thermal: sun8i: rework for sun8i_ths_get_temp()

2019-08-09 Thread Yangtao Li
For different socs, the way they get and calculate the
temperature is roughly the same. So get the difference
from device compatible.

Difference point:
  1) temperature calculation formula parameters
  2) ths data register start address

Signed-off-by: Yangtao Li 
---
 drivers/thermal/sun8i_thermal.c | 17 +++--
 1 file changed, 11 insertions(+), 6 deletions(-)

diff --git a/drivers/thermal/sun8i_thermal.c b/drivers/thermal/sun8i_thermal.c
index e9c2acbaac74..f338fa25b98e 100644
--- a/drivers/thermal/sun8i_thermal.c
+++ b/drivers/thermal/sun8i_thermal.c
@@ -22,9 +22,6 @@
 
 #define MAX_SENSOR_NUM 4
 
-#define SUN50I_H6_OFFSET   -2794
-#define SUN50I_H6_SCALE-67
-
 #define FT_TEMP_MASK   GENMASK(11, 0)
 #define TEMP_CALIB_MASKGENMASK(11, 0)
 #define TEMP_TO_REG672
@@ -58,6 +55,10 @@ struct tsensor {
 
 struct ths_thermal_chip {
int sensor_num;
+   int offset;
+   int scale;
+   int ft_deviation;
+   int temp_data_base;
 };
 
 struct ths_device {
@@ -73,7 +74,7 @@ struct ths_device {
 static int sun8i_ths_reg2temp(struct ths_device *tmdev,
  int reg)
 {
-   return (reg + SUN50I_H6_OFFSET) * SUN50I_H6_SCALE;
+   return (reg + tmdev->chip->offset) * tmdev->chip->scale;
 }
 
 static int sun8i_ths_get_temp(void *data, int *temp)
@@ -82,7 +83,7 @@ static int sun8i_ths_get_temp(void *data, int *temp)
struct ths_device *tmdev = s->tmdev;
int val;
 
-   regmap_read(tmdev->regmap, SUN50I_H6_THS_TEMP_DATA +
+   regmap_read(tmdev->regmap, tmdev->chip->temp_data_base +
0x4 * s->id, &val);
 
/* ths have no data yet */
@@ -98,7 +99,7 @@ static int sun8i_ths_get_temp(void *data, int *temp)
 * temperature above is also used when the sensor is calibrated. If
 * do this, the correct calibration formula is hard to know.
 */
-   *temp += SUN50I_H6_FT_DEVIATION;
+   *temp += tmdev->chip->ft_deviation;
 
return 0;
 }
@@ -389,6 +390,10 @@ static int sun8i_ths_remove(struct platform_device *pdev)
 
 static const struct ths_thermal_chip sun50i_h6_ths = {
.sensor_num = 2,
+   .offset = -2794,
+   .scale = -67,
+   .ft_deviation = SUN50I_H6_FT_DEVIATION,
+   .temp_data_base = SUN50I_H6_THS_TEMP_DATA,
 };
 
 static const struct of_device_id of_ths_match[] = {
-- 
2.17.1



[PATCH v5 00/18] add thermal driver for h6

2019-08-09 Thread Yangtao Li
This patchset adds support for the A64, H3, H5, H6 and R40 thermal sensors.

Thx to Icenowy and Vasily.

BTW, do a cleanup in the thermal Makefile.

Icenowy Zheng (3):
  thermal: sun8i: allow to use custom temperature calculation function
  thermal: sun8i: add support for Allwinner H5 thermal sensor
  thermal: sun8i: add support for Allwinner R40 thermal sensor

Vasily Khoruzhick (1):
  thermal: sun8i: add thermal driver for A64

Yangtao Li (14):
  thermal: sun8i: add thermal driver for h6
  dt-bindings: thermal: add binding document for h6 thermal controller
  thermal: fix indentation in makefile
  thermal: sun8i: get ths sensor number from device compatible
  thermal: sun8i: rework for sun8i_ths_get_temp()
  thermal: sun8i: get ths init func from device compatible
  thermal: sun8i: rework for ths irq handler func
  thermal: sun8i: support mod clocks
  thermal: sun8i: rework for ths calibrate func
  dt-bindings: thermal: add binding document for h3 thermal controller
  thermal: sun8i: add thermal driver for h3
  dt-bindings: thermal: add binding document for a64 thermal controller
  dt-bindings: thermal: add binding document for h5 thermal controller
  dt-bindings: thermal: add binding document for r40 thermal controller

 .../bindings/thermal/sun8i-thermal.yaml   | 157 +
 MAINTAINERS   |   7 +
 drivers/thermal/Kconfig   |  14 +
 drivers/thermal/Makefile  |   9 +-
 drivers/thermal/sun8i_thermal.c   | 596 ++
 5 files changed, 779 insertions(+), 4 deletions(-)
 create mode 100644 Documentation/devicetree/bindings/thermal/sun8i-thermal.yaml
 create mode 100644 drivers/thermal/sun8i_thermal.c

---
v5:
-add more support
-some trivial fixes
---
2.17.1



[PATCH v5 02/18] dt-bindings: thermal: add binding document for h6 thermal controller

2019-08-09 Thread Yangtao Li
This patch adds binding document for allwinner h6 thermal controller.

Signed-off-by: Yangtao Li 
---
 .../bindings/thermal/sun8i-thermal.yaml   | 79 +++
 1 file changed, 79 insertions(+)
 create mode 100644 Documentation/devicetree/bindings/thermal/sun8i-thermal.yaml

diff --git a/Documentation/devicetree/bindings/thermal/sun8i-thermal.yaml 
b/Documentation/devicetree/bindings/thermal/sun8i-thermal.yaml
new file mode 100644
index ..e0973199ba3c
--- /dev/null
+++ b/Documentation/devicetree/bindings/thermal/sun8i-thermal.yaml
@@ -0,0 +1,79 @@
+# SPDX-License-Identifier: GPL-2.0
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/thermal/sun8i-thermal.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Allwinner SUN8I Thermal Controller Device Tree Bindings
+
+maintainers:
+  - Yangtao Li 
+
+description: |-
+  This describes the device tree binding for the Allwinner thermal
+  controller which measures the on-SoC temperatures.
+
+properties:
+  compatible:
+enum:
+  - allwinner,sun50i-h6-ths
+
+  reg:
+maxItems: 1
+
+  interrupts:
+maxItems: 1
+
+  resets:
+maxItems: 1
+
+  clocks:
+maxItems: 1
+
+  clock-names:
+const: bus
+
+  "#thermal-sensor-cells":
+const: 1
+
+  nvmem-cells:
+description: ths calibrate data
+
+  nvmem-cell-names:
+const: calib
+
+required:
+  - compatible
+  - reg
+  - resets
+  - clocks
+  - clock-names
+  - interrupts
+  - "#thermal-sensor-cells"
+
+additionalProperties: false
+
+examples:
+  - |
+ths: ths@5070400 {
+compatible = "allwinner,sun50i-h6-ths";
+reg = <0x05070400 0x100>;
+clocks = < CLK_BUS_THS>;
+clock-names = "bus";
+resets = < RST_BUS_THS>;
+interrupts = ;
+nvmem-cells = <&tsen_calib>;
+nvmem-cell-names = "calib";
+#thermal-sensor-cells = <1>;
+};
+
+sid: sid@3006000 {
+compatible = "allwinner,sun50i-h6-sid";
+reg = <0x03006000 0x400>;
+#address-cells = <1>;
+#size-cells = <1>;
+tsen_calib: calib@14 {
+reg = <0x14 6>;
+};
+};
+...
-- 
2.17.1



[PATCH v5 01/18] thermal: sun8i: add thermal driver for h6

2019-08-09 Thread Yangtao Li
This patch adds the support for allwinner thermal sensor, within
allwinner SoC. It will register sensors for thermal framework
and use device tree to bind cooling device.

Signed-off-by: Yangtao Li 
---
 MAINTAINERS |   7 +
 drivers/thermal/Kconfig |  14 ++
 drivers/thermal/Makefile|   1 +
 drivers/thermal/sun8i_thermal.c | 399 
 4 files changed, 421 insertions(+)
 create mode 100644 drivers/thermal/sun8i_thermal.c

diff --git a/MAINTAINERS b/MAINTAINERS
index 47800d32cfbc..89dc43f4064d 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -682,6 +682,13 @@ L: linux-cry...@vger.kernel.org
 S: Maintained
 F: drivers/crypto/sunxi-ss/
 
+ALLWINNER THERMAL DRIVER
+M: Yangtao Li 
+L: linux...@vger.kernel.org
+S: Maintained
+F: Documentation/devicetree/bindings/thermal/sun8i-thermal.yaml
+F: drivers/thermal/sun8i_thermal.c
+
 ALLWINNER VPU DRIVER
 M: Maxime Ripard 
 M: Paul Kocialkowski 
diff --git a/drivers/thermal/Kconfig b/drivers/thermal/Kconfig
index 9966364a6deb..f8b73b32b92d 100644
--- a/drivers/thermal/Kconfig
+++ b/drivers/thermal/Kconfig
@@ -262,6 +262,20 @@ config SPEAR_THERMAL
  Enable this to plug the SPEAr thermal sensor driver into the Linux
  thermal framework.
 
+config SUN8I_THERMAL
+   tristate "Allwinner sun8i thermal driver"
+   depends on ARCH_SUNXI || COMPILE_TEST
+   depends on HAS_IOMEM
+   depends on NVMEM
+   depends on OF
+   depends on RESET_CONTROLLER
+   help
+ Support for the sun8i thermal sensor driver into the Linux thermal
+ framework.
+
+ To compile this driver as a module, choose M here: the
+ module will be called sun8i-thermal.
+
 config ROCKCHIP_THERMAL
tristate "Rockchip thermal driver"
depends on ARCH_ROCKCHIP || COMPILE_TEST
diff --git a/drivers/thermal/Makefile b/drivers/thermal/Makefile
index 74a37c7f847a..fa6f8b206281 100644
--- a/drivers/thermal/Makefile
+++ b/drivers/thermal/Makefile
@@ -31,6 +31,7 @@ thermal_sys-$(CONFIG_DEVFREQ_THERMAL) += devfreq_cooling.o
 obj-y  += broadcom/
 obj-$(CONFIG_THERMAL_MMIO) += thermal_mmio.o
 obj-$(CONFIG_SPEAR_THERMAL)+= spear_thermal.o
+obj-$(CONFIG_SUN8I_THERMAL) += sun8i_thermal.o
 obj-$(CONFIG_ROCKCHIP_THERMAL) += rockchip_thermal.o
 obj-$(CONFIG_RCAR_THERMAL) += rcar_thermal.o
 obj-$(CONFIG_RCAR_GEN3_THERMAL)+= rcar_gen3_thermal.o
diff --git a/drivers/thermal/sun8i_thermal.c b/drivers/thermal/sun8i_thermal.c
new file mode 100644
index ..2ce36fa3fec3
--- /dev/null
+++ b/drivers/thermal/sun8i_thermal.c
@@ -0,0 +1,399 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Thermal sensor driver for Allwinner SOC
+ * Copyright (C) 2019 Yangtao Li
+ *
+ * Based on the work of Icenowy Zheng 
+ * Based on the work of Ondrej Jirman 
+ * Based on the work of Josef Gajdusek 
+ */
+
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+
+#define MAX_SENSOR_NUM 4
+
+#define SUN50I_H6_SENSOR_NUM   2
+#define SUN50I_H6_OFFSET   -2794
+#define SUN50I_H6_SCALE-67
+
+#define FT_TEMP_MASK   GENMASK(11, 0)
+#define TEMP_CALIB_MASKGENMASK(11, 0)
+#define TEMP_TO_REG672
+#define CALIBRATE_DEFAULT  0x800
+
+#define SUN50I_THS_CTRL0   0x00
+#define SUN50I_H6_THS_ENABLE   0x04
+#define SUN50I_H6_THS_PC   0x08
+#define SUN50I_H6_THS_DIC  0x10
+#define SUN50I_H6_THS_DIS  0x20
+#define SUN50I_H6_THS_MFC  0x30
+#define SUN50I_H6_THS_TEMP_CALIB   0xa0
+#define SUN50I_H6_THS_TEMP_DATA0xc0
+
+#define SUN50I_THS_CTRL0_T_ACQ(x)  ((GENMASK(15, 0) & (x)) << 16)
+#define SUN50I_THS_FILTER_EN   BIT(2)
+#define SUN50I_THS_FILTER_TYPE(x)  (GENMASK(1, 0) & (x))
+#define SUN50I_H6_THS_PC_TEMP_PERIOD(x)	((GENMASK(19, 0) & (x)) << 12)
+#define SUN50I_H6_THS_DATA_IRQ_STS(x)  BIT(x)
+
+/* millidegree celsius */
+#define SUN50I_H6_FT_DEVIATION 7000
+
+struct ths_device;
+
+struct tsensor {
+   struct ths_device   *tmdev;
+   struct thermal_zone_device  *tzd;
+   int id;
+};
+
+struct ths_device {
+   struct device   *dev;
+   struct regmap   *regmap;
+   struct reset_control*reset;
+   struct clk  *bus_clk;
+   struct tsensor  sensor[MAX_SENSOR_NUM];
+};
+
+/* Temp Unit: millidegree Celsius */
+static int sun8i_ths_reg2temp(struct ths_device *tmdev,
+ int reg)
+{
+   return (reg + 

Re: [PATCH] RISC-V: Issue a local tlb flush if possible.

2019-08-09 Thread Atish Patra


On 8/9/19, 8:30 PM, "Anup Patel"  wrote:

On Sat, Aug 10, 2019 at 7:13 AM Atish Patra  wrote:
>
> In RISC-V, tlb flush happens via SBI which is expensive.
> If the target cpumask contains a local hartid, some cost
> can be saved by issuing a local tlb flush as we do that
> in OpenSBI anyways.
>
> Signed-off-by: Atish Patra 
> ---
>  arch/riscv/include/asm/tlbflush.h | 33 +++
>  1 file changed, 29 insertions(+), 4 deletions(-)
>
> diff --git a/arch/riscv/include/asm/tlbflush.h 
b/arch/riscv/include/asm/tlbflush.h
> index 687dd19735a7..b32ba4fa5888 100644
> --- a/arch/riscv/include/asm/tlbflush.h
> +++ b/arch/riscv/include/asm/tlbflush.h
> @@ -8,6 +8,7 @@
>  #define _ASM_RISCV_TLBFLUSH_H
>
>  #include 
> +#include 
>  #include 
>
>  /*
> @@ -46,14 +47,38 @@ static inline void remote_sfence_vma(struct cpumask 
*cmask, unsigned long start,
>  unsigned long size)
>  {
> struct cpumask hmask;
> +   struct cpumask tmask;
> +   int cpuid = smp_processor_id();
>
> cpumask_clear(&hmask);
> -   riscv_cpuid_to_hartid_mask(cmask, &hmask);
> -   sbi_remote_sfence_vma(hmask.bits, start, size);
> +   cpumask_clear(&tmask);
> +
> +   if (cmask)
> +   cpumask_copy(&tmask, cmask);
> +   else
> +   cpumask_copy(&tmask, cpu_online_mask);

This can be further simplified.

We can totally avoid tmask, cpumask_copy(), and cpumask_clear()
by directly updating hmask.

Ahh yes. Thanks for pointing out.

In addition to this patch, we should also handle the case of
empty hart mask in OpenSBI.

Yes. I have few other fixes as well (around fifo race condition and local 
flushing in OpenSBI).
I will send them soon.

Regards,
Atish
> +
> +   if (cpumask_test_cpu(cpuid, &tmask)) {
> +   /* Save trap cost by issuing a local tlb flush here */
> +   if ((start == 0 && size == -1) || (size > PAGE_SIZE))
> +   local_flush_tlb_all();
> +   else if (size == PAGE_SIZE)
> +   local_flush_tlb_page(start);
> +   cpumask_clear_cpu(cpuid, &tmask);
> +   } else if (cpumask_empty(&tmask)) {
> +   /* cpumask is empty. So just do a local flush */
> +   local_flush_tlb_all();
> +   return;
> +   }
> +
> +   if (!cpumask_empty(&tmask)) {
> +   riscv_cpuid_to_hartid_mask(&tmask, &hmask);
> +   sbi_remote_sfence_vma(hmask.bits, start, size);
> +   }
>  }
>
> -#define flush_tlb_all() sbi_remote_sfence_vma(NULL, 0, -1)
> -#define flush_tlb_page(vma, addr) flush_tlb_range(vma, addr, 0)
> +#define flush_tlb_all() remote_sfence_vma(NULL, 0, -1)
> +#define flush_tlb_page(vma, addr) flush_tlb_range(vma, addr, (addr) + 
PAGE_SIZE)
>  #define flush_tlb_range(vma, start, end) \
> remote_sfence_vma(mm_cpumask((vma)->vm_mm), start, (end) - 
(start))
>  #define flush_tlb_mm(mm) \
> --
> 2.21.0
>
>
> ___
> linux-riscv mailing list
> linux-ri...@lists.infradead.org
> http://lists.infradead.org/mailman/listinfo/linux-riscv

Regards,
Anup




Re: [PATCH] x86/umwait: Fix error handling in umwait_init()

2019-08-09 Thread Thomas Gleixner
On Fri, 9 Aug 2019, Fenghua Yu wrote:
> +/*
> + * The CPU hotplug callback sets the control MSR to the original control
> + * value.
> + */
> +static int umwait_cpu_offline(unsigned int cpu)
> +{
> + /*
> +  * This code is protected by the CPU hotplug already and
> +  * orig_umwait_control_cached is never changed after it caches
> +  * the original control MSR value in umwait_init(). So there
> +  * is no race condition here.
> +  */
> + wrmsr(MSR_IA32_UMWAIT_CONTROL, orig_umwait_control_cached, 0);

Even my brain compiler emits an error here.

Thanks,

tglx


[PATCH v2] sh: kernel: disassemble: Mark expected switch fall-throughs

2019-08-09 Thread Gustavo A. R. Silva
Remove logically dead code and mark switch cases where we are expecting
to fall through.

Fix the following warnings (Building: defconfig sh):

arch/sh/kernel/disassemble.c:478:8: warning: this statement may fall
through [-Wimplicit-fallthrough=]
arch/sh/kernel/disassemble.c:487:8: warning: this statement may fall
through [-Wimplicit-fallthrough=]
arch/sh/kernel/disassemble.c:496:8: warning: this statement may fall
through [-Wimplicit-fallthrough=]

Signed-off-by: Gustavo A. R. Silva 
---
Changes in v2:
 - Remove logically dead code. Pointed out by Joe Perches.

NOTE: If no one cares, I'll apply this to my tree and queue it up
  for 5.3-rc4.

 arch/sh/kernel/disassemble.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/arch/sh/kernel/disassemble.c b/arch/sh/kernel/disassemble.c
index defebf1a9c8a..845543780cc5 100644
--- a/arch/sh/kernel/disassemble.c
+++ b/arch/sh/kernel/disassemble.c
@@ -475,8 +475,6 @@ static void print_sh_insn(u32 memaddr, u16 insn)
printk("dbr");
break;
case FD_REG_N:
-   if (0)
-   goto d_reg_n;
case F_REG_N:
printk("fr%d", rn);
break;
@@ -488,7 +486,7 @@ static void print_sh_insn(u32 memaddr, u16 insn)
printk("xd%d", rn & ~1);
break;
}
-   d_reg_n:
+   /* else, fall through */
case D_REG_N:
printk("dr%d", rn);
break;
@@ -497,6 +495,7 @@ static void print_sh_insn(u32 memaddr, u16 insn)
printk("xd%d", rm & ~1);
break;
}
+   /* else, fall through */
case D_REG_M:
printk("dr%d", rm);
break;
-- 
2.22.0



Re: [PATCH v9 0/7] Solve postboot supplier cleanup and optimize probe ordering

2019-08-09 Thread Frank Rowand
On 8/9/19 10:00 PM, Saravana Kannan wrote:
> On Fri, Aug 9, 2019 at 7:57 PM Frank Rowand  wrote:
>>
>> Hi Saravana,
>>
>> On 7/31/19 3:17 PM, Saravana Kannan wrote:
>>> Add device-links to track functional dependencies between devices
>>> after they are created (but before they are probed) by looking at
>>> their common DT bindings like clocks, interconnects, etc.
>>>
>>> Having functional dependencies automatically added before the devices
>>> are probed, provides the following benefits:
>>>
>>> - Optimizes device probe order and avoids the useless work of
>>>   attempting probes of devices that will not probe successfully
>>>   (because their suppliers aren't present or haven't probed yet).
>>>
>>>   For example, in a commonly available mobile SoC, registering just
>>>   one consumer device's driver at an initcall level earlier than the
>>>   supplier device's driver causes 11 failed probe attempts before the
>>>   consumer device probes successfully. This was with a kernel with all
>>>   the drivers statically compiled in. This problem gets a lot worse if
>>>   all the drivers are loaded as modules without direct symbol
>>>   dependencies.
>>>
>>> - Supplier devices like clock providers, interconnect providers, etc
>>>   need to keep the resources they provide active and at a particular
>>>   state(s) during boot up even if their current set of consumers don't
>>>   request the resource to be active. This is because the rest of the
>>>   consumers might not have probed yet and turning off the resource
>>>   before all the consumers have probed could lead to a hang or
>>>   undesired user experience.
>>>
>>>   Some frameworks (Eg: regulator) handle this today by turning off
>>>   "unused" resources at late_initcall_sync and hoping all the devices
>>>   have probed by then. This is not a valid assumption for systems with
>>>   loadable modules. Other frameworks (Eg: clock) just don't handle
>>>   this due to the lack of a clear signal for when they can turn off
>>>   resources. This leads to downstream hacks to handle cases like this
>>>   that can easily be solved in the upstream kernel.
>>>
>>>   By linking devices before they are probed, we give suppliers a clear
>>>   count of the number of dependent consumers. Once all of the
>>>   consumers are active, the suppliers can turn off the unused
>>>   resources without making assumptions about the number of consumers.
>>>
>>> By default we just add device-links to track "driver presence" (probe
>>> succeeded) of the supplier device. If any other functionality provided
>>> by device-links are needed, it is left to the consumer/supplier
>>> devices to change the link when they probe.
>>>
>>> v1 -> v2:
>>> - Drop patch to speed up of_find_device_by_node()
>>> - Drop depends-on property and use existing bindings
>>>
>>> v2 -> v3:
>>> - Refactor the code to have driver core initiate the linking of devs
>>> - Have driver core link consumers to supplier before it's probed
>>> - Add support for drivers to edit the device links before probing
>>>
>>> v3 -> v4:
>>> - Tested edit_links() on system with cyclic dependency. Works.
>>> - Added some checks to make sure device link isn't attempted from
>>>   parent device node to child device node.
>>> - Added way to pause/resume sync_state callbacks across
>>>   of_platform_populate().
>>> - Recursively parse DT node to create device links from parent to
>>>   suppliers of parent and all child nodes.
>>>
>>> v4 -> v5:
>>> - Fixed copy-pasta bugs with linked list handling
>>> - Walk up the phandle reference till I find an actual device (needed
>>>   for regulators to work)
>>> - Added support for linking devices from regulator DT bindings
>>> - Tested the whole series again to make sure cyclic dependencies are
>>>   broken with edit_links() and regulator links are created properly.
>>>
>>> v5 -> v6:
>>> - Split, squashed and reordered some of the patches.
>>> - Refactored the device linking code to follow the same code pattern for
>>>   any property.
>>>
>>> v6 -> v7:
>>> - No functional changes.
>>> - Renamed i to index
>>> - Added comment to clarify not having to check property name for every
>>>   index
>>> - Added "matched" variable to clarify code. No functional change.
>>> - Added comments to include/linux/device.h for add_links()
>>>
>>> v7 -> v8:
>>> - Rebased on top of linux-next to handle device link changes in [1]
>>>
>>
>>
>>> v8 -> v9:
>>> - Fixed kbuild test bot reported errors (docs and const)
>>
>> Some maintainers have strong opinions about whether change logs should be:
>>
>>   (1) only in patch 0
>>   (2) only in the specific patches that are changed
>>   (3) both in patch 0 and in the specific patches that are changed.
>>
>> I can adapt to any of the three styles.  But for style "(1)" please
>> list which specific patch has changed for each item in the change list.
>>
> 
> Thanks for the context Frank. I'm okay with (1) or (2) but I'll stick
> with (1) for this series. Didn't realize there were 

Re: [PATCH] sh: kernel: disassemble: Mark expected switch fall-throughs

2019-08-09 Thread Gustavo A. R. Silva



On 8/10/19 12:08 AM, Joe Perches wrote:
> On Sat, 2019-08-10 at 00:01 -0500, Gustavo A. R. Silva wrote:
>> Mark switch cases where we are expecting to fall through.
>>
>> Fix the following warnings (Building: defconfig sh):
> []
>> diff --git a/arch/sh/kernel/disassemble.c b/arch/sh/kernel/disassemble.c
> []
>> @@ -477,6 +477,7 @@ static void print_sh_insn(u32 memaddr, u16 insn)
>>  case FD_REG_N:
>>  if (0)
>>  goto d_reg_n;
> 
> Might as well remove this if (0) goto,
> remove the added comment
> 

You're right. I'll respin and stop working this Friday midnight.

>> +/* else, fall through */
>>  case F_REG_N:
>>  printk("fr%d", rn);
>>  break;
>> @@ -488,6 +489,7 @@ static void print_sh_insn(u32 memaddr, u16 insn)
>>  printk("xd%d", rn & ~1);
>>  break;
>>  }
>> +/* else, fall through */
>>  d_reg_n:
> 
> and remove this only use of d_reg_n
> 

Sure.

Thanks
--
Gustavo

>>  case D_REG_N:
>>  printk("dr%d", rn);
>> @@ -497,6 +499,7 @@ static void print_sh_insn(u32 memaddr, u16 insn)
>>  printk("xd%d", rm & ~1);
>>  break;
>>  }
>> +/* else, fall through */
>>  case D_REG_M:
>>  printk("dr%d", rm);
>>  break;
> 


Re: [PATCH] sh: kernel: disassemble: Mark expected switch fall-throughs

2019-08-09 Thread Joe Perches
On Sat, 2019-08-10 at 00:01 -0500, Gustavo A. R. Silva wrote:
> Mark switch cases where we are expecting to fall through.
> 
> Fix the following warnings (Building: defconfig sh):
[]
> diff --git a/arch/sh/kernel/disassemble.c b/arch/sh/kernel/disassemble.c
[]
> @@ -477,6 +477,7 @@ static void print_sh_insn(u32 memaddr, u16 insn)
>   case FD_REG_N:
>   if (0)
>   goto d_reg_n;

Might as well remove this if (0) goto,
remove the added comment

> + /* else, fall through */
>   case F_REG_N:
>   printk("fr%d", rn);
>   break;
> @@ -488,6 +489,7 @@ static void print_sh_insn(u32 memaddr, u16 insn)
>   printk("xd%d", rn & ~1);
>   break;
>   }
> + /* else, fall through */
>   d_reg_n:

and remove this only use of d_reg_n

>   case D_REG_N:
>   printk("dr%d", rn);
> @@ -497,6 +499,7 @@ static void print_sh_insn(u32 memaddr, u16 insn)
>   printk("xd%d", rm & ~1);
>   break;
>   }
> + /* else, fall through */
>   case D_REG_M:
>   printk("dr%d", rm);
>   break;



Re: [PATCH v9 0/7] Solve postboot supplier cleanup and optimize probe ordering

2019-08-09 Thread Saravana Kannan
On Fri, Aug 9, 2019 at 7:57 PM Frank Rowand  wrote:
>
> Hi Saravana,
>
> On 7/31/19 3:17 PM, Saravana Kannan wrote:
> > Add device-links to track functional dependencies between devices
> > after they are created (but before they are probed) by looking at
> > their common DT bindings like clocks, interconnects, etc.
> >
> > Having functional dependencies automatically added before the devices
> > are probed, provides the following benefits:
> >
> > - Optimizes device probe order and avoids the useless work of
> >   attempting probes of devices that will not probe successfully
> >   (because their suppliers aren't present or haven't probed yet).
> >
> >   For example, in a commonly available mobile SoC, registering just
> >   one consumer device's driver at an initcall level earlier than the
> >   supplier device's driver causes 11 failed probe attempts before the
> >   consumer device probes successfully. This was with a kernel with all
> >   the drivers statically compiled in. This problem gets a lot worse if
> >   all the drivers are loaded as modules without direct symbol
> >   dependencies.
> >
> > - Supplier devices like clock providers, interconnect providers, etc
> >   need to keep the resources they provide active and at a particular
> >   state(s) during boot up even if their current set of consumers don't
> >   request the resource to be active. This is because the rest of the
> >   consumers might not have probed yet and turning off the resource
> >   before all the consumers have probed could lead to a hang or
> >   undesired user experience.
> >
> >   Some frameworks (Eg: regulator) handle this today by turning off
> >   "unused" resources at late_initcall_sync and hoping all the devices
> >   have probed by then. This is not a valid assumption for systems with
> >   loadable modules. Other frameworks (Eg: clock) just don't handle
> >   this due to the lack of a clear signal for when they can turn off
> >   resources. This leads to downstream hacks to handle cases like this
> >   that can easily be solved in the upstream kernel.
> >
> >   By linking devices before they are probed, we give suppliers a clear
> >   count of the number of dependent consumers. Once all of the
> >   consumers are active, the suppliers can turn off the unused
> >   resources without making assumptions about the number of consumers.
> >
> > By default we just add device-links to track "driver presence" (probe
> > succeeded) of the supplier device. If any other functionality provided
> > by device-links are needed, it is left to the consumer/supplier
> > devices to change the link when they probe.
> >
> > v1 -> v2:
> > - Drop patch to speed up of_find_device_by_node()
> > - Drop depends-on property and use existing bindings
> >
> > v2 -> v3:
> > - Refactor the code to have driver core initiate the linking of devs
> > - Have driver core link consumers to supplier before it's probed
> > - Add support for drivers to edit the device links before probing
> >
> > v3 -> v4:
> > - Tested edit_links() on system with cyclic dependency. Works.
> > - Added some checks to make sure device link isn't attempted from
> >   parent device node to child device node.
> > - Added way to pause/resume sync_state callbacks across
> >   of_platform_populate().
> > - Recursively parse DT node to create device links from parent to
> >   suppliers of parent and all child nodes.
> >
> > v4 -> v5:
> > - Fixed copy-pasta bugs with linked list handling
> > - Walk up the phandle reference till I find an actual device (needed
> >   for regulators to work)
> > - Added support for linking devices from regulator DT bindings
> > - Tested the whole series again to make sure cyclic dependencies are
> >   broken with edit_links() and regulator links are created properly.
> >
> > v5 -> v6:
> > - Split, squashed and reordered some of the patches.
> > - Refactored the device linking code to follow the same code pattern for
> >   any property.
> >
> > v6 -> v7:
> > - No functional changes.
> > - Renamed i to index
> > - Added comment to clarify not having to check property name for every
> >   index
> > - Added "matched" variable to clarify code. No functional change.
> > - Added comments to include/linux/device.h for add_links()
> >
> > v7 -> v8:
> > - Rebased on top of linux-next to handle device link changes in [1]
> >
>
>
> > v8 -> v9:
> > - Fixed kbuild test bot reported errors (docs and const)
>
> Some maintainers have strong opinions about whether change logs should be:
>
>   (1) only in patch 0
>   (2) only in the specific patches that are changed
>   (3) both in patch 0 and in the specific patches that are changed.
>
> I can adapt to any of the three styles.  But for style "(1)" please
> list which specific patch has changed for each item in the change list.
>

Thanks for the context Frank. I'm okay with (1) or (2) but I'll stick
with (1) for this series. Didn't realize there were options (2) and
(3). Since you started reviewing from v7, I'll do that in 

[PATCH] sh: kernel: disassemble: Mark expected switch fall-throughs

2019-08-09 Thread Gustavo A. R. Silva
Mark switch cases where we are expecting to fall through.

Fix the following warnings (Building: defconfig sh):

arch/sh/kernel/disassemble.c:478:8: warning: this statement may fall
through [-Wimplicit-fallthrough=]
arch/sh/kernel/disassemble.c:487:8: warning: this statement may fall
through [-Wimplicit-fallthrough=]
arch/sh/kernel/disassemble.c:496:8: warning: this statement may fall
through [-Wimplicit-fallthrough=]

Signed-off-by: Gustavo A. R. Silva 
---

If no one cares, I'll apply this to my tree and queue it up for 5.3-rc4.

 arch/sh/kernel/disassemble.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/arch/sh/kernel/disassemble.c b/arch/sh/kernel/disassemble.c
index defebf1a9c8a..91c87e9891da 100644
--- a/arch/sh/kernel/disassemble.c
+++ b/arch/sh/kernel/disassemble.c
@@ -477,6 +477,7 @@ static void print_sh_insn(u32 memaddr, u16 insn)
case FD_REG_N:
if (0)
goto d_reg_n;
+   /* else, fall through */
case F_REG_N:
printk("fr%d", rn);
break;
@@ -488,6 +489,7 @@ static void print_sh_insn(u32 memaddr, u16 insn)
printk("xd%d", rn & ~1);
break;
}
+   /* else, fall through */
d_reg_n:
case D_REG_N:
printk("dr%d", rn);
@@ -497,6 +499,7 @@ static void print_sh_insn(u32 memaddr, u16 insn)
printk("xd%d", rm & ~1);
break;
}
+   /* else, fall through */
case D_REG_M:
printk("dr%d", rm);
break;
-- 
2.22.0



[PATCH] sh: kernel: hw_breakpoint: Fix missing break in switch statement

2019-08-09 Thread Gustavo A. R. Silva
Add missing break statement in order to prevent the code from
erroneously falling through to case SH_BREAKPOINT_WRITE.

Fixes: 09a072947791 ("sh: hw-breakpoints: Add preliminary support for SH-4A 
UBC.")
Cc: sta...@vger.kernel.org
Signed-off-by: Gustavo A. R. Silva 
---

If no one cares, I'll apply this to my tree and queue it up for 5.3-rc4.

 arch/sh/kernel/hw_breakpoint.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/arch/sh/kernel/hw_breakpoint.c b/arch/sh/kernel/hw_breakpoint.c
index 3bd010b4c55f..f10d64311127 100644
--- a/arch/sh/kernel/hw_breakpoint.c
+++ b/arch/sh/kernel/hw_breakpoint.c
@@ -157,6 +157,7 @@ int arch_bp_generic_fields(int sh_len, int sh_type,
switch (sh_type) {
case SH_BREAKPOINT_READ:
*gen_type = HW_BREAKPOINT_R;
+   break;
case SH_BREAKPOINT_WRITE:
*gen_type = HW_BREAKPOINT_W;
break;
-- 
2.22.0



Re: [PATCH 4.9 00/32] 4.9.189-stable review

2019-08-09 Thread kernelci.org bot
stable-rc/linux-4.9.y boot: 102 boots: 0 failed, 90 passed with 12 offline 
(v4.9.188-33-g260869840af4)

Full Boot Summary: 
https://kernelci.org/boot/all/job/stable-rc/branch/linux-4.9.y/kernel/v4.9.188-33-g260869840af4/
Full Build Summary: 
https://kernelci.org/build/stable-rc/branch/linux-4.9.y/kernel/v4.9.188-33-g260869840af4/

Tree: stable-rc
Branch: linux-4.9.y
Git Describe: v4.9.188-33-g260869840af4
Git Commit: 260869840af4f3d7b3b46c4047642a931535c196
Git URL: 
https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux-stable-rc.git
Tested: 51 unique boards, 22 SoC families, 15 builds out of 196

Boot Regressions Detected:

arm:

bcm2835_defconfig:
gcc-8:
  bcm2835-rpi-b:
  lab-baylibre-seattle: failing since 1 day (last pass: 
v4.9.187-43-g228fba508ff1 - first fail: v4.9.187-71-g399cf2b4ebf0)

sama5_defconfig:
gcc-8:
  at91-sama5d4_xplained:
  lab-baylibre-seattle: failing since 1 day (last pass: 
v4.9.187-43-g228fba508ff1 - first fail: v4.9.187-71-g399cf2b4ebf0)

socfpga_defconfig:
gcc-8:
  socfpga_cyclone5_de0_sockit:
  lab-baylibre-seattle: failing since 1 day (last pass: 
v4.9.187-43-g228fba508ff1 - first fail: v4.9.187-71-g399cf2b4ebf0)

arm64:

defconfig:
gcc-8:
  apq8016-sbc:
  lab-baylibre-seattle: failing since 1 day (last pass: 
v4.9.187-43-g228fba508ff1 - first fail: v4.9.187-71-g399cf2b4ebf0)
  juno-r2:
  lab-baylibre-seattle: failing since 1 day (last pass: 
v4.9.187-43-g228fba508ff1 - first fail: v4.9.187-71-g399cf2b4ebf0)
  meson-gxbb-odroidc2:
  lab-baylibre-seattle: failing since 1 day (last pass: 
v4.9.187-43-g228fba508ff1 - first fail: v4.9.187-71-g399cf2b4ebf0)

Offline Platforms:

arm64:

defconfig:
gcc-8
apq8016-sbc: 1 offline lab
juno-r2: 1 offline lab
meson-gxbb-odroidc2: 1 offline lab

arm:

bcm2835_defconfig:
gcc-8
bcm2835-rpi-b: 1 offline lab

sama5_defconfig:
gcc-8
at91-sama5d4_xplained: 1 offline lab

multi_v7_defconfig:
gcc-8
alpine-db: 1 offline lab
at91-sama5d4_xplained: 1 offline lab
bcm4708-smartrg-sr400ac: 1 offline lab
socfpga_cyclone5_de0_sockit: 1 offline lab
sun5i-r8-chip: 1 offline lab

socfpga_defconfig:
gcc-8
socfpga_cyclone5_de0_sockit: 1 offline lab

sunxi_defconfig:
gcc-8
sun5i-r8-chip: 1 offline lab

---
For more info write to 


[PATCH] firmware: arm_scmi: Eliminate local db variable in SCMI_PERF_FC_RING_DB

2019-08-09 Thread Nathan Chancellor
clang warns four times:

drivers/firmware/arm_scmi/perf.c:320:24: warning: variable 'db' is
uninitialized when used within its own initialization [-Wuninitialized]
SCMI_PERF_FC_RING_DB(db, 64);
~^~~
drivers/firmware/arm_scmi/perf.c:300:31: note: expanded from macro
'SCMI_PERF_FC_RING_DB'
struct scmi_fc_db_info *db = doorbell;  \
~~   ^~~~

This happens because the doorbell identifier becomes db after
preprocessing:

if (db->width == 1)
do {
u8 val = 0;
struct scmi_fc_db_info *db = db;
if (db->mask)
val = ioread8(db->addr) & db->mask;
iowrite8((u8)db->set | val, db->addr);
} while (0);

We could swap the doorbell and db identifiers within the macro and that
would resolve the issue; however, there doesn't appear to be a good
reason for having two copies of the same variable. Eliminate the one in
the do while loop to prevent this warning and make the code clearer.

Fixes: 8f12cbcb6abc ("firmware: arm_scmi: Make use SCMI v2.0 fastchannel for 
performance protocol")
Link: https://github.com/ClangBuiltLinux/linux/issues/635
Signed-off-by: Nathan Chancellor 
---
 drivers/firmware/arm_scmi/perf.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/drivers/firmware/arm_scmi/perf.c b/drivers/firmware/arm_scmi/perf.c
index 2c5201c8354c..ab946ef6b914 100644
--- a/drivers/firmware/arm_scmi/perf.c
+++ b/drivers/firmware/arm_scmi/perf.c
@@ -294,10 +294,9 @@ scmi_perf_describe_levels_get(const struct scmi_handle 
*handle, u32 domain,
return ret;
 }
 
-#define SCMI_PERF_FC_RING_DB(doorbell, w)  \
+#define SCMI_PERF_FC_RING_DB(db, w)\
 do {   \
u##w val = 0;   \
-   struct scmi_fc_db_info *db = doorbell;  \
\
if (db->mask)   \
val = ioread##w(db->addr) & db->mask;   \
-- 
2.23.0.rc2



Re: [PATCH] sh: Drop -Werror from kernel Makefile

2019-08-09 Thread Gustavo A. R. Silva


Re: [PATCH] sh: Drop -Werror from kernel Makefile

2019-08-09 Thread Gustavo A. R. Silva



On 8/9/19 11:20 PM, Joe Perches wrote:
> On Fri, 2019-08-09 at 21:47 -0500, Gustavo A. R. Silva wrote:
>> On 8/9/19 4:56 PM, Guenter Roeck wrote:
>>> On Fri, Aug 09, 2019 at 04:36:01PM -0500, Gustavo A. R. Silva wrote:
>>>> On 8/9/19 2:56 PM, Guenter Roeck wrote:
>>>>> On Sun, Aug 04, 2019 at 11:24:41PM -0400, Rich Felker wrote:
>>>>>> On Sun, Aug 04, 2019 at 07:14:23PM -0700, Guenter Roeck wrote:
>>>>>>> Since commit a035d552a93b ("Makefile: Globally enable fall-through
>>>>>>> warning"), all sh builds fail with errors such as
>>>>>>>
>>>>>>> arch/sh/kernel/disassemble.c: In function 'print_sh_insn':
>>>>>>> arch/sh/kernel/disassemble.c:478:8: error: this statement may fall 
>>>>>>> through
>>>>>>>
>>>>>>> Since this effectively disables all build and boot tests for the
>>>>>>> architecture, let's drop -Werror from the sh kernel Makefile until
>>>>>>> the problems are fixed.
> []
>> On second thought it seems to me that this is not a good idea, at least
>> for mainline. For the time being I'll take this patch for linux-next only.
>>
>> Who is the maintainer of sh?
> 
> But whoever it may be, isn't particularly active.
> 
> MAINTAINERS-SUPERH
> MAINTAINERS-M:  Yoshinori Sato 
> MAINTAINERS-M:  Rich Felker 
> MAINTAINERS-L:  linux...@vger.kernel.org
> MAINTAINERS-Q:  http://patchwork.kernel.org/project/linux-sh/list/
> MAINTAINERS-S:  Maintained
> MAINTAINERS-F:  Documentation/sh/
> MAINTAINERS:F:  arch/sh/
> MAINTAINERS-F:  drivers/sh/
> 
>> The best solution is to fix those fall-through warnings you see. Could you
>> please send me all the warnings you see? I can try to fix them.
> 
> It's true it's a warning, but adding -Werror is rarely
> a good idea as gcc error output can change with every
> version.
> 

In the meantime I'll install sh4 and fix those warnings.

--
Gustavo


[PATCH] ALSA: hda - Fix a memory leak bug

2019-08-09 Thread Wenwen Wang
In snd_hda_parse_generic_codec(), 'spec' is allocated through kzalloc().
Then, the pin widgets in 'codec' are parsed. However, if the parsing
process fails, 'spec' is not deallocated, leading to a memory leak.

To fix the above issue, free 'spec' before returning the error.

Signed-off-by: Wenwen Wang 
---
 sound/pci/hda/hda_generic.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/sound/pci/hda/hda_generic.c b/sound/pci/hda/hda_generic.c
index 485edab..8f2beb1 100644
--- a/sound/pci/hda/hda_generic.c
+++ b/sound/pci/hda/hda_generic.c
@@ -6100,7 +6100,7 @@ static int snd_hda_parse_generic_codec(struct hda_codec 
*codec)
 
err = snd_hda_parse_pin_defcfg(codec, &spec->autocfg, NULL, 0);
if (err < 0)
-   return err;
+   goto error;
 
err = snd_hda_gen_parse_auto_config(codec, &spec->autocfg);
if (err < 0)
-- 
2.7.4



Re: [PATCH] sh: Drop -Werror from kernel Makefile

2019-08-09 Thread Joe Perches
On Fri, 2019-08-09 at 21:47 -0500, Gustavo A. R. Silva wrote:
> On 8/9/19 4:56 PM, Guenter Roeck wrote:
> > On Fri, Aug 09, 2019 at 04:36:01PM -0500, Gustavo A. R. Silva wrote:
> > > On 8/9/19 2:56 PM, Guenter Roeck wrote:
> > > > On Sun, Aug 04, 2019 at 11:24:41PM -0400, Rich Felker wrote:
> > > > > On Sun, Aug 04, 2019 at 07:14:23PM -0700, Guenter Roeck wrote:
> > > > > > Since commit a035d552a93b ("Makefile: Globally enable fall-through
> > > > > > warning"), all sh builds fail with errors such as
> > > > > > 
> > > > > > arch/sh/kernel/disassemble.c: In function 'print_sh_insn':
> > > > > > arch/sh/kernel/disassemble.c:478:8: error: this statement may fall 
> > > > > > through
> > > > > > 
> > > > > > Since this effectively disables all build and boot tests for the
> > > > > > architecture, let's drop -Werror from the sh kernel Makefile until
> > > > > > the problems are fixed.
[]
> On second thought it seems to me that this is not a good idea, at least
> for mainline. For the time being I'll take this patch for linux-next only.
> 
> Who is the maintainer of sh?

But whoever it may be, isn't particularly active.

MAINTAINERS-SUPERH
MAINTAINERS-M:  Yoshinori Sato 
MAINTAINERS-M:  Rich Felker 
MAINTAINERS-L:  linux...@vger.kernel.org
MAINTAINERS-Q:  http://patchwork.kernel.org/project/linux-sh/list/
MAINTAINERS-S:  Maintained
MAINTAINERS-F:  Documentation/sh/
MAINTAINERS:F:  arch/sh/
MAINTAINERS-F:  drivers/sh/

> The best solution is to fix those fall-through warnings you see. Could you
> please send me all the warnings you see? I can try to fix them.

It's true it's a warning, but adding -Werror is rarely
a good idea as gcc error output can change with every
version.




Re: [PATCH RFC v1 1/2] rcu/tree: Add basic support for kfree_rcu batching

2019-08-09 Thread Joel Fernandes
On Fri, Aug 09, 2019 at 08:38:14PM -0700, Paul E. McKenney wrote:
> On Fri, Aug 09, 2019 at 10:42:32PM -0400, Joel Fernandes wrote:
> > On Wed, Aug 07, 2019 at 10:52:15AM -0700, Paul E. McKenney wrote:
> > [snip] 
> > > > > > @@ -3459,6 +3645,8 @@ void __init rcu_init(void)
> > > > > >  {
> > > > > > int cpu;
> > > > > >  
> > > > > > +   kfree_rcu_batch_init();
> > > > > 
> > > > > What happens if someone does a kfree_rcu() before this point?  It 
> > > > > looks
> > > > > like it should work, but have you tested it?
> > > > > 
> > > > > > rcu_early_boot_tests();
> > > > > 
> > > > > For example, by testing it in rcu_early_boot_tests() and moving the
> > > > > call to kfree_rcu_batch_init() here.
> > > > 
> > > > I have not tried to do the kfree_rcu() this early. I will try it out.
> > > 
> > > Yeah, well, call_rcu() this early came as a surprise to me back in the
> > > day, so...  ;-)
> > 
> > I actually did get surprised as well!
> > 
> > It appears the timers are not fully initialized so the really early
> > kfree_rcu() call from rcu_init() does cause a splat about an uninitialized
> > timer spinlock (even though future kfree_rcu()s and the system are working
> > fine all the way into the torture tests).
> > 
> > I think to resolve this, we can just not do batching until early_initcall,
> > during which I have an initialization function which switches batching on.
> > From that point it is safe.
> 
> Just go ahead and batch, but don't bother with the timer until
> after single-threaded boot is done.  For example, you could check
> rcu_scheduler_active similar to how sync_rcu_exp_select_cpus() does.
> (See kernel/rcu/tree_exp.h.)

Cool, that works nicely and I tested it. Actually I made it such that we
don't need to batch even, before the scheduler is up. I don't see any benefit
of that unless we can see a kfree_rcu() flood happening that early at boot
which seems highly doubtful as a real world case.

> If needed, use an early_initcall() to handle the case where early boot
> kfree_rcu() calls needed to set the timer but could not.

And it would also need this complexity of early_initcall.

> > Below is the diff on top of this patch, I think this should be good but let
> > me know if anything looks odd to you. I tested it and it works.
> 
> Keep in mind that a call_rcu() callback can't possibly be invoked until
> quite some time after the scheduler is up and running.  So it will be
> a lot simpler to just skip setting the timer during early boot.

Sure. Skipping batching would skip the timer too :-D

If in the future, batching is needed this early, then I am happy to add an
early_initcall to setup the timer for any batched calls that could not setup
the timer. Hope that is ok with you?

thanks,

 - Joel

[snip]



[GIT PULL] Wimplicit-fallthrough patches for 5.3-rc4

2019-08-09 Thread Gustavo A. R. Silva
The following changes since commit 609488bc979f99f805f34e9a32c1e3b71179d10b:

  Linux 5.3-rc2 (2019-07-28 12:47:02 -0700)

are available in the Git repository at:

  git://git.kernel.org/pub/scm/linux/kernel/git/gustavoars/linux.git 
tags/Wimplicit-fallthrough-5.3-rc4

for you to fetch changes up to 1f7585f30a3af595ac07f610b807c738c9e3baab:

  ARM: ep93xx: Mark expected switch fall-through (2019-08-09 19:53:35 -0500)


Wimplicit-fallthrough patches for 5.3-rc4

Hi Linus,

Please, pull the following patches that mark switch cases where we are
expecting to fall through.

 - Fix fall-through warnings in arm, sparc64, mips, i386 and s390.

Thanks

Signed-off-by: Gustavo A. R. Silva 


Gustavo A. R. Silva (17):
  ARM/hw_breakpoint: Mark expected switch fall-throughs
  ARM: tegra: Mark expected switch fall-through
  ARM: alignment: Mark expected switch fall-throughs
  ARM: OMAP: dma: Mark expected switch fall-throughs
  mfd: db8500-prcmu: Mark expected switch fall-throughs
  mfd: omap-usb-host: Mark expected switch fall-throughs
  ARM: signal: Mark expected switch fall-through
  watchdog: Mark expected switch fall-throughs
  watchdog: scx200_wdt: Mark expected switch fall-through
  watchdog: wdt977: Mark expected switch fall-through
  crypto: ux500/crypt: Mark expected switch fall-throughs
  s390/net: Mark expected switch fall-throughs
  watchdog: riowd: Mark expected switch fall-through
  video: fbdev: omapfb_main: Mark expected switch fall-throughs
  pcmcia: db1xxx_ss: Mark expected switch fall-throughs
  scsi: fas216: Mark expected switch fall-throughs
  ARM: ep93xx: Mark expected switch fall-through

 arch/arm/kernel/hw_breakpoint.c|  5 +
 arch/arm/kernel/signal.c   |  1 +
 arch/arm/mach-ep93xx/crunch.c  |  1 +
 arch/arm/mach-tegra/reset.c|  2 +-
 arch/arm/mm/alignment.c|  4 +++-
 arch/arm/plat-omap/dma.c   | 14 +-
 drivers/crypto/ux500/cryp/cryp.c   |  6 ++
 drivers/mfd/db8500-prcmu.c |  2 ++
 drivers/mfd/omap-usb-host.c|  4 ++--
 drivers/pcmcia/db1xxx_ss.c |  4 
 drivers/s390/net/ctcm_fsms.c   |  1 +
 drivers/s390/net/ctcm_mpc.c|  3 +++
 drivers/s390/net/qeth_l2_main.c|  2 +-
 drivers/scsi/arm/fas216.c  |  8 
 drivers/video/fbdev/omap/omapfb_main.c |  8 
 drivers/watchdog/ar7_wdt.c |  1 +
 drivers/watchdog/pcwd.c|  2 +-
 drivers/watchdog/riowd.c   |  2 +-
 drivers/watchdog/sb_wdog.c |  1 +
 drivers/watchdog/scx200_wdt.c  |  1 +
 drivers/watchdog/wdt.c |  2 +-
 drivers/watchdog/wdt977.c  |  2 +-
 22 files changed, 58 insertions(+), 18 deletions(-)


Re: [PATCH 4.4 00/21] 4.4.189-stable review

2019-08-09 Thread kernelci.org bot
stable-rc/linux-4.4.y boot: 94 boots: 2 failed, 82 passed with 9 offline, 1 
conflict (v4.4.188-22-gab9a14a0618d)

Full Boot Summary: 
https://kernelci.org/boot/all/job/stable-rc/branch/linux-4.4.y/kernel/v4.4.188-22-gab9a14a0618d/
Full Build Summary: 
https://kernelci.org/build/stable-rc/branch/linux-4.4.y/kernel/v4.4.188-22-gab9a14a0618d/

Tree: stable-rc
Branch: linux-4.4.y
Git Describe: v4.4.188-22-gab9a14a0618d
Git Commit: ab9a14a0618d99ad7e0b7e589a97f3421ac4d662
Git URL: 
https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux-stable-rc.git
Tested: 46 unique boards, 20 SoC families, 14 builds out of 190

Boot Regressions Detected:

arm:

bcm2835_defconfig:
gcc-8:
  bcm2835-rpi-b:
  lab-baylibre-seattle: failing since 1 day (last pass: 
v4.4.187-23-g462a4b2bd3bf - first fail: v4.4.187-40-geae076a61a51)

sama5_defconfig:
gcc-8:
  at91-sama5d4_xplained:
  lab-baylibre-seattle: failing since 1 day (last pass: 
v4.4.187-23-g462a4b2bd3bf - first fail: v4.4.187-40-geae076a61a51)

arm64:

defconfig:
gcc-8:
  apq8016-sbc:
  lab-baylibre-seattle: failing since 1 day (last pass: 
v4.4.187-23-g462a4b2bd3bf - first fail: v4.4.187-40-geae076a61a51)

Boot Failures Detected:

arm64:
defconfig:
gcc-8:
qcom-qdf2400: 1 failed lab

arm:
multi_v7_defconfig:
gcc-8:
stih410-b2120: 1 failed lab

Offline Platforms:

arm64:

defconfig:
gcc-8
apq8016-sbc: 1 offline lab

arm:

bcm2835_defconfig:
gcc-8
bcm2835-rpi-b: 1 offline lab

sama5_defconfig:
gcc-8
at91-sama5d4_xplained: 1 offline lab

multi_v7_defconfig:
gcc-8
alpine-db: 1 offline lab
at91-sama5d4_xplained: 1 offline lab
bcm4708-smartrg-sr400ac: 1 offline lab
socfpga_cyclone5_de0_sockit: 1 offline lab
sun5i-r8-chip: 1 offline lab

sunxi_defconfig:
gcc-8
sun5i-r8-chip: 1 offline lab

Conflicting Boot Failure Detected: (These likely are not failures as other labs 
are reporting PASS. Needs review.)

x86_64:
x86_64_defconfig:
qemu:
lab-baylibre: PASS (gcc-8)
lab-mhart: PASS (gcc-8)
lab-linaro-lkft: FAIL (gcc-8)
lab-drue: PASS (gcc-8)
lab-collabora: PASS (gcc-8)

---
For more info write to 


Re: [PATCH 2/3] media: vimc: Collapse component structure into a single monolithic driver

2019-08-09 Thread Helen Koike
Hi Shuah,

Thanks for the patch, just some small comments.

On 8/9/19 6:45 PM, Shuah Khan wrote:
> vimc uses Component API to split the driver into functional components.
> The real hardware resembles a monolith structure more than component and
> component structure added a level of complexity making it hard to
> maintain without adding any real benefit.
> 
> The sensor is one vimc component that would make sense to be a separate
> module to closely align with the real hardware. It would be easier to
> collapse vimc into single monolithic driver first and then split the
> sensor off as a separate module.
> 
> Collapse it into a single monolithic driver removing the Component API.
> This patch removes the component API and makes minimal changes to the
> code base preserving the functional division of the code structure.
> Preserving the functional structure allows us to split the sensor off
> as a separate module in the future.
> 
> Major design elements in this change are:
> - Use existing struct vimc_ent_config and struct vimc_pipeline_config
>   to drive the initialization of the functional components.
> - Make vimc_ent_config global by moving it to vimc.h
> - Add two new hooks add and rm to initialize and register, unregister
>   and free subdevs.
> - All component API is now gone and bind and unbind hooks are modified
>   to do "add" and "rm" with minimal changes to just add and rm subdevs.
> - vimc-core's bind and unbind are now register and unregister.
> - vimc-core invokes "add" hooks from its vimc_register_devices().
>   The "add" hooks remain the same and register subdevs. They don't
>   create platform devices of their own and use vimc's pdev.dev as
>   their reference device. The "add" hooks save their vimc_ent_device(s)
>   in the corresponding vimc_ent_config.
> - vimc-core invokes "rm" hooks from its unregister to unregister subdevs
>   and cleanup.
> - vimc-core invokes "add" and "rm" hooks with pointer to struct vimc_device
>   and the corresponding struct vimc_ent_config pointer.
> 
> The following configure and stream test works on all devices.
> 
> media-ctl -d platform:vimc -V '"Sensor A":0[fmt:SBGGR8_1X8/640x480]'
> media-ctl -d platform:vimc -V '"Debayer A":0[fmt:SBGGR8_1X8/640x480]'
> media-ctl -d platform:vimc -V '"Sensor B":0[fmt:SBGGR8_1X8/640x480]'
> media-ctl -d platform:vimc -V '"Debayer B":0[fmt:SBGGR8_1X8/640x480]'
> 
> v4l2-ctl -z platform:vimc -d "RGB/YUV Capture" -v width=1920,height=1440
> v4l2-ctl -z platform:vimc -d "Raw Capture 0" -v pixelformat=BA81
> v4l2-ctl -z platform:vimc -d "Raw Capture 1" -v pixelformat=BA81
> 
> v4l2-ctl --stream-mmap --stream-count=100 -d /dev/video1
> v4l2-ctl --stream-mmap --stream-count=100 -d /dev/video2
> v4l2-ctl --stream-mmap --stream-count=100 -d /dev/video3
> 
> Signed-off-by: Shuah Khan 
> ---
>  drivers/media/platform/vimc/Makefile   |   7 +-
>  drivers/media/platform/vimc/vimc-capture.c |  75 ++
>  drivers/media/platform/vimc/vimc-core.c| 156 -
>  drivers/media/platform/vimc/vimc-debayer.c |  68 ++---
>  drivers/media/platform/vimc/vimc-scaler.c  |  68 ++---
>  drivers/media/platform/vimc/vimc-sensor.c  |  69 ++---
>  drivers/media/platform/vimc/vimc.h |  25 +++-
>  7 files changed, 125 insertions(+), 343 deletions(-)
> 
> diff --git a/drivers/media/platform/vimc/Makefile 
> b/drivers/media/platform/vimc/Makefile
> index 96d06f030c31..a53b2b532e9f 100644
> --- a/drivers/media/platform/vimc/Makefile
> +++ b/drivers/media/platform/vimc/Makefile
> @@ -1,5 +1,6 @@
>  # SPDX-License-Identifier: GPL-2.0
> -vimc-y := vimc-core.o vimc-common.o vimc-streamer.o
> +vimc-y := vimc-core.o vimc-common.o vimc-streamer.o vimc-capture.o \
> + vimc-debayer.o vimc-scaler.o vimc-sensor.o
> +
> +obj-$(CONFIG_VIDEO_VIMC) += vimc.o
>  
> -obj-$(CONFIG_VIDEO_VIMC) += vimc.o vimc-capture.o vimc-debayer.o \
> -vimc-scaler.o vimc-sensor.o
> diff --git a/drivers/media/platform/vimc/vimc-capture.c 
> b/drivers/media/platform/vimc/vimc-capture.c
> index c52fc5d97c2d..b7b2d3c3d4f8 100644
> --- a/drivers/media/platform/vimc/vimc-capture.c
> +++ b/drivers/media/platform/vimc/vimc-capture.c
> @@ -5,10 +5,6 @@
>   * Copyright (C) 2015-2017 Helen Koike 
>   */
>  
> -#include 
> -#include 
> -#include 
> -#include 
>  #include 
>  #include 
>  #include 
> @@ -17,8 +13,6 @@
>  #include "vimc-common.h"
>  #include "vimc-streamer.h"
>  
> -#define VIMC_CAP_DRV_NAME "vimc-capture"
> -
>  static const u32 vimc_cap_supported_pixfmt[] = {
>   V4L2_PIX_FMT_BGR24,
>   V4L2_PIX_FMT_RGB24,
> @@ -348,11 +342,11 @@ static void vimc_cap_release(struct video_device *vdev)
>   kfree(vcap);
>  }
>  
> -static void vimc_cap_comp_unbind(struct device *comp, struct device *master,
> -  void *master_data)
> +void vimc_cap_rm(struct vimc_device *vimc, struct vimc_ent_config *vent)
>  {
> - struct vimc_ent_device *ved = dev_get_drvdata(comp);
> - struct 

Re: [PATCH 0/3] Collapse vimc into single monolithic driver

2019-08-09 Thread Helen Koike
Hi Andre,

Thanks for testing this.

On 8/9/19 9:24 PM, André Almeida wrote:
> On 8/9/19 9:17 PM, Shuah Khan wrote:
>> Hi Andre,
>>
>> On 8/9/19 5:52 PM, André Almeida wrote:
>>> Hello Shuah,
>>>
>>> Thanks for the patch, I did some comments below.
>>>
>>> On 8/9/19 6:45 PM, Shuah Khan wrote:
 vimc uses Component API to split the driver into functional components.
 The real hardware resembles a monolith structure than component and
 component structure added a level of complexity making it hard to
 maintain without adding any real benefit.
  The sensor is one vimc component that would makes sense to be a
 separate
 module to closely align with the real hardware. It would be easier to
 collapse vimc into single monolithic driver first and then split the
 sensor off as a separate module.

 This patch series removes the component API and makes minimal changes to
 the code base preserving the functional division of the code structure.
 Preserving the functional structure allows us to split the sensor off
 as a separate module in the future.

 Major design elements in this change are:
  - Use existing struct vimc_ent_config and struct
 vimc_pipeline_config
    to drive the initialization of the functional components.
  - Make vimc_ent_config global by moving it to vimc.h
  - Add two new hooks add and rm to initialize and register,
 unregister
    and free subdevs.
  - All component API is now gone and bind and unbind hooks are
 modified
    to do "add" and "rm" with minimal changes to just add and rm
 subdevs.
  - vimc-core's bind and unbind are now register and unregister.
  - vimc-core invokes "add" hooks from its vimc_register_devices().
    The "add" hooks remain the same and register subdevs. They don't
    create platform devices of their own and use vimc's pdev.dev as
    their reference device. The "add" hooks save their
 vimc_ent_device(s)
    in the corresponding vimc_ent_config.
  - vimc-core invokes "rm" hooks from its unregister to unregister
 subdevs
    and cleanup.
  - vimc-core invokes "add" and "rm" hooks with pointer to struct
 vimc_device
    and the corresponding struct vimc_ent_config pointer.
  The following configure and stream test works on all devices.
   media-ctl -d platform:vimc -V '"Sensor
 A":0[fmt:SBGGR8_1X8/640x480]'
  media-ctl -d platform:vimc -V '"Debayer
 A":0[fmt:SBGGR8_1X8/640x480]'
  media-ctl -d platform:vimc -V '"Sensor
 B":0[fmt:SBGGR8_1X8/640x480]'
  media-ctl -d platform:vimc -V '"Debayer
 B":0[fmt:SBGGR8_1X8/640x480]'
   v4l2-ctl -z platform:vimc -d "RGB/YUV Capture" -v
 width=1920,height=1440
  v4l2-ctl -z platform:vimc -d "Raw Capture 0" -v pixelformat=BA81
  v4l2-ctl -z platform:vimc -d "Raw Capture 1" -v pixelformat=BA81
   v4l2-ctl --stream-mmap --stream-count=100 -d /dev/video1
  v4l2-ctl --stream-mmap --stream-count=100 -d /dev/video2
  v4l2-ctl --stream-mmap --stream-count=100 -d /dev/video3

 The third patch in the series fixes a general protection fault found
 when rmmod is done while stream is active.
>>>
>>> I applied your patch on top of media_tree/master and I did some testing.
>>> Not sure if I did something wrong, but just adding and removing the
>>> module generated a kernel panic:
>>
>> Thanks for testing.
>>
>> Odd. I tested modprobe and rmmod both. I was working on Linux 5.3-rc2.
>> I will apply these to media latest and work from there. I have to
>> rebase these on top of the reverts from Lucas and Helen
> 
> Ok, please let me know if I succeeded to reproduce.
> 
>>>
>>> ~# modprobe vimc
>>> ~# rmmod vimc
>>> [   16.452974] stack segment:  [#1] SMP PTI
>>> [   16.453688] CPU: 0 PID: 2038 Comm: rmmod Not tainted 5.3.0-rc2+ #36
>>> [   16.454678] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996),
>>> BIOS 1.12.0-20181126_142135-anatol 04/01/2014
>>> [   16.456191] RIP: 0010:kfree+0x4d/0x240
>>>
>>> 
>>>
>>> [   16.469188] Call Trace:
>>> [   16.469666]  vimc_remove+0x35/0x90 [vimc]
>>> [   16.470436]  platform_drv_remove+0x1f/0x40
>>> [   16.471233]  device_release_driver_internal+0xd3/0x1b0
>>> [   16.472184]  driver_detach+0x37/0x6b
>>> [   16.472882]  bus_remove_driver+0x50/0xc1
>>> [   16.473569]  vimc_exit+0xc/0xca0 [vimc]
>>> [   16.474231]  __x64_sys_delete_module+0x18d/0x240
>>> [   16.475036]  do_syscall_64+0x43/0x110
>>> [   16.475656]  entry_SYSCALL_64_after_hwframe+0x44/0xa9
>>> [   16.476504] RIP: 0033:0x7fceb8dafa4b
>>>
>>> 
>>>
>>> [   16.484853] Modules linked in: vimc(-) videobuf2_vmalloc
>>> videobuf2_memops v4l2_tpg videobuf2_v4l2 videobuf2_common
>>> [   16.486187] ---[ end trace 91e5e0894e254d49 ]---
>>> [   16.486758] RIP: 0010:kfree+0x4d/0x240
>>>
>>> 
>>>
>>> 

Re: [PATCH RFC v1 1/2] rcu/tree: Add basic support for kfree_rcu batching

2019-08-09 Thread Joel Fernandes
On Fri, Aug 09, 2019 at 08:40:27PM -0700, Paul E. McKenney wrote:
[snip]
> > > In contrast, a heavy duty userspace-driven workload would transition to
> > > and from userspace for each kfree_rcu(), and that would increment the
> > > dyntick-idle count on each transition to and from userspace.  Adding the
> > > rcu_momentary_dyntick_idle() emulates a pair of such transitions.
> > 
> > But even if we're in kernel mode and not transitioning, I thought the FQS
> > loop (rcu_implicit_dynticks_qs() function) would set need_heavy_qs to true 
> > at
> > 2 * jiffies_to_sched_qs.
> > 
> > Hmm, I forgot that jiffies_to_sched_qs can be quite large I guess. You're
> > right, we could call rcu_momentary_dyntick_idle() in advance before waiting
> > for FQS loop to do the setting of need_heavy_qs.
> > 
> > Or, am I missing something with the rcu_momentary_dyntick_idle() point you
> > made?
> 
> The trick is that rcu_momentary_dyntick_idle() directly increments the
> CPU's dyntick counter, so that the next FQS loop will note that the CPU
> passed through a quiescent state.  No need for need_heavy_qs in this case.

Yes, that's what I also understand. Thanks for confirming,

 - Joel




Re: [PATCH RFC v1 1/2] rcu/tree: Add basic support for kfree_rcu batching

2019-08-09 Thread Paul E. McKenney
On Fri, Aug 09, 2019 at 05:36:43PM -0400, Joel Fernandes wrote:
> On Fri, Aug 09, 2019 at 01:42:17PM -0700, Paul E. McKenney wrote:
> > > Also, I can go back to 500M if I just keep KFREE_DRAIN_JIFFIES at HZ/50. 
> > > So I
> > > am quite happy about that. I think I can declare that the "let list grow
> > > indefinitely" design works quite well even with an insanely heavily loaded
> > > case of every CPU in a 16CPU system with 500M memory, indefinitely doing
> > > kfree_rcu() in a tight loop with appropriate cond_resched(). And I am like
> > > thinking - wow how does this stuff even work at such insane scales :-D
> > 
> > A lot of work by a lot of people over a long period of time.  On their
> > behalf, I thank you for the implied compliment.  So once this patch gets
> > in, perhaps you will have complimented yourself as well.  ;-)
> > 
> > But more work is needed, and will continue to be as new workloads,
> > compiler optimizations, and hardware appears.  And it would be good to
> > try this on a really big system at some point.
> 
> Cool!
> 
> > > > > > o   Along with the above boot parameter, use "rcutree.use_softirq=0"
> > > > > > to cause RCU to use kthreads instead of softirq.  (You might 
> > > > > > well
> > > > > > find issues in priority setting as well, but might as well find
> > > > > > them now if so!)
> > > > > 
> > > > > > Doesn't this one actually reduce the priority of the core RCU work? 
> > > > > softirq
> > > > > will always have higher priority than any there. So wouldn't that 
> > > > > have the
> > > > > effect of not reclaiming things fast enough? (Or, in my case not 
> > > > > scheduling
> > > > > the rcu_work which does the reclaim).
> > > > 
> > > > For low kfree_rcu() loads, yes, it increases overhead due to the need
> > > > for context switches instead of softirq running at the tail end of an
> > > > interrupt.  But for high kfree_rcu() loads, it gets you realtime 
> > > > priority
> > > > (in conjunction with "rcutree.kthread_prio=", that is).
> > > 
> > > I meant for high kfree_rcu() loads, a softirq context executing RCU 
> > > callback
> > > is still better from the point of view of the callback running because the
> > > softirq will run above all else (higher than the highest priority task) so
> > > use_softirq=0 would be a down grade from that perspective if something 
> > > higher
> > > than rcutree.kthread_prio is running on the CPU. So unless kthread_prio is
> > > set to the highest prio, then softirq running would work better. Did I 
> > > miss
> > > something?
> > 
> > Under heavy load, softirq stops running at the tail end of interrupts and
> > is instead run within the context of a per-CPU ksoftirqd kthread.  At normal
> > SCHED_OTHER priority.
> 
> Ah, yes. Agreed!
> 
> > > > > > o   With any of the above, invoke rcu_momentary_dyntick_idle() along
> > > > > > with cond_resched() in your kfree_rcu() loop.  This simulates
> > > > > > a trip to userspace for nohz_full CPUs, so if this helps for
> > > > > > non-nohz_full CPUs, adjustments to the kernel might be called 
> > > > > > for.
> > > 
> > > I did not try this yet. But I am thinking why would this help in nohz_idle
> > > case? In nohz_idle we already have the tick active when CPU is idle. I 
> > > guess
> > > it is because there may be a long time that elapses before
> > > rcu_data.rcu_need_heavy_qs == true ?
> > 
> > Under your heavy rcuperf load, none of the CPUs would ever be idle.  Nor
> > would they ever be in nohz_full userspace context, either.
> 
> Sorry I made a typo, I meant 'tick active when CPU is non-idle for NOHZ_IDLE
> systems' above.
> 
> > In contrast, a heavy duty userspace-driven workload would transition to
> > and from userspace for each kfree_rcu(), and that would increment the
> > dyntick-idle count on each transition to and from userspace.  Adding the
> > rcu_momentary_dyntick_idle() emulates a pair of such transitions.
> 
> But even if we're in kernel mode and not transitioning, I thought the FQS
> loop (rcu_implicit_dynticks_qs() function) would set need_heavy_qs to true at
> 2 * jiffies_to_sched_qs.
> 
> Hmm, I forgot that jiffies_to_sched_qs can be quite large I guess. You're
> right, we could call rcu_momentary_dyntick_idle() in advance before waiting
> for FQS loop to do the setting of need_heavy_qs.
> 
> Or, am I missing something with the rcu_momentary_dyntick_idle() point you
> made?

The trick is that rcu_momentary_dyntick_idle() directly increments the
CPU's dyntick counter, so that the next FQS loop will note that the CPU
passed through a quiescent state.  No need for need_heavy_qs in this case.

Thanx, Paul

> thanks,
> 
>  - Joel
> 
> 
> > 
> > Thanx, Paul
> > 
> > > > > Ok, will try it.
> > > > > 
> > > > > Save these bullet points for future reference! ;-)  thanks,
> > > > 
> > > > I guess this is helping me to prepare for Plumbers.  ;-)
> > > 

Re: [PATCH RFC v1 1/2] rcu/tree: Add basic support for kfree_rcu batching

2019-08-09 Thread Paul E. McKenney
On Fri, Aug 09, 2019 at 10:42:32PM -0400, Joel Fernandes wrote:
> On Wed, Aug 07, 2019 at 10:52:15AM -0700, Paul E. McKenney wrote:
> [snip] 
> > > > > @@ -3459,6 +3645,8 @@ void __init rcu_init(void)
> > > > >  {
> > > > >   int cpu;
> > > > >  
> > > > > + kfree_rcu_batch_init();
> > > > 
> > > > What happens if someone does a kfree_rcu() before this point?  It looks
> > > > like it should work, but have you tested it?
> > > > 
> > > > >   rcu_early_boot_tests();
> > > > 
> > > > For example, by testing it in rcu_early_boot_tests() and moving the
> > > > call to kfree_rcu_batch_init() here.
> > > 
> > > I have not tried to do the kfree_rcu() this early. I will try it out.
> > 
> > Yeah, well, call_rcu() this early came as a surprise to me back in the
> > day, so...  ;-)
> 
> I actually did get surprised as well!
> 
> It appears the timers are not fully initialized so the really early
> kfree_rcu() call from rcu_init() does cause a splat about an uninitialized
> timer spinlock (even though future kfree_rcu()s and the system are working
> fine all the way into the torture tests).
> 
> I think to resolve this, we can just not do batching until early_initcall,
> during which I have an initialization function which switches batching on.
> From that point it is safe.

Just go ahead and batch, but don't bother with the timer until
after single-threaded boot is done.  For example, you could check
rcu_scheduler_active similar to how sync_rcu_exp_select_cpus() does.
(See kernel/rcu/tree_exp.h.)

If needed, use an early_initcall() to handle the case where early boot
kfree_rcu() calls needed to set the timer but could not.

> Below is the diff on top of this patch, I think this should be good but let
> me know if anything looks odd to you. I tested it and it works.

Keep in mind that a call_rcu() callback can't possibly be invoked until
quite some time after the scheduler is up and running.  So it will be
a lot simpler to just skip setting the timer during early boot.

Thanx, Paul

> have a great weekend! thanks,
> -Joel
> 
> ---8<---
> 
> diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
> index a09ef81a1a4f..358f5c065fa4 100644
> --- a/kernel/rcu/tree.c
> +++ b/kernel/rcu/tree.c
> @@ -2634,6 +2634,7 @@ struct kfree_rcu_cpu {
>  };
>  
>  static DEFINE_PER_CPU(struct kfree_rcu_cpu, krc);
> +int kfree_rcu_batching_ready;
>  
>  /*
>   * This function is invoked in workqueue context after a grace period.
> @@ -2742,6 +2743,17 @@ static void kfree_rcu_monitor(struct work_struct *work)
>   spin_unlock_irqrestore(>lock, flags);
>  }
>  
> +/*
> + * This version of kfree_call_rcu does not do batching of kfree_rcu() 
> requests.
> + * Used only by rcuperf torture test for comparison with kfree_rcu_batch()
> + * or during really early init.
> + */
> +void kfree_call_rcu_nobatch(struct rcu_head *head, rcu_callback_t func)
> +{
> + __call_rcu(head, func, -1, 1);
> +}
> +EXPORT_SYMBOL_GPL(kfree_call_rcu_nobatch);
> +
>  /*
>   * Queue a request for lazy invocation of kfree() after a grace period.
>   *
> @@ -2764,6 +2775,10 @@ void kfree_call_rcu(struct rcu_head *head, 
> rcu_callback_t func)
>   unsigned long flags;
>   struct kfree_rcu_cpu *krcp;
>   bool monitor_todo;
> + static int once;
> +
> + if (!READ_ONCE(kfree_rcu_batching_ready))
> + return kfree_call_rcu_nobatch(head, func);
>  
>   local_irq_save(flags);
>   krcp = this_cpu_ptr();
> @@ -2794,16 +2809,6 @@ void kfree_call_rcu(struct rcu_head *head, 
> rcu_callback_t func)
>  }
>  EXPORT_SYMBOL_GPL(kfree_call_rcu);
>  
> -/*
> - * This version of kfree_call_rcu does not do batching of kfree_rcu() 
> requests.
> - * Used only by rcuperf torture test for comparison with kfree_rcu_batch().
> - */
> -void kfree_call_rcu_nobatch(struct rcu_head *head, rcu_callback_t func)
> -{
> - __call_rcu(head, func, -1, 1);
> -}
> -EXPORT_SYMBOL_GPL(kfree_call_rcu_nobatch);
> -
>  /*
>   * During early boot, any blocking grace-period wait automatically
>   * implies a grace period.  Later on, this is never the case for PREEMPT.
> @@ -3650,17 +3655,6 @@ static void __init rcu_dump_rcu_node_tree(void)
>   pr_cont("\n");
>  }
>  
> -void kfree_rcu_batch_init(void)
> -{
> - int cpu;
> -
> - for_each_possible_cpu(cpu) {
> - struct kfree_rcu_cpu *krcp = per_cpu_ptr(, cpu);
> - spin_lock_init(>lock);
> - INIT_DELAYED_WORK(>monitor_work, kfree_rcu_monitor);
> - }
> -}
> -
>  struct workqueue_struct *rcu_gp_wq;
>  struct workqueue_struct *rcu_par_gp_wq;
>  
> @@ -3668,8 +3662,6 @@ void __init rcu_init(void)
>  {
>   int cpu;
>  
> - kfree_rcu_batch_init();
> -
>   rcu_early_boot_tests();
>  
>   rcu_bootup_announce();
> @@ -3700,6 +3692,21 @@ void __init rcu_init(void)
>   srcu_init();
>  }
>  
> +static int __init kfree_rcu_batch_init(void)
> +{
> + int cpu;
> +
> + 

Re: [PATCH RFC v1 1/2] rcu/tree: Add basic support for kfree_rcu batching

2019-08-09 Thread Paul E. McKenney
On Fri, Aug 09, 2019 at 05:25:12PM -0400, Joel Fernandes wrote:
> On Fri, Aug 09, 2019 at 04:26:45PM -0400, Joel Fernandes wrote:
> > On Fri, Aug 09, 2019 at 04:22:26PM -0400, Joel Fernandes wrote:
> > > On Fri, Aug 09, 2019 at 09:33:46AM -0700, Paul E. McKenney wrote:
> > > > On Fri, Aug 09, 2019 at 11:39:24AM -0400, Joel Fernandes wrote:
> > > > > On Fri, Aug 09, 2019 at 08:16:19AM -0700, Paul E. McKenney wrote:
> > > > > > On Thu, Aug 08, 2019 at 07:30:14PM -0400, Joel Fernandes wrote:
> > > > > [snip]
> > > > > > > > But I could make it something like:
> > > > > > > > 1. Letting ->head grow if ->head_free busy
> > > > > > > > 2. If head_free is busy, then just queue/requeue the monitor to 
> > > > > > > > try again.
> > > > > > > > 
> > > > > > > > This would even improve performance, but will still risk going 
> > > > > > > > out of memory.
> > > > > > > 
> > > > > > > It seems I can indeed hit an out of memory condition once I 
> > > > > > > changed it to
> > > > > > > "letting list grow" (diff is below which applies on top of this 
> > > > > > > patch) while
> > > > > > > at the same time removing the schedule_timeout(2) and replacing 
> > > > > > > it with
> > > > > > > cond_resched() in the rcuperf test.  I think the reason is the 
> > > > > > > rcuperf test
> > > > > > > starves the worker threads that are executing in workqueue 
> > > > > > > context after a
> > > > > > > grace period and those are unable to get enough CPU time to kfree 
> > > > > > > things fast
> > > > > > > enough. But I am not fully sure about it and need to test/trace 
> > > > > > > more to
> > > > > > > figure out why this is happening.
> > > > > > > 
> > > > > > > > If I add back the schedule_timeout_uninterruptible(2) call, the 
> > > > > > > out of memory
> > > > > > > situation goes away.
> > > > > > > 
> > > > > > > Clearly we need to do more work on this patch.
> > > > > > > 
> > > > > > > In the regular kfree_rcu_no_batch() case, I don't hit this issue. 
> > > > > > > I believe
> > > > > > > that since the kfree is happening in softirq context in the 
> > > > > > > _no_batch() case,
> > > > > > > it fares better. The question then I guess is how do we run the 
> > > > > > > rcu_work in a
> > > > > > > higher priority context so it is not starved and runs often 
> > > > > > > enough. I'll
> > > > > > > trace more.
> > > > > > > 
> > > > > > > Perhaps I can also lower the priority of the rcuperf threads to 
> > > > > > > give the
> > > > > > > worker thread some more room to run and see if anything changes. 
> > > > > > > But I am not
> > > > > > > sure then if we're preparing the code for the real world with such
> > > > > > > modifications.
> > > > > > > 
> > > > > > > Any thoughts?
> > > > > > 
> > > > > > Several!  With luck, perhaps some are useful.  ;-)
> > > > > > 
> > > > > > o   Increase the memory via kvm.sh "--memory 1G" or more.  The
> > > > > > default is "--memory 500M".
> > > > > 
> > > > > Thanks, this definitely helped.
> > > 
> > > Also, I can go back to 500M if I just keep KFREE_DRAIN_JIFFIES at HZ/50. 
> > > So I
> > > am quite happy about that. I think I can declare that the "let list grow
> > > indefinitely" design works quite well even with an insanely heavily loaded
> > > case of every CPU in a 16CPU system with 500M memory, indefinitely doing
> > > kfree_rcu() in a tight loop with appropriate cond_resched(). And I am like
> > > thinking - wow how does this stuff even work at such insane scales :-D
> > 
> > Oh, and I should probably also count whether there are any 'total number of
> > grace periods' reduction, due to the batching!
>  
> And, the number of grace periods did dramatically drop (by 5X) with the
> batching!! I have modified the rcuperf test to show the number of grace
> periods that elapsed during the test.

Very good!  Batching for the win!  ;-)

Thanx, Paul



Re: [GIT PULL] Kbuild fixes for v5.3-rc4

2019-08-09 Thread pr-tracker-bot
The pull request you sent on Sat, 10 Aug 2019 11:10:22 +0900:

> git://git.kernel.org/pub/scm/linux/kernel/git/masahiroy/linux-kbuild.git 
> tags/kbuild-fixes-v5.3-3

has been merged into torvalds/linux.git:
https://git.kernel.org/torvalds/c/451577f3e3a9bf1861218641dbbf98e214e77851

Thank you!

-- 
Deet-doot-dot, I am a bot.
https://korg.wiki.kernel.org/userdoc/prtracker


Re: [PATCH] RISC-V: Issue a local tlb flush if possible.

2019-08-09 Thread Anup Patel
On Sat, Aug 10, 2019 at 7:13 AM Atish Patra  wrote:
>
> In RISC-V, tlb flush happens via SBI which is expensive.
> If the target cpumask contains a local hartid, some cost
> can be saved by issuing a local tlb flush as we do that
> in OpenSBI anyways.
>
> Signed-off-by: Atish Patra 
> ---
>  arch/riscv/include/asm/tlbflush.h | 33 +++
>  1 file changed, 29 insertions(+), 4 deletions(-)
>
> diff --git a/arch/riscv/include/asm/tlbflush.h 
> b/arch/riscv/include/asm/tlbflush.h
> index 687dd19735a7..b32ba4fa5888 100644
> --- a/arch/riscv/include/asm/tlbflush.h
> +++ b/arch/riscv/include/asm/tlbflush.h
> @@ -8,6 +8,7 @@
>  #define _ASM_RISCV_TLBFLUSH_H
>
>  #include 
> +#include 
>  #include 
>
>  /*
> @@ -46,14 +47,38 @@ static inline void remote_sfence_vma(struct cpumask 
> *cmask, unsigned long start,
>  unsigned long size)
>  {
> struct cpumask hmask;
> +   struct cpumask tmask;
> +   int cpuid = smp_processor_id();
>
> cpumask_clear();
> -   riscv_cpuid_to_hartid_mask(cmask, );
> -   sbi_remote_sfence_vma(hmask.bits, start, size);
> +   cpumask_clear();
> +
> +   if (cmask)
> +   cpumask_copy(, cmask);
> +   else
> +   cpumask_copy(, cpu_online_mask);

This can be further simplified.

We can totally avoid tmask, cpumask_copy(), and cpumask_clear()
by directly updating hmask.

In addition to this patch, we should also handle the case of
empty hart mask in OpenSBI.

> +
> +   if (cpumask_test_cpu(cpuid, )) {
> +   /* Save trap cost by issuing a local tlb flush here */
> +   if ((start == 0 && size == -1) || (size > PAGE_SIZE))
> +   local_flush_tlb_all();
> +   else if (size == PAGE_SIZE)
> +   local_flush_tlb_page(start);
> +   cpumask_clear_cpu(cpuid, );
> +   } else if (cpumask_empty()) {
> +   /* cpumask is empty. So just do a local flush */
> +   local_flush_tlb_all();
> +   return;
> +   }
> +
> +   if (!cpumask_empty()) {
> +   riscv_cpuid_to_hartid_mask(, );
> +   sbi_remote_sfence_vma(hmask.bits, start, size);
> +   }
>  }
>
> -#define flush_tlb_all() sbi_remote_sfence_vma(NULL, 0, -1)
> -#define flush_tlb_page(vma, addr) flush_tlb_range(vma, addr, 0)
> +#define flush_tlb_all() remote_sfence_vma(NULL, 0, -1)
> +#define flush_tlb_page(vma, addr) flush_tlb_range(vma, addr, (addr) + 
> PAGE_SIZE)
>  #define flush_tlb_range(vma, start, end) \
> remote_sfence_vma(mm_cpumask((vma)->vm_mm), start, (end) - (start))
>  #define flush_tlb_mm(mm) \
> --
> 2.21.0
>
>
> ___
> linux-riscv mailing list
> linux-ri...@lists.infradead.org
> http://lists.infradead.org/mailman/listinfo/linux-riscv

Regards,
Anup


Re: [PATCH 1/3] media: vimc: move private defines to a common header

2019-08-09 Thread Helen Koike
Hi Shuah,

Thanks for the patch.

On 8/9/19 6:45 PM, Shuah Khan wrote:
> In preparation for collapsing the component driver structure into
> a monolith, move private device structure defines to a new common
> header file.
> 
> Signed-off-by: Shuah Khan 
> ---
>  drivers/media/platform/vimc/vimc-capture.c |  21 +
>  drivers/media/platform/vimc/vimc-core.c|  18 +---
>  drivers/media/platform/vimc/vimc-debayer.c |  16 +---
>  drivers/media/platform/vimc/vimc-scaler.c  |  15 +--
>  drivers/media/platform/vimc/vimc-sensor.c  |  13 +--
>  drivers/media/platform/vimc/vimc.h | 102 +
>  6 files changed, 107 insertions(+), 78 deletions(-)
>  create mode 100644 drivers/media/platform/vimc/vimc.h
> 
> diff --git a/drivers/media/platform/vimc/vimc-capture.c 
> b/drivers/media/platform/vimc/vimc-capture.c
> index 664855708fdf..c52fc5d97c2d 100644
> --- a/drivers/media/platform/vimc/vimc-capture.c
> +++ b/drivers/media/platform/vimc/vimc-capture.c
> @@ -13,6 +13,7 @@
>  #include 
>  #include 
>  
> +#include "vimc.h"
>  #include "vimc-common.h"
>  #include "vimc-streamer.h"
>  
> @@ -44,26 +45,6 @@ static const u32 vimc_cap_supported_pixfmt[] = {
>   V4L2_PIX_FMT_SRGGB12,
>  };
>  
> -struct vimc_cap_device {
> - struct vimc_ent_device ved;
> - struct video_device vdev;
> - struct device *dev;
> - struct v4l2_pix_format format;
> - struct vb2_queue queue;
> - struct list_head buf_list;
> - /*
> -  * NOTE: in a real driver, a spin lock must be used to access the
> -  * queue because the frames are generated from a hardware interruption
> -  * and the isr is not allowed to sleep.
> -  * Even if it is not necessary a spinlock in the vimc driver, we
> -  * use it here as a code reference
> -  */
> - spinlock_t qlock;
> - struct mutex lock;
> - u32 sequence;
> - struct vimc_stream stream;
> -};
> -
>  static const struct v4l2_pix_format fmt_default = {
>   .width = 640,
>   .height = 480,
> diff --git a/drivers/media/platform/vimc/vimc-core.c 
> b/drivers/media/platform/vimc/vimc-core.c
> index 571c55aa0e16..c9b351472272 100644
> --- a/drivers/media/platform/vimc/vimc-core.c
> +++ b/drivers/media/platform/vimc/vimc-core.c
> @@ -12,6 +12,7 @@
>  #include 
>  #include 
>  
> +#include "vimc.h"
>  #include "vimc-common.h"
>  
>  #define VIMC_MDEV_MODEL_NAME "VIMC MDEV"
> @@ -24,23 +25,6 @@
>   .flags = link_flags,\
>  }
>  
> -struct vimc_device {
> - /* The platform device */
> - struct platform_device pdev;
> -
> - /* The pipeline configuration */
> - const struct vimc_pipeline_config *pipe_cfg;
> -
> - /* The Associated media_device parent */
> - struct media_device mdev;
> -
> - /* Internal v4l2 parent device*/
> - struct v4l2_device v4l2_dev;
> -
> - /* Subdevices */
> - struct platform_device **subdevs;
> -};
> -
>  /* Structure which describes individual configuration for each entity */
>  struct vimc_ent_config {
>   const char *name;
> diff --git a/drivers/media/platform/vimc/vimc-debayer.c 
> b/drivers/media/platform/vimc/vimc-debayer.c
> index 00598fbf3cba..750752bb173c 100644
> --- a/drivers/media/platform/vimc/vimc-debayer.c
> +++ b/drivers/media/platform/vimc/vimc-debayer.c
> @@ -13,6 +13,7 @@
>  #include 
>  #include 
>  
> +#include "vimc.h"
>  #include "vimc-common.h"
>  
>  #define VIMC_DEB_DRV_NAME "vimc-debayer"
> @@ -44,21 +45,6 @@ struct vimc_deb_pix_map {
>   enum vimc_deb_rgb_colors order[2][2];
>  };
>  
> -struct vimc_deb_device {
> - struct vimc_ent_device ved;
> - struct v4l2_subdev sd;
> - struct device *dev;
> - /* The active format */
> - struct v4l2_mbus_framefmt sink_fmt;
> - u32 src_code;
> - void (*set_rgb_src)(struct vimc_deb_device *vdeb, unsigned int lin,
> - unsigned int col, unsigned int rgb[3]);
> - /* Values calculated when the stream starts */
> - u8 *src_frame;
> - const struct vimc_deb_pix_map *sink_pix_map;
> - unsigned int sink_bpp;
> -};
> -
>  static const struct v4l2_mbus_framefmt sink_fmt_default = {
>   .width = 640,
>   .height = 480,
> diff --git a/drivers/media/platform/vimc/vimc-scaler.c 
> b/drivers/media/platform/vimc/vimc-scaler.c
> index c7123a45c55b..fe99b9102ada 100644
> --- a/drivers/media/platform/vimc/vimc-scaler.c
> +++ b/drivers/media/platform/vimc/vimc-scaler.c
> @@ -13,6 +13,7 @@
>  #include 
>  #include 
>  
> +#include "vimc.h"
>  #include "vimc-common.h"
>  
>  #define VIMC_SCA_DRV_NAME "vimc-scaler"
> @@ -31,20 +32,6 @@ static const u32 vimc_sca_supported_pixfmt[] = {
>   V4L2_PIX_FMT_ARGB32,
>  };
>  
> -struct vimc_sca_device {
> - struct vimc_ent_device ved;
> - struct v4l2_subdev sd;
> - struct device *dev;
> - /* NOTE: the source fmt is the same as the sink
> -  * with the width and hight multiplied by mult
> -  */
> - struct v4l2_mbus_framefmt 

Re: [PATCH v9 0/7] Solve postboot supplier cleanup and optimize probe ordering

2019-08-09 Thread Frank Rowand
Hi Saravana,

On 7/31/19 3:17 PM, Saravana Kannan wrote:
> Add device-links to track functional dependencies between devices
> after they are created (but before they are probed) by looking at
> their common DT bindings like clocks, interconnects, etc.
> 
> Having functional dependencies automatically added before the devices
> are probed, provides the following benefits:
> 
> - Optimizes device probe order and avoids the useless work of
>   attempting probes of devices that will not probe successfully
>   (because their suppliers aren't present or haven't probed yet).
> 
>   For example, in a commonly available mobile SoC, registering just
>   one consumer device's driver at an initcall level earlier than the
>   supplier device's driver causes 11 failed probe attempts before the
>   consumer device probes successfully. This was with a kernel with all
>   the drivers statically compiled in. This problem gets a lot worse if
>   all the drivers are loaded as modules without direct symbol
>   dependencies.
> 
> - Supplier devices like clock providers, interconnect providers, etc
>   need to keep the resources they provide active and at a particular
>   state(s) during boot up even if their current set of consumers don't
>   request the resource to be active. This is because the rest of the
>   consumers might not have probed yet and turning off the resource
>   before all the consumers have probed could lead to a hang or
>   undesired user experience.
> 
>   Some frameworks (Eg: regulator) handle this today by turning off
>   "unused" resources at late_initcall_sync and hoping all the devices
>   have probed by then. This is not a valid assumption for systems with
>   loadable modules. Other frameworks (Eg: clock) just don't handle
>   this due to the lack of a clear signal for when they can turn off
>   resources. This leads to downstream hacks to handle cases like this
>   that can easily be solved in the upstream kernel.
> 
>   By linking devices before they are probed, we give suppliers a clear
>   count of the number of dependent consumers. Once all of the
>   consumers are active, the suppliers can turn off the unused
>   resources without making assumptions about the number of consumers.
> 
> By default we just add device-links to track "driver presence" (probe
> succeeded) of the supplier device. If any other functionality provided
> by device-links is needed, it is left to the consumer/supplier
> devices to change the link when they probe.
> 
> v1 -> v2:
> - Drop patch to speed up of_find_device_by_node()
> - Drop depends-on property and use existing bindings
> 
> v2 -> v3:
> - Refactor the code to have driver core initiate the linking of devs
> - Have driver core link consumers to supplier before it's probed
> - Add support for drivers to edit the device links before probing
> 
> v3 -> v4:
> - Tested edit_links() on system with cyclic dependency. Works.
> - Added some checks to make sure device link isn't attempted from
>   parent device node to child device node.
> - Added way to pause/resume sync_state callbacks across
>   of_platform_populate().
> - Recursively parse DT node to create device links from parent to
>   suppliers of parent and all child nodes.
> 
> v4 -> v5:
> - Fixed copy-pasta bugs with linked list handling
> - Walk up the phandle reference till I find an actual device (needed
>   for regulators to work)
> - Added support for linking devices from regulator DT bindings
> - Tested the whole series again to make sure cyclic dependencies are
>   broken with edit_links() and regulator links are created properly.
> 
> v5 -> v6:
> - Split, squashed and reordered some of the patches.
> - Refactored the device linking code to follow the same code pattern for
>   any property.
> 
> v6 -> v7:
> - No functional changes.
> - Renamed i to index
> - Added comment to clarify not having to check property name for every
>   index
> - Added "matched" variable to clarify code. No functional change.
> - Added comments to include/linux/device.h for add_links()
> 
> v7 -> v8:
> - Rebased on top of linux-next to handle device link changes in [1]
> 


> v8 -> v9:
> - Fixed kbuild test bot reported errors (docs and const)

Some maintainers have strong opinions about whether change logs should be:

  (1) only in patch 0
  (2) only in the specific patches that are changed
  (3) both in patch 0 and in the specific patches that are changed.

I can adapt to any of the three styles.  But for style "(1)" please
list which specific patch has changed for each item in the change list.

-Frank


> 
> [1] - https://lore.kernel.org/lkml/2305283.AStDPdUUnE@kreacher/
> 
> -Saravana
> 
> 
> Saravana Kannan (7):
>   driver core: Add support for linking devices during device addition
>   driver core: Add edit_links() callback for drivers
>   of/platform: Add functional dependency link from DT bindings
>   driver core: Add sync_state driver/bus callback
>   of/platform: Pause/resume sync state during init and
> 

Re: [PATCH] sh: Drop -Werror from kernel Makefile

2019-08-09 Thread Gustavo A. R. Silva
Guenter,

On 8/9/19 4:56 PM, Guenter Roeck wrote:
> On Fri, Aug 09, 2019 at 04:36:01PM -0500, Gustavo A. R. Silva wrote:
>> Hi Guenter,
>>
>> On 8/9/19 2:56 PM, Guenter Roeck wrote:
>>> On Sun, Aug 04, 2019 at 11:24:41PM -0400, Rich Felker wrote:
 On Sun, Aug 04, 2019 at 07:14:23PM -0700, Guenter Roeck wrote:
> Since commit a035d552a93b ("Makefile: Globally enable fall-through
> warning"), all sh builds fail with errors such as
>
> arch/sh/kernel/disassemble.c: In function 'print_sh_insn':
> arch/sh/kernel/disassemble.c:478:8: error: this statement may fall through
>
> Since this effectively disables all build and boot tests for the
> architecture, let's drop -Werror from the sh kernel Makefile until
> the problems are fixed.
>
> Cc: Gustavo A. R. Silva 
> Signed-off-by: Guenter Roeck 

 Acked-by: Rich Felker 

>>> Any chance to get this or a similar patch applied soon ? All sh builds
>>> in mainline and -next are still broken.
>>>
>>
>> If no one cares, I can add it to my tree and include it in my pull-request
>> for 5.3-rc4.
>>
>> I would just need your Tested-by.
>>
> 
> Sure:
> 
> Tested-by: Guenter Roeck 
> 
> [ Applied to ToT and built sh:{defconfig,allnoconfig,tinyconfig} ]
> 

On second thought it seems to me that this is not a good idea, at least
for mainline. For the time being I'll take this patch for linux-next only.

Who is the maintainer of sh?

The best solution is to fix those fall-through warnings you see. Could you
please send me all the warnings you see? I can try to fix them.

Thanks!
--
Gustavo


Re: [PATCH RFC v1 1/2] rcu/tree: Add basic support for kfree_rcu batching

2019-08-09 Thread Joel Fernandes
On Wed, Aug 07, 2019 at 10:52:15AM -0700, Paul E. McKenney wrote:
[snip] 
> > > > @@ -3459,6 +3645,8 @@ void __init rcu_init(void)
> > > >  {
> > > > int cpu;
> > > >  
> > > > +   kfree_rcu_batch_init();
> > > 
> > > What happens if someone does a kfree_rcu() before this point?  It looks
> > > like it should work, but have you tested it?
> > > 
> > > > rcu_early_boot_tests();
> > > 
> > > For example, by testing it in rcu_early_boot_tests() and moving the
> > > call to kfree_rcu_batch_init() here.
> > 
> > I have not tried to do the kfree_rcu() this early. I will try it out.
> 
> Yeah, well, call_rcu() this early came as a surprise to me back in the
> day, so...  ;-)

I actually did get surprised as well!

It appears the timers are not fully initialized so the really early
kfree_rcu() call from rcu_init() does cause a splat about an uninitialized
timer spinlock (even though future kfree_rcu()s and the system are working
fine all the way into the torture tests).

I think to resolve this, we can just not do batching until early_initcall,
during which I have an initialization function which switches batching on.
From that point it is safe.

Below is the diff on top of this patch, I think this should be good but let
me know if anything looks odd to you. I tested it and it works.

have a great weekend! thanks,
-Joel

---8<---

diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
index a09ef81a1a4f..358f5c065fa4 100644
--- a/kernel/rcu/tree.c
+++ b/kernel/rcu/tree.c
@@ -2634,6 +2634,7 @@ struct kfree_rcu_cpu {
 };
 
 static DEFINE_PER_CPU(struct kfree_rcu_cpu, krc);
+int kfree_rcu_batching_ready;
 
 /*
  * This function is invoked in workqueue context after a grace period.
@@ -2742,6 +2743,17 @@ static void kfree_rcu_monitor(struct work_struct *work)
spin_unlock_irqrestore(>lock, flags);
 }
 
+/*
+ * This version of kfree_call_rcu does not do batching of kfree_rcu() requests.
+ * Used only by rcuperf torture test for comparison with kfree_rcu_batch()
+ * or during really early init.
+ */
+void kfree_call_rcu_nobatch(struct rcu_head *head, rcu_callback_t func)
+{
+   __call_rcu(head, func, -1, 1);
+}
+EXPORT_SYMBOL_GPL(kfree_call_rcu_nobatch);
+
 /*
  * Queue a request for lazy invocation of kfree() after a grace period.
  *
@@ -2764,6 +2775,10 @@ void kfree_call_rcu(struct rcu_head *head, 
rcu_callback_t func)
unsigned long flags;
struct kfree_rcu_cpu *krcp;
bool monitor_todo;
+   static int once;
+
+   if (!READ_ONCE(kfree_rcu_batching_ready))
+   return kfree_call_rcu_nobatch(head, func);
 
local_irq_save(flags);
krcp = this_cpu_ptr();
@@ -2794,16 +2809,6 @@ void kfree_call_rcu(struct rcu_head *head, 
rcu_callback_t func)
 }
 EXPORT_SYMBOL_GPL(kfree_call_rcu);
 
-/*
- * This version of kfree_call_rcu does not do batching of kfree_rcu() requests.
- * Used only by rcuperf torture test for comparison with kfree_rcu_batch().
- */
-void kfree_call_rcu_nobatch(struct rcu_head *head, rcu_callback_t func)
-{
-   __call_rcu(head, func, -1, 1);
-}
-EXPORT_SYMBOL_GPL(kfree_call_rcu_nobatch);
-
 /*
  * During early boot, any blocking grace-period wait automatically
  * implies a grace period.  Later on, this is never the case for PREEMPT.
@@ -3650,17 +3655,6 @@ static void __init rcu_dump_rcu_node_tree(void)
pr_cont("\n");
 }
 
-void kfree_rcu_batch_init(void)
-{
-   int cpu;
-
-   for_each_possible_cpu(cpu) {
-   struct kfree_rcu_cpu *krcp = per_cpu_ptr(, cpu);
-   spin_lock_init(>lock);
-   INIT_DELAYED_WORK(>monitor_work, kfree_rcu_monitor);
-   }
-}
-
 struct workqueue_struct *rcu_gp_wq;
 struct workqueue_struct *rcu_par_gp_wq;
 
@@ -3668,8 +3662,6 @@ void __init rcu_init(void)
 {
int cpu;
 
-   kfree_rcu_batch_init();
-
rcu_early_boot_tests();
 
rcu_bootup_announce();
@@ -3700,6 +3692,21 @@ void __init rcu_init(void)
srcu_init();
 }
 
+static int __init kfree_rcu_batch_init(void)
+{
+   int cpu;
+
+   for_each_possible_cpu(cpu) {
+   struct kfree_rcu_cpu *krcp = per_cpu_ptr(&krc, cpu);
+   spin_lock_init(&krcp->lock);
+   INIT_DELAYED_WORK(&krcp->monitor_work, kfree_rcu_monitor);
+   }
+
+   WRITE_ONCE(kfree_rcu_batching_ready, 1);
+   return 0;
+}
+early_initcall(kfree_rcu_batch_init);
+
 #include "tree_stall.h"
 #include "tree_exp.h"
 #include "tree_plugin.h"


[GIT PULL] Kbuild fixes for v5.3-rc4

2019-08-09 Thread Masahiro YAMADA
Hi Linus,

Please pull more Kbuild fixes.
Thanks!


The following changes since commit e21a712a9685488f5ce80495b37b9fdbe96c230d:

  Linux 5.3-rc3 (2019-08-04 18:40:12 -0700)

are available in the Git repository at:

  git://git.kernel.org/pub/scm/linux/kernel/git/masahiroy/linux-kbuild.git
tags/kbuild-fixes-v5.3-3

for you to fetch changes up to c07d8d47bca1b325102fa2be3a463075f7b051d9:

  kbuild: show hint if subdir-y/m is used to visit module Makefile
(2019-08-10 01:45:31 +0900)


Kbuild fixes for v5.3 (3rd)

 - revive single target %.ko

 - do not create built-in.a where it is unneeded

 - do not create modules.order where it is unneeded

 - show a warning if subdir-y/m is used to visit a module Makefile


Masahiro Yamada (4):
  kbuild: revive single target %.ko
  kbuild: fix false-positive need-builtin calculation
  kbuild: generate modules.order only in directories visited by obj-y/m
  kbuild: show hint if subdir-y/m is used to visit module Makefile

 Makefile | 13 -
 scripts/Makefile.build   | 11 ++-
 scripts/Makefile.modpost |  6 ++
 3 files changed, 24 insertions(+), 6 deletions(-)


[PATCH] x86/umwait: Fix error handling in umwait_init()

2019-08-09 Thread Fenghua Yu
Currently, failure of cpuhp_setup_state() is ignored and the syscore
ops and the control interfaces can still be added even after the
failure. But, this error handling will cause a few issues:

1. The CPUs may have different values in the IA32_UMWAIT_CONTROL
   MSR because there is no way to roll back the control MSR on
   the CPUs which already set the MSR before the failure.
2. If the sysfs interface is added successfully, there will be a mismatch
   between the global control value and the control MSR:
   - The interface shows the default global control value. But,
 the control MSR is not set to the value because the CPU online
 function, which is supposed to set the MSR to the value,
 is not installed.
   - If the sysadmin changes the global control value through
 the interface, the control MSR on all current online CPUs is
 set to the new value. But, the control MSR on newly onlined CPUs
 after the value change will not be set to the new value due to
 lack of the CPU online function.
3. On resume from suspend/hibernation, the boot CPU restores the control
   MSR to the global control value through the syscore ops. But, the
   control MSR on all APs is not set due to lack of the CPU online
   function.

To solve the issues and enforce consistent behavior on the failure
of the CPU hotplug setup, make the following changes:

1. Cache the original control MSR value which is configured by
   hardware or BIOS before kernel boot. This value is likely to
   be 0. But it could be a different number as well. Cache the
   control MSR only once before the MSR is changed.
2. Add the CPU offline function so that the MSR is restored to the
   original control value on all CPUs on the failure.
3. On the failure, exit from umwait_init() so that the syscore ops
   and the control interfaces are not added.

Reported-by: Valdis Kletnieks 
Suggested-by: Thomas Gleixner 
Signed-off-by: Fenghua Yu 
---
 arch/x86/kernel/cpu/umwait.c | 39 +++-
 1 file changed, 38 insertions(+), 1 deletion(-)

diff --git a/arch/x86/kernel/cpu/umwait.c b/arch/x86/kernel/cpu/umwait.c
index 6a204e7336c1..95581f4cf6d8 100644
--- a/arch/x86/kernel/cpu/umwait.c
+++ b/arch/x86/kernel/cpu/umwait.c
@@ -17,6 +17,12 @@
  */
 static u32 umwait_control_cached = UMWAIT_CTRL_VAL(10, UMWAIT_C02_ENABLE);
 
+/*
+ * Cache the original IA32_UMWAIT_CONTROL MSR value which is configured by
+ * hardware or BIOS before kernel boot.
+ */
+static u32 orig_umwait_control_cached __read_mostly;
+
 /*
  * Serialize access to umwait_control_cached and IA32_UMWAIT_CONTROL MSR in
  * the sysfs write functions.
@@ -52,6 +58,23 @@ static int umwait_cpu_online(unsigned int cpu)
return 0;
 }
 
+/*
+ * The CPU hotplug callback sets the control MSR to the original control
+ * value.
+ */
+static int umwait_cpu_offline(unsigned int cpu)
+{
+   /*
+* This code is protected by the CPU hotplug already and
+* orig_umwait_control_cached is never changed after it caches
+* the original control MSR value in umwait_init(). So there
+* is no race condition here.
+*/
+   wrmsr(MSR_IA32_UMWAIT_CONTROL, orig_umwait_control_cached, 0);
+
+   return 0;
+}
+
 /*
  * On resume, restore IA32_UMWAIT_CONTROL MSR on the boot processor which
  * is the only active CPU at this time. The MSR is set up on the APs via the
@@ -185,8 +208,22 @@ static int __init umwait_init(void)
if (!boot_cpu_has(X86_FEATURE_WAITPKG))
return -ENODEV;
 
+   /*
+* Cache the original control MSR value before the control MSR is
+* changed. This is the only place where orig_umwait_control_cached
+* is modified.
+*/
+   rdmsrl(MSR_IA32_UMWAIT_CONTROL, orig_umwait_control_cached);
+
ret = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "umwait:online",
-   umwait_cpu_online, NULL);
+   umwait_cpu_online, umwait_cpu_offline);
+   if (ret < 0) {
+   /*
+* On failure, the control MSR on all CPUs has the
+* original control value.
+*/
+   return ret;
+   }
 
register_syscore_ops(_syscore_ops);
 
-- 
2.19.1



Re: [PATCH] regulator: core: Add devres versions of regulator_enable/disable

2019-08-09 Thread Chuhong Yuan
On Fri, Aug 9, 2019 at 11:11 PM Mark Brown  wrote:
>
> On Fri, Aug 09, 2019 at 11:03:52AM +0800, Chuhong Yuan wrote:
> > I wrote a coccinelle script to detect possible chances
> > of utilizing devm_() APIs to simplify the driver.
> > The script found 147 drivers in total and 22 of them
> > have been patched.
>
> > Within the 125 left ones, at least 31 of them (24.8%)
> > are hindered from benefiting from devm_() APIs because
> > of lack of a devres version of regulator_enable().
>
> I'm not super keen on managed versions of these functions since they're
> very likely to cause reference counting issues between the probe/remove
> path and the suspend/resume path which aren't obvious from the code, I'm
> especially worried about double frees on release.

I find that 29 of 31 cases I found call regulator_disable() only when encountering
a probe failure or in .remove.
So I think the devm versions of regulator_enable/disable() will not cause big
problems.

I even found a driver that forgets to disable the regulator when encountering
a probe failure,
which is drivers/iio/adc/ti-adc128s052.c.
And a devm version of regulator_enable() can prevent such mistakes.


[PATCH] RISC-V: Issue a local tlb flush if possible.

2019-08-09 Thread Atish Patra
In RISC-V, tlb flush happens via SBI which is expensive.
If the target cpumask contains a local hartid, some cost
can be saved by issuing a local tlb flush as we do that
in OpenSBI anyways.

Signed-off-by: Atish Patra 
---
 arch/riscv/include/asm/tlbflush.h | 33 +++
 1 file changed, 29 insertions(+), 4 deletions(-)

diff --git a/arch/riscv/include/asm/tlbflush.h 
b/arch/riscv/include/asm/tlbflush.h
index 687dd19735a7..b32ba4fa5888 100644
--- a/arch/riscv/include/asm/tlbflush.h
+++ b/arch/riscv/include/asm/tlbflush.h
@@ -8,6 +8,7 @@
 #define _ASM_RISCV_TLBFLUSH_H
 
 #include 
+#include 
 #include 
 
 /*
@@ -46,14 +47,38 @@ static inline void remote_sfence_vma(struct cpumask *cmask, 
unsigned long start,
 unsigned long size)
 {
struct cpumask hmask;
+   struct cpumask tmask;
+   int cpuid = smp_processor_id();
 
cpumask_clear();
-   riscv_cpuid_to_hartid_mask(cmask, );
-   sbi_remote_sfence_vma(hmask.bits, start, size);
+   cpumask_clear();
+
+   if (cmask)
+   cpumask_copy(, cmask);
+   else
+   cpumask_copy(, cpu_online_mask);
+
+   if (cpumask_test_cpu(cpuid, )) {
+   /* Save trap cost by issuing a local tlb flush here */
+   if ((start == 0 && size == -1) || (size > PAGE_SIZE))
+   local_flush_tlb_all();
+   else if (size == PAGE_SIZE)
+   local_flush_tlb_page(start);
+   cpumask_clear_cpu(cpuid, );
+   } else if (cpumask_empty()) {
+   /* cpumask is empty. So just do a local flush */
+   local_flush_tlb_all();
+   return;
+   }
+
+   if (!cpumask_empty()) {
+   riscv_cpuid_to_hartid_mask(, );
+   sbi_remote_sfence_vma(hmask.bits, start, size);
+   }
 }
 
-#define flush_tlb_all() sbi_remote_sfence_vma(NULL, 0, -1)
-#define flush_tlb_page(vma, addr) flush_tlb_range(vma, addr, 0)
+#define flush_tlb_all() remote_sfence_vma(NULL, 0, -1)
+#define flush_tlb_page(vma, addr) flush_tlb_range(vma, addr, (addr) + 
PAGE_SIZE)
 #define flush_tlb_range(vma, start, end) \
remote_sfence_vma(mm_cpumask((vma)->vm_mm), start, (end) - (start))
 #define flush_tlb_mm(mm) \
-- 
2.21.0



Re: [PATCH] powerpc/mm: Use refcount_t for refcount

2019-08-09 Thread Chuhong Yuan
On Fri, Aug 9, 2019 at 8:36 PM Michael Ellerman  wrote:
>
> Chuhong Yuan  writes:
> > Reference counters are preferred to use refcount_t instead of
> > atomic_t.
> > This is because the implementation of refcount_t can prevent
> > overflows and detect possible use-after-free.
> > So convert atomic_t ref counters to refcount_t.
> >
> > Signed-off-by: Chuhong Yuan 
>
> Thanks.
>
> We don't have a fast implementation of refcount_t, so I'm worried this
> could cause a measurable performance regression.
>
> Did you benchmark it at all?
>

I did not benchmark it and I don't have the testing environment...

> cheers
>
> > diff --git a/arch/powerpc/mm/book3s64/mmu_context.c 
> > b/arch/powerpc/mm/book3s64/mmu_context.c
> > index 2d0cb5ba9a47..f836fd5a6abc 100644
> > --- a/arch/powerpc/mm/book3s64/mmu_context.c
> > +++ b/arch/powerpc/mm/book3s64/mmu_context.c
> > @@ -231,7 +231,7 @@ static void pmd_frag_destroy(void *pmd_frag)
> >   /* drop all the pending references */
> >   count = ((unsigned long)pmd_frag & ~PAGE_MASK) >> PMD_FRAG_SIZE_SHIFT;
> >   /* We allow PTE_FRAG_NR fragments from a PTE page */
> > - if (atomic_sub_and_test(PMD_FRAG_NR - count, 
> > >pt_frag_refcount)) {
> > + if (refcount_sub_and_test(PMD_FRAG_NR - count, 
> > >pt_frag_refcount)) {
> >   pgtable_pmd_page_dtor(page);
> >   __free_page(page);
> >   }
> > diff --git a/arch/powerpc/mm/book3s64/pgtable.c 
> > b/arch/powerpc/mm/book3s64/pgtable.c
> > index 7d0e0d0d22c4..40056896ce4e 100644
> > --- a/arch/powerpc/mm/book3s64/pgtable.c
> > +++ b/arch/powerpc/mm/book3s64/pgtable.c
> > @@ -277,7 +277,7 @@ static pmd_t *__alloc_for_pmdcache(struct mm_struct *mm)
> >   return NULL;
> >   }
> >
> > - atomic_set(>pt_frag_refcount, 1);
> > + refcount_set(>pt_frag_refcount, 1);
> >
> >   ret = page_address(page);
> >   /*
> > @@ -294,7 +294,7 @@ static pmd_t *__alloc_for_pmdcache(struct mm_struct *mm)
> >* count.
> >*/
> >   if (likely(!mm->context.pmd_frag)) {
> > - atomic_set(>pt_frag_refcount, PMD_FRAG_NR);
> > + refcount_set(>pt_frag_refcount, PMD_FRAG_NR);
> >   mm->context.pmd_frag = ret + PMD_FRAG_SIZE;
> >   }
> >   spin_unlock(>page_table_lock);
> > @@ -317,8 +317,7 @@ void pmd_fragment_free(unsigned long *pmd)
> >  {
> >   struct page *page = virt_to_page(pmd);
> >
> > - BUG_ON(atomic_read(>pt_frag_refcount) <= 0);
> > - if (atomic_dec_and_test(>pt_frag_refcount)) {
> > + if (refcount_dec_and_test(>pt_frag_refcount)) {
> >   pgtable_pmd_page_dtor(page);
> >   __free_page(page);
> >   }
> > diff --git a/arch/powerpc/mm/pgtable-frag.c b/arch/powerpc/mm/pgtable-frag.c
> > index a7b05214760c..4ef8231b677f 100644
> > --- a/arch/powerpc/mm/pgtable-frag.c
> > +++ b/arch/powerpc/mm/pgtable-frag.c
> > @@ -24,7 +24,7 @@ void pte_frag_destroy(void *pte_frag)
> >   /* drop all the pending references */
> >   count = ((unsigned long)pte_frag & ~PAGE_MASK) >> PTE_FRAG_SIZE_SHIFT;
> >   /* We allow PTE_FRAG_NR fragments from a PTE page */
> > - if (atomic_sub_and_test(PTE_FRAG_NR - count, 
> > >pt_frag_refcount)) {
> > + if (refcount_sub_and_test(PTE_FRAG_NR - count, 
> > >pt_frag_refcount)) {
> >   pgtable_page_dtor(page);
> >   __free_page(page);
> >   }
> > @@ -71,7 +71,7 @@ static pte_t *__alloc_for_ptecache(struct mm_struct *mm, 
> > int kernel)
> >   return NULL;
> >   }
> >
> > - atomic_set(>pt_frag_refcount, 1);
> > + refcount_set(>pt_frag_refcount, 1);
> >
> >   ret = page_address(page);
> >   /*
> > @@ -87,7 +87,7 @@ static pte_t *__alloc_for_ptecache(struct mm_struct *mm, 
> > int kernel)
> >* count.
> >*/
> >   if (likely(!pte_frag_get(>context))) {
> > - atomic_set(>pt_frag_refcount, PTE_FRAG_NR);
> > + refcount_set(>pt_frag_refcount, PTE_FRAG_NR);
> >   pte_frag_set(>context, ret + PTE_FRAG_SIZE);
> >   }
> >   spin_unlock(>page_table_lock);
> > @@ -110,8 +110,7 @@ void pte_fragment_free(unsigned long *table, int kernel)
> >  {
> >   struct page *page = virt_to_page(table);
> >
> > - BUG_ON(atomic_read(>pt_frag_refcount) <= 0);
> > - if (atomic_dec_and_test(>pt_frag_refcount)) {
> > + if (refcount_dec_and_test(>pt_frag_refcount)) {
> >   if (!kernel)
> >   pgtable_page_dtor(page);
> >   __free_page(page);
> > diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
> > index 3a37a89eb7a7..7fe23a3faf95 100644
> > --- a/include/linux/mm_types.h
> > +++ b/include/linux/mm_types.h
> > @@ -14,6 +14,7 @@
> >  #include 
> >  #include 
> >  #include 
> > +#include 
> >
> >  #include 
> >
> > @@ -147,7 +148,7 @@ struct page {
> >   unsigned long _pt_pad_2;/* mapping */
> >   union {
> > 

[PATCH] syscalls: Update the syscall #defines to match uapi

2019-08-09 Thread Alistair Francis
Update the #defines around sys_fstat64() and sys_fstatat64() to match
the #defines around the __NR3264_fstatat and __NR3264_fstat definitions
in include/uapi/asm-generic/unistd.h. This avoids compiler failures if
one is defined.

Signed-off-by: Alistair Francis 
---
 include/linux/syscalls.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h
index 2bcef4c70183..e4bf5e480d60 100644
--- a/include/linux/syscalls.h
+++ b/include/linux/syscalls.h
@@ -512,7 +512,7 @@ asmlinkage long sys_readlinkat(int dfd, const char __user 
*path, char __user *bu
 asmlinkage long sys_newfstatat(int dfd, const char __user *filename,
   struct stat __user *statbuf, int flag);
 asmlinkage long sys_newfstat(unsigned int fd, struct stat __user *statbuf);
-#if defined(__ARCH_WANT_STAT64) || defined(__ARCH_WANT_COMPAT_STAT64)
+#if defined(__ARCH_WANT_NEW_STAT) || defined(__ARCH_WANT_STAT64)
 asmlinkage long sys_fstat64(unsigned long fd, struct stat64 __user *statbuf);
 asmlinkage long sys_fstatat64(int dfd, const char __user *filename,
   struct stat64 __user *statbuf, int flag);
-- 
2.22.0



Re: [PATCH v4 1/2] fork: extend clone3() to support CLONE_SET_TID

2019-08-09 Thread Christian Brauner
On Thu, Aug 08, 2019 at 11:22:21PM +0200, Adrian Reber wrote:
> The main motivation to add set_tid to clone3() is CRIU.
> 
> To restore a process with the same PID/TID CRIU currently uses
> /proc/sys/kernel/ns_last_pid. It writes the desired (PID - 1) to
> ns_last_pid and then (quickly) does a clone(). This works most of the
> time, but it is racy. It is also slow as it requires multiple syscalls.
> 
> Extending clone3() to support set_tid makes it possible to restore a
> process using CRIU without accessing /proc/sys/kernel/ns_last_pid and
> race free (as long as the desired PID/TID is available).
> 
> This clone3() extension places the same restrictions (CAP_SYS_ADMIN)
> on clone3() with set_tid as they are currently in place for ns_last_pid.
> 
> Signed-off-by: Adrian Reber 
> ---
> v2:
>  - Removed (size < sizeof(struct clone_args)) as discussed with
>Christian and Dmitry
>  - Added comment to ((set_tid != 1) && idr_get_cursor() <= 1) (Oleg)
>  - Use idr_alloc() instead of idr_alloc_cyclic() (Oleg)
> 
> v3:
>  - Return EEXIST if PID is already in use (Christian)
>  - Drop CLONE_SET_TID (Christian and Oleg)
>  - Use idr_is_empty() instead of idr_get_cursor() (Oleg)
>  - Handle different `struct clone_args` sizes (Dmitry)
> 
> v4:
>  - Rework struct size check with defines (Christian)
>  - Reduce number of set_tid checks (Oleg)
>  - Less parentheses and more robust code (Oleg)
>  - Do ns_capable() on correct user_ns (Oleg, Christian)
> ---
>  include/linux/pid.h|  2 +-
>  include/linux/sched/task.h |  1 +
>  include/uapi/linux/sched.h |  1 +
>  kernel/fork.c  | 25 +++--
>  kernel/pid.c   | 34 +++---
>  5 files changed, 53 insertions(+), 10 deletions(-)
> 
> diff --git a/include/linux/pid.h b/include/linux/pid.h
> index 2a83e434db9d..052000db0ced 100644
> --- a/include/linux/pid.h
> +++ b/include/linux/pid.h
> @@ -116,7 +116,7 @@ extern struct pid *find_vpid(int nr);
>  extern struct pid *find_get_pid(int nr);
>  extern struct pid *find_ge_pid(int nr, struct pid_namespace *);
>  
> -extern struct pid *alloc_pid(struct pid_namespace *ns);
> +extern struct pid *alloc_pid(struct pid_namespace *ns, pid_t set_tid);
>  extern void free_pid(struct pid *pid);
>  extern void disable_pid_allocation(struct pid_namespace *ns);
>  
> diff --git a/include/linux/sched/task.h b/include/linux/sched/task.h
> index 0497091e40c1..4f2a80564332 100644
> --- a/include/linux/sched/task.h
> +++ b/include/linux/sched/task.h
> @@ -26,6 +26,7 @@ struct kernel_clone_args {
>   unsigned long stack;
>   unsigned long stack_size;
>   unsigned long tls;
> + pid_t set_tid;
>  };
>  
>  /*
> diff --git a/include/uapi/linux/sched.h b/include/uapi/linux/sched.h
> index b3105ac1381a..e1ce103a2c47 100644
> --- a/include/uapi/linux/sched.h
> +++ b/include/uapi/linux/sched.h
> @@ -45,6 +45,7 @@ struct clone_args {
>   __aligned_u64 stack;
>   __aligned_u64 stack_size;
>   __aligned_u64 tls;
> + __aligned_u64 set_tid;
>  };
>  
>  /*
> diff --git a/kernel/fork.c b/kernel/fork.c
> index 2852d0e76ea3..2a03f0e201e9 100644
> --- a/kernel/fork.c
> +++ b/kernel/fork.c
> @@ -117,6 +117,13 @@
>   */
>  #define MAX_THREADS FUTEX_TID_MASK
>  
> +/*
> + * Different sizes of struct clone_args
> + */
> +#define CLONE3_ARGS_SIZE_V0 64
> +/* V1 includes set_tid */
> +#define CLONE3_ARGS_SIZE_V1 72
> +
>  /*
>   * Protected counters by write_lock_irq(_lock)
>   */
> @@ -2031,7 +2038,13 @@ static __latent_entropy struct task_struct 
> *copy_process(
>   stackleak_task_init(p);
>  
>   if (pid != _struct_pid) {
> - pid = alloc_pid(p->nsproxy->pid_ns_for_children);
> + if (args->set_tid && !ns_capable(
> + p->nsproxy->pid_ns_for_children->user_ns,
> + CAP_SYS_ADMIN)) {
> + retval = -EPERM;
> + goto bad_fork_cleanup_thread;
> + }
> + pid = alloc_pid(p->nsproxy->pid_ns_for_children, args->set_tid);
>   if (IS_ERR(pid)) {
>   retval = PTR_ERR(pid);
>   goto bad_fork_cleanup_thread;
> @@ -2535,9 +2548,14 @@ noinline static int copy_clone_args_from_user(struct 
> kernel_clone_args *kargs,
>   if (unlikely(size > PAGE_SIZE))
>   return -E2BIG;
>  
> - if (unlikely(size < sizeof(struct clone_args)))
> + /* The struct needs to be at least the size of the original struct. */

I don't think you need that comment. I think the macro is pretty
self-explanatory. If you want it to be even clearer you could even make
it CLONE3_ARGS_SIZE_MIN but V0 is good enough. :)

> + if (unlikely(size < CLONE3_ARGS_SIZE_V0))
>   return -EINVAL;
>  
> + if (size < sizeof(struct clone_args))
> + memset((void *) + size, 0,
> + sizeof(struct clone_args) - size);
> +
>   if 

[PATCH 2/2] leds: triggers: Don't remove trigger if LED_KEEP_TRIGGER flag is set

2019-08-09 Thread Guru Das Srinagesh
From: Fenglin Wu 

The LED_KEEP_TRIGGER flag prevents the trigger being removed while
turning off the LEDs. Extend the flag usage to prevent the trigger being
removed even while "none" trigger is set.

Signed-off-by: Fenglin Wu 
Signed-off-by: Guru Das Srinagesh 
---
 drivers/leds/led-triggers.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/leds/led-triggers.c b/drivers/leds/led-triggers.c
index 8d11a5e..a0e4531 100644
--- a/drivers/leds/led-triggers.c
+++ b/drivers/leds/led-triggers.c
@@ -40,7 +40,8 @@ ssize_t led_trigger_store(struct device *dev, struct 
device_attribute *attr,
goto unlock;
}
 
-   if (sysfs_streq(buf, "none")) {
+   if (sysfs_streq(buf, "none") &&
+   !(led_cdev->flags & LED_KEEP_TRIGGER)) {
led_trigger_remove(led_cdev);
goto unlock;
}
-- 
The Qualcomm Innovation Center, Inc. is a member of the Code Aurora Forum,
a Linux Foundation Collaborative Project



[PATCH 1/2] leds: Add flag to keep trigger always

2019-08-09 Thread Guru Das Srinagesh
From: Subbaraman Narayanamurthy 

Commit 0013b23d66a2768f5babbb0ea9f03ab067a990d8 ("leds: disable triggers
on brightness set") removes the trigger on an LED class device when
brightness is set to 0. However, there are some LED class devices which
need the trigger not to be removed. In a use case like camera flash,
camera flash driver passes in a trigger device to LED class driver. If
the trigger is removed when the brightness is set to 0, this will affect
the clients using those triggers. Hence add a flag to always keep the
trigger even when brightness is set to 0.

Signed-off-by: Subbaraman Narayanamurthy 
Signed-off-by: Guru Das Srinagesh 
---
 drivers/leds/led-class.c | 2 +-
 include/linux/leds.h | 1 +
 2 files changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/leds/led-class.c b/drivers/leds/led-class.c
index d231240..13c28d1 100644
--- a/drivers/leds/led-class.c
+++ b/drivers/leds/led-class.c
@@ -52,7 +52,7 @@ static ssize_t brightness_store(struct device *dev,
if (ret)
goto unlock;
 
-   if (state == LED_OFF)
+   if (state == LED_OFF && !(led_cdev->flags & LED_KEEP_TRIGGER))
led_trigger_remove(led_cdev);
led_set_brightness(led_cdev, state);
flush_work(_cdev->set_brightness_work);
diff --git a/include/linux/leds.h b/include/linux/leds.h
index d101fd1..e079a22 100644
--- a/include/linux/leds.h
+++ b/include/linux/leds.h
@@ -73,6 +73,7 @@ struct led_classdev {
 #define LED_BRIGHT_HW_CHANGED  BIT(21)
 #define LED_RETAIN_AT_SHUTDOWN BIT(22)
 #define LED_INIT_DEFAULT_TRIGGER BIT(23)
+#define LED_KEEP_TRIGGER   BIT(24)
 
/* set_brightness_work / blink_timer flags, atomic, private. */
unsigned long   work_flags;
-- 
The Qualcomm Innovation Center, Inc. is a member of the Code Aurora Forum,
a Linux Foundation Collaborative Project



Re: [PATCH 0/3] Collapse vimc into single monolithic driver

2019-08-09 Thread Shuah Khan

On 8/9/19 6:24 PM, André Almeida wrote:

On 8/9/19 9:17 PM, Shuah Khan wrote:

Hi Andre,

On 8/9/19 5:52 PM, André Almeida wrote:

Hello Shuah,

Thanks for the patch, I did some comments below.

On 8/9/19 6:45 PM, Shuah Khan wrote:

vimc uses Component API to split the driver into functional components.
The real hardware resembles a monolith structure more than a component one, and the
component structure added a level of complexity making it hard to
maintain without adding any real benefit.
  The sensor is one vimc component that would make sense to be a
separate
module to closely align with the real hardware. It would be easier to
collapse vimc into single monolithic driver first and then split the
sensor off as a separate module.

This patch series removes the component API and makes minimal changes to
the code base preserving the functional division of the code structure.
Preserving the functional structure allows us to split the sensor off
as a separate module in the future.

Major design elements in this change are:
  - Use existing struct vimc_ent_config and struct
vimc_pipeline_config
    to drive the initialization of the functional components.
  - Make vimc_ent_config global by moving it to vimc.h
  - Add two new hooks add and rm to initialize and register,
unregister
    and free subdevs.
  - All component API is now gone and bind and unbind hooks are
modified
    to do "add" and "rm" with minimal changes to just add and rm
subdevs.
  - vimc-core's bind and unbind are now register and unregister.
  - vimc-core invokes "add" hooks from its vimc_register_devices().
    The "add" hooks remain the same and register subdevs. They don't
    create platform devices of their own and use vimc's pdev.dev as
    their reference device. The "add" hooks save their
vimc_ent_device(s)
    in the corresponding vimc_ent_config.
  - vimc-core invokes "rm" hooks from its unregister to unregister
subdevs
    and cleanup.
  - vimc-core invokes "add" and "rm" hooks with pointer to struct
vimc_device
    and the corresponding struct vimc_ent_config pointer.
  The following configure and stream test works on all devices.
   media-ctl -d platform:vimc -V '"Sensor
A":0[fmt:SBGGR8_1X8/640x480]'
  media-ctl -d platform:vimc -V '"Debayer
A":0[fmt:SBGGR8_1X8/640x480]'
  media-ctl -d platform:vimc -V '"Sensor
B":0[fmt:SBGGR8_1X8/640x480]'
  media-ctl -d platform:vimc -V '"Debayer
B":0[fmt:SBGGR8_1X8/640x480]'
   v4l2-ctl -z platform:vimc -d "RGB/YUV Capture" -v
width=1920,height=1440
  v4l2-ctl -z platform:vimc -d "Raw Capture 0" -v pixelformat=BA81
  v4l2-ctl -z platform:vimc -d "Raw Capture 1" -v pixelformat=BA81
   v4l2-ctl --stream-mmap --stream-count=100 -d /dev/video1
  v4l2-ctl --stream-mmap --stream-count=100 -d /dev/video2
  v4l2-ctl --stream-mmap --stream-count=100 -d /dev/video3

The third patch in the series fixes a general protection fault found
when rmmod is done while stream is active.


I applied your patch on top of media_tree/master and I did some testing.
Not sure if I did something wrong, but just adding and removing the
module generated a kernel panic:


Thanks for testing.

Odd. I tested modprobe and rmmod both. I was working on Linux 5.3-rc2.
I will apply these to media latest and work from there. I have to
rebase these on top of the reverts from Lucas and Helen


Ok, please let me know if I succeeded to reproduce.



~# modprobe vimc
~# rmmod vimc
[   16.452974] stack segment:  [#1] SMP PTI
[   16.453688] CPU: 0 PID: 2038 Comm: rmmod Not tainted 5.3.0-rc2+ #36
[   16.454678] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996),
BIOS 1.12.0-20181126_142135-anatol 04/01/2014
[   16.456191] RIP: 0010:kfree+0x4d/0x240



[   16.469188] Call Trace:
[   16.469666]  vimc_remove+0x35/0x90 [vimc]
[   16.470436]  platform_drv_remove+0x1f/0x40
[   16.471233]  device_release_driver_internal+0xd3/0x1b0
[   16.472184]  driver_detach+0x37/0x6b
[   16.472882]  bus_remove_driver+0x50/0xc1
[   16.473569]  vimc_exit+0xc/0xca0 [vimc]
[   16.474231]  __x64_sys_delete_module+0x18d/0x240
[   16.475036]  do_syscall_64+0x43/0x110
[   16.475656]  entry_SYSCALL_64_after_hwframe+0x44/0xa9
[   16.476504] RIP: 0033:0x7fceb8dafa4b



[   16.484853] Modules linked in: vimc(-) videobuf2_vmalloc
videobuf2_memops v4l2_tpg videobuf2_v4l2 videobuf2_common
[   16.486187] ---[ end trace 91e5e0894e254d49 ]---
[   16.486758] RIP: 0010:kfree+0x4d/0x240



fish: “rmmod vimc” terminated by signal SIGSEGV (Address boundary error)

I just added the module after booting, no other action was made. Here is
how my `git log --oneline` looks like:

897d708e922b media: vimc: Fix gpf in rmmod path when stream is active
2e4a5ad8ad6d media: vimc: Collapse component structure into a single
monolithic driver
7c8da1687e92 media: vimc: move private defines to a common header
97299a303532 media: Remove dev_err() usage after platform_get_irq()

Re: Resend [PATCH] kernel/resource.c: invalidate parent when freed resource has childs

2019-08-09 Thread Wei Yang
On Fri, Aug 09, 2019 at 03:45:59PM -0700, Linus Torvalds wrote:
>On Fri, Aug 9, 2019 at 3:38 PM Wei Yang  wrote:
>>
>> In theory, child may have siblings. Would it be possible to have several
>> devices under xhci-hcd?
>
>I'm less interested in the xhci-hcd case - which I certainly *hope* is
>fixed already? - than in "if this happens somewhere else".
>

Agree, this is what I want to say.

>So if we do want to remove the parent (which may be a good idea with a
>warning), and want to make sure that the children are really removed
>from the resource hierarchy, we should do something like
>
>  static bool detach_children(struct resource *res)
>  {
>res = res->child;
>if (!res)
>return false;
>do {
>res->parent = NULL;
>res = res->sibling;
>} while (res);
>return true;
>  }
>
>and then we could write the __release_region() warning as
>
>/* You should not release a resource that has children */
>WARN_ON_ONCE(detach_children(res));
>

I am thinking about why this could happen.

To guard the core kernel code, it looks reasonable.

>or something?
>
>NOTE! The above is entirely untested, and written purely in my mail
>reader. It might be seriously buggy, including not compiling, or doing
>odd things. See it more as a "maybe something like this" code snippet
>example than any kind of final form.
>
>   Linus

-- 
Wei Yang
Help you, Help me


Re: [RFC PATCH v2 12/19] mm/gup: Prep put_user_pages() to take an vaddr_pin struct

2019-08-09 Thread John Hubbard
On 8/9/19 3:58 PM, ira.we...@intel.com wrote:
> From: Ira Weiny 
> 
> Once callers start to use vaddr_pin the put_user_pages calls will need
> to have access to this data coming in.  Prep put_user_pages() for this
> data.
> 
> Signed-off-by: Ira Weiny 
> ---
>  include/linux/mm.h |  20 +---
>  mm/gup.c   | 122 -
>  2 files changed, 88 insertions(+), 54 deletions(-)
> 
> diff --git a/include/linux/mm.h b/include/linux/mm.h
> index befe150d17be..9d37cafbef9a 100644
> --- a/include/linux/mm.h
> +++ b/include/linux/mm.h
> @@ -1064,25 +1064,7 @@ static inline void put_page(struct page *page)
>   __put_page(page);
>  }
>  
> -/**
> - * put_user_page() - release a gup-pinned page
> - * @page:pointer to page to be released
> - *
> - * Pages that were pinned via get_user_pages*() must be released via
> - * either put_user_page(), or one of the put_user_pages*() routines
> - * below. This is so that eventually, pages that are pinned via
> - * get_user_pages*() can be separately tracked and uniquely handled. In
> - * particular, interactions with RDMA and filesystems need special
> - * handling.
> - *
> - * put_user_page() and put_page() are not interchangeable, despite this early
> - * implementation that makes them look the same. put_user_page() calls must
> - * be perfectly matched up with get_user_page() calls.
> - */
> -static inline void put_user_page(struct page *page)
> -{
> - put_page(page);
> -}
> +void put_user_page(struct page *page);
>  
>  void put_user_pages_dirty_lock(struct page **pages, unsigned long npages,
>  bool make_dirty);
> diff --git a/mm/gup.c b/mm/gup.c
> index a7a9d2f5278c..10cfd30ff668 100644
> --- a/mm/gup.c
> +++ b/mm/gup.c
> @@ -24,30 +24,41 @@
>  
>  #include "internal.h"
>  
> -/**
> - * put_user_pages_dirty_lock() - release and optionally dirty gup-pinned 
> pages
> - * @pages:  array of pages to be maybe marked dirty, and definitely released.

A couple comments from our circular review chain: some fellow with the same
last name as you, recommended wording it like this:

  @pages:  array of pages to be put

> - * @npages: number of pages in the @pages array.
> - * @make_dirty: whether to mark the pages dirty
> - *
> - * "gup-pinned page" refers to a page that has had one of the 
> get_user_pages()
> - * variants called on that page.
> - *
> - * For each page in the @pages array, make that page (or its head page, if a
> - * compound page) dirty, if @make_dirty is true, and if the page was 
> previously
> - * listed as clean. In any case, releases all pages using put_user_page(),
> - * possibly via put_user_pages(), for the non-dirty case.
> - *
> - * Please see the put_user_page() documentation for details.
> - *
> - * set_page_dirty_lock() is used internally. If instead, set_page_dirty() is
> - * required, then the caller should a) verify that this is really correct,
> - * because _lock() is usually required, and b) hand code it:
> - * set_page_dirty_lock(), put_user_page().
> - *
> - */
> -void put_user_pages_dirty_lock(struct page **pages, unsigned long npages,
> -bool make_dirty)
> +static void __put_user_page(struct vaddr_pin *vaddr_pin, struct page *page)
> +{
> + page = compound_head(page);
> +
> + /*
> +  * For devmap managed pages we need to catch refcount transition from
> +  * GUP_PIN_COUNTING_BIAS to 1, when refcount reach one it means the
> +  * page is free and we need to inform the device driver through
> +  * callback. See include/linux/memremap.h and HMM for details.
> +  */
> + if (put_devmap_managed_page(page))
> + return;
> +
> + if (put_page_testzero(page))
> + __put_page(page);
> +}
> +
> +static void __put_user_pages(struct vaddr_pin *vaddr_pin, struct page 
> **pages,
> +  unsigned long npages)
> +{
> + unsigned long index;
> +
> + /*
> +  * TODO: this can be optimized for huge pages: if a series of pages is
> +  * physically contiguous and part of the same compound page, then a
> +  * single operation to the head page should suffice.
> +  */

As discussed in the other review thread (""), let's just delete that comment,
as long as you're moving things around.


> + for (index = 0; index < npages; index++)
> + __put_user_page(vaddr_pin, pages[index]);
> +}
> +
> +static void __put_user_pages_dirty_lock(struct vaddr_pin *vaddr_pin,
> + struct page **pages,
> + unsigned long npages,
> + bool make_dirty)

Elsewhere in this series, we pass vaddr_pin at the end of the arg list.
Here we pass it at the beginning, and it caused a minor jar when reading it.
Obviously just bike shedding at this point, though. Either way. :)

>  {
>   unsigned long index;
>  
> @@ -58,7 +69,7 @@ void 

Re: [PATCH 0/3] Collapse vimc into single monolithic driver

2019-08-09 Thread André Almeida
On 8/9/19 9:17 PM, Shuah Khan wrote:
> Hi Andre,
> 
> On 8/9/19 5:52 PM, André Almeida wrote:
>> Hello Shuah,
>>
>> Thanks for the patch, I did some comments below.
>>
>> On 8/9/19 6:45 PM, Shuah Khan wrote:
>>> vimc uses Component API to split the driver into functional components.
>>> The real hardware resembles a monolith structure more than a component one, and the
>>> component structure added a level of complexity making it hard to
>>> maintain without adding any real benefit.
>>>  The sensor is one vimc component that would makes sense to be a
>>> separate
>>> module to closely align with the real hardware. It would be easier to
>>> collapse vimc into single monolithic driver first and then split the
>>> sensor off as a separate module.
>>>
>>> This patch series removes the component API and makes minimal changes to
>>> the code base preserving the functional division of the code structure.
>>> Preserving the functional structure allows us to split the sensor off
>>> as a separate module in the future.
>>>
>>> Major design elements in this change are:
>>>  - Use existing struct vimc_ent_config and struct
>>> vimc_pipeline_config
>>>    to drive the initialization of the functional components.
>>>  - Make vimc_ent_config global by moving it to vimc.h
>>>  - Add two new hooks add and rm to initialize and register,
>>> unregister
>>>    and free subdevs.
>>>  - All component API is now gone and bind and unbind hooks are
>>> modified
>>>    to do "add" and "rm" with minimal changes to just add and rm
>>> subdevs.
>>>  - vimc-core's bind and unbind are now register and unregister.
>>>  - vimc-core invokes "add" hooks from its vimc_register_devices().
>>>    The "add" hooks remain the same and register subdevs. They don't
>>>    create platform devices of their own and use vimc's pdev.dev as
>>>    their reference device. The "add" hooks save their
>>> vimc_ent_device(s)
>>>    in the corresponding vimc_ent_config.
>>>  - vimc-core invokes "rm" hooks from its unregister to unregister
>>> subdevs
>>>    and cleanup.
>>>  - vimc-core invokes "add" and "rm" hooks with pointer to struct
>>> vimc_device
>>>    and the corresponding struct vimc_ent_config pointer.
>>>  The following configure and stream test works on all devices.
>>>   media-ctl -d platform:vimc -V '"Sensor
>>> A":0[fmt:SBGGR8_1X8/640x480]'
>>>  media-ctl -d platform:vimc -V '"Debayer
>>> A":0[fmt:SBGGR8_1X8/640x480]'
>>>  media-ctl -d platform:vimc -V '"Sensor
>>> B":0[fmt:SBGGR8_1X8/640x480]'
>>>  media-ctl -d platform:vimc -V '"Debayer
>>> B":0[fmt:SBGGR8_1X8/640x480]'
>>>   v4l2-ctl -z platform:vimc -d "RGB/YUV Capture" -v
>>> width=1920,height=1440
>>>  v4l2-ctl -z platform:vimc -d "Raw Capture 0" -v pixelformat=BA81
>>>  v4l2-ctl -z platform:vimc -d "Raw Capture 1" -v pixelformat=BA81
>>>   v4l2-ctl --stream-mmap --stream-count=100 -d /dev/video1
>>>  v4l2-ctl --stream-mmap --stream-count=100 -d /dev/video2
>>>  v4l2-ctl --stream-mmap --stream-count=100 -d /dev/video3
>>>
>>> The third patch in the series fixes a general protection fault found
>>> when rmmod is done while stream is active.
>>
>> I applied your patch on top of media_tree/master and I did some testing.
>> Not sure if I did something wrong, but just adding and removing the
>> module generated a kernel panic:
> 
> Thanks for testing.
> 
> Odd. I tested modprobe and rmmod both. I was working on Linux 5.3-rc2.
> I will apply these to media latest and work from there. I have to
> rebase these on top of the reverts from Lucas and Helen

Ok, please let me know if you succeed in reproducing it.

>>
>> ~# modprobe vimc
>> ~# rmmod vimc
>> [   16.452974] stack segment:  [#1] SMP PTI
>> [   16.453688] CPU: 0 PID: 2038 Comm: rmmod Not tainted 5.3.0-rc2+ #36
>> [   16.454678] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996),
>> BIOS 1.12.0-20181126_142135-anatol 04/01/2014
>> [   16.456191] RIP: 0010:kfree+0x4d/0x240
>>
>> 
>>
>> [   16.469188] Call Trace:
>> [   16.469666]  vimc_remove+0x35/0x90 [vimc]
>> [   16.470436]  platform_drv_remove+0x1f/0x40
>> [   16.471233]  device_release_driver_internal+0xd3/0x1b0
>> [   16.472184]  driver_detach+0x37/0x6b
>> [   16.472882]  bus_remove_driver+0x50/0xc1
>> [   16.473569]  vimc_exit+0xc/0xca0 [vimc]
>> [   16.474231]  __x64_sys_delete_module+0x18d/0x240
>> [   16.475036]  do_syscall_64+0x43/0x110
>> [   16.475656]  entry_SYSCALL_64_after_hwframe+0x44/0xa9
>> [   16.476504] RIP: 0033:0x7fceb8dafa4b
>>
>> 
>>
>> [   16.484853] Modules linked in: vimc(-) videobuf2_vmalloc
>> videobuf2_memops v4l2_tpg videobuf2_v4l2 videobuf2_common
>> [   16.486187] ---[ end trace 91e5e0894e254d49 ]---
>> [   16.486758] RIP: 0010:kfree+0x4d/0x240
>>
>> 
>>
>> fish: “rmmod vimc” terminated by signal SIGSEGV (Address boundary error)
>>
>> I just added the module after booting, no other action was made. Here is
>> how my `git log --oneline` looks 

[PATCH 00/11] Face lift for bu21013_ts driver

2019-08-09 Thread Dmitry Torokhov
Hi Linus,

So your patch has prompted me to take a look at the driver and
try to clean it up. I am sure I screwed up somewhere, but you said
you have the device, so please take a look at the series and
see if you can salvage them

Thanks!

Dmitry Torokhov (10):
  ARM: ux500: improve BU21013 touchpad bindings
  Input: bu21013_ts - rename some variables
  Input: bu21013_ts - annotate suspend/resume methods as __maybe_unused
  Input: bu21013_ts - remove useless comments
  Input: bu21013_ts - convert to using managed resources
  Input: bu21013_ts - remove support for platform data
  Input: bu21013_ts - use interrupt from I2C client
  Input: bu21013_ts - fix suspend when wake source
  Input: bu21013_ts - switch to using MT-B (slotted) protocol
  Input: bu21013_ts - switch to using standard touchscreen properties

Linus Walleij (1):
  Input: bu21013_ts - convert to use GPIO descriptors

 .../bindings/input/touchscreen/bu21013.txt|  27 +-
 arch/arm/boot/dts/ste-hrefprev60-stuib.dts|  14 +-
 arch/arm/boot/dts/ste-hrefv60plus-stuib.dts   |  14 +-
 drivers/input/touchscreen/bu21013_ts.c| 740 --
 include/linux/input/bu21013.h |  34 -
 5 files changed, 362 insertions(+), 467 deletions(-)
 delete mode 100644 include/linux/input/bu21013.h

-- 
Dmitry


[PATCH 11/11] Input: bu21013_ts - switch to using standard touchscreen properties

2019-08-09 Thread Dmitry Torokhov
This switches the driver over to the standard touchscreen properties for
coordinate transformation, while keeping old bindings working as well.

Signed-off-by: Dmitry Torokhov 
---
 .../bindings/input/touchscreen/bu21013.txt| 16 --
 drivers/input/touchscreen/bu21013_ts.c| 54 +++
 2 files changed, 46 insertions(+), 24 deletions(-)

diff --git a/Documentation/devicetree/bindings/input/touchscreen/bu21013.txt 
b/Documentation/devicetree/bindings/input/touchscreen/bu21013.txt
index 7ddb5de8343d..da4c9d8b99b1 100644
--- a/Documentation/devicetree/bindings/input/touchscreen/bu21013.txt
+++ b/Documentation/devicetree/bindings/input/touchscreen/bu21013.txt
@@ -10,6 +10,16 @@ Required properties:
 Optional properties:
  - touch-gpios : GPIO pin registering a touch event
  - -supply: Phandle to a regulator supply
+ - touchscreen-size-x  : General touchscreen binding, see [1].
+ - touchscreen-size-y  : General touchscreen binding, see [1].
+ - touchscreen-inverted-x  : General touchscreen binding, see [1].
+ - touchscreen-inverted-y  : General touchscreen binding, see [1].
+ - touchscreen-swapped-x-y : General touchscreen binding, see [1].
+
+[1] All general touchscreen properties are described in
+Documentation/devicetree/bindings/input/touchscreen/touchscreen.txt.
+
+Deprecated properties:
  - rohm,touch-max-x: Maximum outward permitted limit in the X axis
  - rohm,touch-max-y: Maximum outward permitted limit in the Y axis
  - rohm,flip-x : Flip touch coordinates on the X axis
@@ -26,8 +36,8 @@ Example:
touch-gpio = < 20 GPIO_ACTIVE_LOW>;
avdd-supply = <_ldo_aux1_reg>;
 
-   rohm,touch-max-x = <384>;
-   rohm,touch-max-y = <704>;
-   rohm,flip-y;
+   touchscreen-size-x = <384>;
+   touchscreen-size-y = <704>;
+   touchscreen-inverted-y;
};
};
diff --git a/drivers/input/touchscreen/bu21013_ts.c 
b/drivers/input/touchscreen/bu21013_ts.c
index 2c534aa61687..c89a00a6e67c 100644
--- a/drivers/input/touchscreen/bu21013_ts.c
+++ b/drivers/input/touchscreen/bu21013_ts.c
@@ -10,6 +10,7 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 #include 
 #include 
@@ -139,6 +140,7 @@
  * struct bu21013_ts - touch panel data structure
  * @client: pointer to the i2c client
  * @in_dev: pointer to the input device structure
+ * @props: the device coordinate transformation properties
  * @regulator: pointer to the Regulator used for touch screen
  * @cs_gpiod: chip select GPIO line
  * @int_gpiod: touch interrupt GPIO line
@@ -155,6 +157,7 @@
 struct bu21013_ts {
struct i2c_client *client;
struct input_dev *in_dev;
+   struct touchscreen_properties props;
struct regulator *regulator;
struct gpio_desc *cs_gpiod;
struct gpio_desc *int_gpiod;
@@ -201,19 +204,13 @@ static int bu21013_do_touch_report(struct bu21013_ts *ts)
 
for (i = 0; i < MAX_FINGERS; i++) {
const u8 *data = [4 * i + 3];
-   struct input_mt_pos *p = [finger_down_count];
+   unsigned int x, y;
 
-   p->x = data[0] << SHIFT_2 | (data[1] & MASK_BITS);
-   p->y = data[2] << SHIFT_2 | (data[3] & MASK_BITS);
-   if (p->x == 0 || p->y == 0)
-   continue;
-
-   finger_down_count++;
-
-   if (ts->x_flip)
-   p->x = ts->touch_x_max - p->x;
-   if (ts->y_flip)
-   p->y = ts->touch_y_max - p->y;
+   x = data[0] << SHIFT_2 | (data[1] & MASK_BITS);
+   y = data[2] << SHIFT_2 | (data[3] & MASK_BITS);
+   if (x != 0 && y != 0)
+   touchscreen_set_mt_pos([finger_down_count++],
+  >props, x, y);
}
 
if (finger_down_count == 2 &&
@@ -412,6 +409,8 @@ static int bu21013_probe(struct i2c_client *client,
 {
struct bu21013_ts *ts;
struct input_dev *in_dev;
+   struct input_absinfo *info;
+   u32 max_x = 0, max_y = 0;
int error;
 
if (!i2c_check_functionality(client->adapter,
@@ -434,11 +433,6 @@ static int bu21013_probe(struct i2c_client *client,
ts->x_flip = device_property_read_bool(>dev, "rohm,flip-x");
ts->y_flip = device_property_read_bool(>dev, "rohm,flip-y");
 
-   device_property_read_u32(>dev, "rohm,touch-max-x",
->touch_x_max);
-   device_property_read_u32(>dev, "rohm,touch-max-y",
->touch_y_max);
-
in_dev = devm_input_allocate_device(>dev);
if (!in_dev) {
dev_err(>dev, "device memory alloc failed\n");
@@ -451,10 +445,28 @@ static int bu21013_probe(struct i2c_client *client,
in_dev->name = DRIVER_TP;
   

[PATCH 07/11] Input: bu21013_ts - remove support for platform data

2019-08-09 Thread Dmitry Torokhov
There are no current users of the platform data in the tree, and
any new users should either use device tree, or static device
properties to describe the device.

This change drops the platform data definition and handling and moves the
driver over to generic device properties API. We also drop support for the
external clock. If it is needed we will have to extend the bindings to
supply the clock reference and handle it properly in the driver.

Also, wakeup setting should be coming from I2C client.

Signed-off-by: Dmitry Torokhov 
---
 drivers/input/touchscreen/bu21013_ts.c | 109 +
 include/linux/input/bu21013.h  |  30 ---
 2 files changed, 37 insertions(+), 102 deletions(-)
 delete mode 100644 include/linux/input/bu21013.h

diff --git a/drivers/input/touchscreen/bu21013_ts.c 
b/drivers/input/touchscreen/bu21013_ts.c
index 4b6f9544e94a..79de7327a460 100644
--- a/drivers/input/touchscreen/bu21013_ts.c
+++ b/drivers/input/touchscreen/bu21013_ts.c
@@ -4,18 +4,18 @@
  * Author: Naveen Kumar G  for ST-Ericsson
  */
 
-#include 
+#include 
 #include 
-#include 
+#include 
 #include 
-#include 
 #include 
-#include 
-#include 
-#include 
+#include 
+#include 
 #include 
-#include 
-#include 
+#include 
+#include 
+#include 
+#include 
 
 #define MAX_FINGERS2
 #define RESET_DELAY30
@@ -137,23 +137,32 @@
 /**
  * struct bu21013_ts - touch panel data structure
  * @client: pointer to the i2c client
- * @touch_stopped: touch stop flag
- * @chip: pointer to the touch panel controller
  * @in_dev: pointer to the input device structure
  * @regulator: pointer to the Regulator used for touch screen
  * @cs_gpiod: chip select GPIO line
  * @int_gpiod: touch interrupt GPIO line
+ * @irq: interrupt number the device is using
+ * @touch_x_max: maximum X coordinate reported by the device
+ * @touch_y_max: maximum Y coordinate reported by the device
+ * @x_flip: indicates that the driver should invert X coordinate before
+ * reporting
+ * @y_flip: indicates that the driver should invert Y coordinate before
+ * reporting
+ * @touch_stopped: touch stop flag
  *
  * Touch panel device data structure
  */
 struct bu21013_ts {
struct i2c_client *client;
-   const struct bu21013_platform_device *chip;
struct input_dev *in_dev;
struct regulator *regulator;
struct gpio_desc *cs_gpiod;
struct gpio_desc *int_gpiod;
unsigned int irq;
+   u32 touch_x_max;
+   u32 touch_y_max;
+   bool x_flip;
+   bool y_flip;
bool touch_stopped;
 };
 
@@ -208,10 +217,10 @@ static int bu21013_do_touch_report(struct bu21013_ts *ts)
}
 
for (i = 0; i < finger_down_count; i++) {
-   if (ts->chip->x_flip)
-   pos_x[i] = ts->chip->touch_x_max - pos_x[i];
-   if (ts->chip->y_flip)
-   pos_y[i] = ts->chip->touch_y_max - pos_y[i];
+   if (ts->x_flip)
+   pos_x[i] = ts->touch_x_max - pos_x[i];
+   if (ts->y_flip)
+   pos_y[i] = ts->touch_y_max - pos_y[i];
 
input_report_abs(ts->in_dev,
 ABS_MT_POSITION_X, pos_x[i]);
@@ -304,14 +313,9 @@ static int bu21013_init_chip(struct bu21013_ts *ts)
return error;
}
 
-   if (ts->chip->ext_clk)
-   error = i2c_smbus_write_byte_data(client, BU21013_CLK_MODE_REG,
- BU21013_CLK_MODE_EXT |
-   BU21013_CLK_MODE_CALIB);
-   else
-   error = i2c_smbus_write_byte_data(client, BU21013_CLK_MODE_REG,
- BU21013_CLK_MODE_DIV |
-   BU21013_CLK_MODE_CALIB);
+   error = i2c_smbus_write_byte_data(client, BU21013_CLK_MODE_REG,
+ BU21013_CLK_MODE_DIV |
+   BU21013_CLK_MODE_CALIB);
if (error) {
dev_err(>dev, "BU21013_CLK_MODE reg write failed\n");
return error;
@@ -388,43 +392,6 @@ static int bu21013_init_chip(struct bu21013_ts *ts)
return 0;
 }
 
-#ifdef CONFIG_OF
-static const struct bu21013_platform_device *
-bu21013_parse_dt(struct device *dev)
-{
-   struct device_node *np = dev->of_node;
-   struct bu21013_platform_device *pdata;
-
-   if (!np) {
-   dev_err(dev, "no device tree or platform data\n");
-   return ERR_PTR(-EINVAL);
-   }
-
-   pdata = devm_kzalloc(dev, sizeof(*pdata), GFP_KERNEL);
-   if (!pdata)
-   return ERR_PTR(-ENOMEM);
-
-   pdata->y_flip = pdata->x_flip = false;
-
-   pdata->x_flip = of_property_read_bool(np, "rohm,flip-x");
-   pdata->y_flip = 

[PATCH 02/11] Input: bu21013_ts - convert to use GPIO descriptors

2019-08-09 Thread Dmitry Torokhov
From: Linus Walleij 

This driver can use GPIO descriptors rather than GPIO numbers
without any problems, convert it. Name the field variables after
the actual pins on the chip rather than the "reset" and "touch"
names from the devicetree bindings that are vaguely inaccurate.

No in-tree users pass GPIO numbers in platform data so drop
this. Descriptor tables can be used to get these GPIOs from a board
file if need be.

Signed-off-by: Linus Walleij 
Signed-off-by: Dmitry Torokhov 
---
 .../bindings/input/touchscreen/bu21013.txt|  5 +-
 drivers/input/touchscreen/bu21013_ts.c| 86 ---
 include/linux/input/bu21013.h |  4 -
 3 files changed, 41 insertions(+), 54 deletions(-)

diff --git a/Documentation/devicetree/bindings/input/touchscreen/bu21013.txt 
b/Documentation/devicetree/bindings/input/touchscreen/bu21013.txt
index 56d835242af2..43899fc36ecf 100644
--- a/Documentation/devicetree/bindings/input/touchscreen/bu21013.txt
+++ b/Documentation/devicetree/bindings/input/touchscreen/bu21013.txt
@@ -2,10 +2,11 @@
 
 Required properties:
  - compatible  : "rohm,bu21013_tp"
- - reg :  I2C device address
+ - reg : I2C device address
+ - reset-gpios : GPIO pin enabling (selecting) chip (CS)
 
 Optional properties:
- - touch-gpio  : GPIO pin registering a touch event
+ - touch-gpios : GPIO pin registering a touch event
  - -supply: Phandle to a regulator supply
  - rohm,touch-max-x: Maximum outward permitted limit in the X axis
  - rohm,touch-max-y: Maximum outward permitted limit in the Y axis
diff --git a/drivers/input/touchscreen/bu21013_ts.c 
b/drivers/input/touchscreen/bu21013_ts.c
index 1d703e230ac3..c20f86f98ffc 100644
--- a/drivers/input/touchscreen/bu21013_ts.c
+++ b/drivers/input/touchscreen/bu21013_ts.c
@@ -14,11 +14,9 @@
 #include 
 #include 
 #include 
-#include 
+#include 
 #include 
-#include 
 
-#define PEN_DOWN_INTR  0
 #define MAX_FINGERS2
 #define RESET_DELAY30
 #define PENUP_TIMEOUT  (10)
@@ -143,8 +141,9 @@
  * @touch_stopped: touch stop flag
  * @chip: pointer to the touch panel controller
  * @in_dev: pointer to the input device structure
- * @intr_pin: interrupt pin value
  * @regulator: pointer to the Regulator used for touch screen
+ * @cs_gpiod: chip select GPIO line
+ * @int_gpiod: touch interrupt GPIO line
  *
  * Touch panel device data structure
  */
@@ -154,8 +153,9 @@ struct bu21013_ts_data {
const struct bu21013_platform_device *chip;
struct input_dev *in_dev;
struct regulator *regulator;
+   struct gpio_desc *cs_gpiod;
+   struct gpio_desc *int_gpiod;
unsigned int irq;
-   unsigned int intr_pin;
bool touch_stopped;
 };
 
@@ -257,20 +257,21 @@ static irqreturn_t bu21013_gpio_irq(int irq, void 
*device_data)
 {
struct bu21013_ts_data *data = device_data;
struct i2c_client *i2c = data->client;
+   int keep_polling;
int retval;
 
do {
retval = bu21013_do_touch_report(data);
if (retval < 0) {
dev_err(>dev, "bu21013_do_touch_report failed\n");
-   return IRQ_NONE;
+   break;
}
 
-   data->intr_pin = gpio_get_value(data->chip->touch_pin);
-   if (data->intr_pin == PEN_DOWN_INTR)
+   keep_polling = gpiod_get_value(data->int_gpiod);
+   if (keep_polling)
wait_event_timeout(data->wait, data->touch_stopped,
   msecs_to_jiffies(2));
-   } while (!data->intr_pin && !data->touch_stopped);
+   } while (keep_polling && !data->touch_stopped);
 
return IRQ_HANDLED;
 }
@@ -425,28 +426,6 @@ static void bu21013_free_irq(struct bu21013_ts_data 
*bu21013_data)
free_irq(bu21013_data->irq, bu21013_data);
 }
 
-/**
- * bu21013_cs_disable() - deconfigures the touch panel controller
- * @bu21013_data: device structure pointer
- *
- * This function is used to deconfigure the chip selection
- * for touch panel controller.
- */
-static void bu21013_cs_disable(struct bu21013_ts_data *bu21013_data)
-{
-   int error;
-
-   error = gpio_direction_output(bu21013_data->chip->cs_pin, 0);
-   if (error < 0)
-   dev_warn(_data->client->dev,
-"%s: gpio direction failed, error: %d\n",
-__func__, error);
-   else
-   gpio_set_value(bu21013_data->chip->cs_pin, 0);
-
-   gpio_free(bu21013_data->chip->cs_pin);
-}
-
 #ifdef CONFIG_OF
 static const struct bu21013_platform_device *
 bu21013_parse_dt(struct device *dev)
@@ -471,9 +450,6 @@ bu21013_parse_dt(struct device *dev)
of_property_read_u32(np, "rohm,touch-max-x", >touch_x_max);
of_property_read_u32(np, "rohm,touch-max-y", >touch_y_max);
 
-   pdata->touch_pin = 

[PATCH 04/11] Input: bu21013_ts - annotate suspend/resume methods as __maybe_unused

2019-08-09 Thread Dmitry Torokhov
Instead of #ifdef-ing out suspend and resume methods, let's mark
them as __maybe_unused to get better compile time coverage.

Signed-off-by: Dmitry Torokhov 
---
 drivers/input/touchscreen/bu21013_ts.c | 13 +++--
 1 file changed, 3 insertions(+), 10 deletions(-)

diff --git a/drivers/input/touchscreen/bu21013_ts.c 
b/drivers/input/touchscreen/bu21013_ts.c
index e9cb020ed725..0bdadd24296f 100644
--- a/drivers/input/touchscreen/bu21013_ts.c
+++ b/drivers/input/touchscreen/bu21013_ts.c
@@ -634,7 +634,6 @@ static int bu21013_remove(struct i2c_client *client)
return 0;
 }
 
-#ifdef CONFIG_PM
 /**
  * bu21013_suspend() - suspend the touch screen controller
  * @dev: pointer to device structure
@@ -642,7 +641,7 @@ static int bu21013_remove(struct i2c_client *client)
  * This function is used to suspend the
  * touch panel controller and returns integer
  */
-static int bu21013_suspend(struct device *dev)
+static int __maybe_unused bu21013_suspend(struct device *dev)
 {
struct bu21013_ts *ts = dev_get_drvdata(dev);
struct i2c_client *client = ts->client;
@@ -665,7 +664,7 @@ static int bu21013_suspend(struct device *dev)
  * This function is used to resume the touch panel
  * controller and returns integer.
  */
-static int bu21013_resume(struct device *dev)
+static int __maybe_unused bu21013_resume(struct device *dev)
 {
struct bu21013_ts *ts = dev_get_drvdata(dev);
struct i2c_client *client = ts->client;
@@ -693,11 +692,7 @@ static int bu21013_resume(struct device *dev)
return 0;
 }
 
-static const struct dev_pm_ops bu21013_dev_pm_ops = {
-   .suspend = bu21013_suspend,
-   .resume  = bu21013_resume,
-};
-#endif
+static SIMPLE_DEV_PM_OPS(bu21013_dev_pm_ops, bu21013_suspend, bu21013_resume);
 
 static const struct i2c_device_id bu21013_id[] = {
{ DRIVER_TP, 0 },
@@ -708,9 +703,7 @@ MODULE_DEVICE_TABLE(i2c, bu21013_id);
 static struct i2c_driver bu21013_driver = {
.driver = {
.name   =   DRIVER_TP,
-#ifdef CONFIG_PM
.pm =   _dev_pm_ops,
-#endif
},
.probe  =   bu21013_probe,
.remove =   bu21013_remove,
-- 
2.23.0.rc1.153.gdeed80330f-goog



[PATCH 01/11] ARM: ux500: improve BU21013 touchpad bindings

2019-08-09 Thread Dmitry Torokhov
In preparation for updating the bu21013_tp driver, properly annotate the GPIO
properties (the INT GPIOs are active low, not open drain), and also define
interrupt lines so we do not have to have special conversion in the driver.

Signed-off-by: Dmitry Torokhov 
---
 arch/arm/boot/dts/ste-hrefprev60-stuib.dts  | 14 ++
 arch/arm/boot/dts/ste-hrefv60plus-stuib.dts | 14 ++
 2 files changed, 20 insertions(+), 8 deletions(-)

diff --git a/arch/arm/boot/dts/ste-hrefprev60-stuib.dts 
b/arch/arm/boot/dts/ste-hrefprev60-stuib.dts
index aed940bd65a8..b78be5f4c212 100644
--- a/arch/arm/boot/dts/ste-hrefprev60-stuib.dts
+++ b/arch/arm/boot/dts/ste-hrefprev60-stuib.dts
@@ -4,6 +4,8 @@
  */
 
 /dts-v1/;
+#include 
+#include 
 #include "ste-hrefprev60.dtsi"
 #include "ste-href-stuib.dtsi"
 
@@ -23,12 +25,16 @@
i2c@8011 {
/* Only one of these will be used */
bu21013_tp@5c {
-   touch-gpio = < 12 0x4>;
-   reset-gpio = <_gpio 13 0x4>;
+   interrupt-parent = <>;
+   interrupts = <12 IRQ_TYPE_LEVEL_LOW>;
+   touch-gpios = < 12 GPIO_ACTIVE_LOW>;
+   reset-gpios = <_gpio 13 
GPIO_LINE_OPEN_DRAIN>;
};
bu21013_tp@5d {
-   touch-gpio = < 12 0x4>;
-   reset-gpio = <_gpio 13 0x4>;
+   interrupt-parent = <>;
+   interrupts = <12 IRQ_TYPE_LEVEL_LOW>;
+   touch-gpios = < 12 GPIO_ACTIVE_LOW>;
+   reset-gpios = <_gpio 13 
GPIO_LINE_OPEN_DRAIN>;
};
};
};
diff --git a/arch/arm/boot/dts/ste-hrefv60plus-stuib.dts 
b/arch/arm/boot/dts/ste-hrefv60plus-stuib.dts
index 0f3c3b86bb20..9be513aad549 100644
--- a/arch/arm/boot/dts/ste-hrefv60plus-stuib.dts
+++ b/arch/arm/boot/dts/ste-hrefv60plus-stuib.dts
@@ -6,6 +6,8 @@
  */
 
 /dts-v1/;
+#include 
+#include 
 #include "ste-hrefv60plus.dtsi"
 #include "ste-href-stuib.dtsi"
 
@@ -25,12 +27,16 @@
i2c@8011 {
/* Only one of these will be used */
bu21013_tp@5c {
-   touch-gpio = < 20 0x4>;
-   reset-gpio = < 17 0x4>;
+   interrupt-parent = <>;
+   interrupts = <20 IRQ_TYPE_LEVEL_LOW>;
+   touch-gpios = < 20 GPIO_ACTIVE_LOW>;
+   reset-gpios = < 17 GPIO_LINE_OPEN_DRAIN>;
};
bu21013_tp@5d {
-   touch-gpio = < 20 0x4>;
-   reset-gpio = < 17 0x4>;
+   interrupt-parent = <>;
+   interrupts = <20 IRQ_TYPE_LEVEL_LOW>;
+   touch-gpios = < 20 GPIO_ACTIVE_LOW>;
+   reset-gpios = < 17 GPIO_LINE_OPEN_DRAIN>;
};
};
};
-- 
2.23.0.rc1.153.gdeed80330f-goog



[PATCH 03/11] Input: bu21013_ts - rename some variables

2019-08-09 Thread Dmitry Torokhov
"bu21013_data" and "struct bu21013_ts_data" are a tad long, let's call them
"ts" and "struct bu21013_ts".

Also rename retval to error in bu21013_init_chip() and adjust formatting;
i2c_smbus_write_byte_data() returns negative on error and 0 on success, so
we simply test whether error is 0 or not.

Signed-off-by: Dmitry Torokhov 
---
 drivers/input/touchscreen/bu21013_ts.c | 376 +
 1 file changed, 190 insertions(+), 186 deletions(-)

diff --git a/drivers/input/touchscreen/bu21013_ts.c 
b/drivers/input/touchscreen/bu21013_ts.c
index c20f86f98ffc..e9cb020ed725 100644
--- a/drivers/input/touchscreen/bu21013_ts.c
+++ b/drivers/input/touchscreen/bu21013_ts.c
@@ -135,7 +135,7 @@
 #define DRIVER_TP  "bu21013_tp"
 
 /**
- * struct bu21013_ts_data - touch panel data structure
+ * struct bu21013_ts - touch panel data structure
  * @client: pointer to the i2c client
  * @wait: variable to wait_queue_head_t structure
  * @touch_stopped: touch stop flag
@@ -147,7 +147,7 @@
  *
  * Touch panel device data structure
  */
-struct bu21013_ts_data {
+struct bu21013_ts {
struct i2c_client *client;
wait_queue_head_t wait;
const struct bu21013_platform_device *chip;
@@ -161,34 +161,35 @@ struct bu21013_ts_data {
 
 /**
  * bu21013_read_block_data(): read the touch co-ordinates
- * @data: bu21013_ts_data structure pointer
+ * @data: bu21013_ts structure pointer
  * @buf: byte pointer
  *
  * Read the touch co-ordinates using i2c read block into buffer
  * and returns integer.
  */
-static int bu21013_read_block_data(struct bu21013_ts_data *data, u8 *buf)
+static int bu21013_read_block_data(struct bu21013_ts *ts, u8 *buf)
 {
int ret, i;
 
for (i = 0; i < I2C_RETRY_COUNT; i++) {
-   ret = i2c_smbus_read_i2c_block_data
-   (data->client, BU21013_SENSORS_BTN_0_7_REG,
-   LENGTH_OF_BUFFER, buf);
+   ret = i2c_smbus_read_i2c_block_data(ts->client,
+   BU21013_SENSORS_BTN_0_7_REG,
+   LENGTH_OF_BUFFER, buf);
if (ret == LENGTH_OF_BUFFER)
return 0;
}
+
return -EINVAL;
 }
 
 /**
  * bu21013_do_touch_report(): Get the touch co-ordinates
- * @data: bu21013_ts_data structure pointer
+ * @data: bu21013_ts structure pointer
  *
  * Get the touch co-ordinates from touch sensor registers and writes
  * into device structure and returns integer.
  */
-static int bu21013_do_touch_report(struct bu21013_ts_data *data)
+static int bu21013_do_touch_report(struct bu21013_ts *ts)
 {
u8  buf[LENGTH_OF_BUFFER];
unsigned int pos_x[2], pos_y[2];
@@ -196,10 +197,7 @@ static int bu21013_do_touch_report(struct bu21013_ts_data 
*data)
int finger_down_count = 0;
int i;
 
-   if (data == NULL)
-   return -EINVAL;
-
-   if (bu21013_read_block_data(data, buf) < 0)
+   if (bu21013_read_block_data(ts, buf) < 0)
return -EINVAL;
 
has_x_sensors = hweight32(buf[0] & BU21013_SENSORS_EN_0_7);
@@ -227,21 +225,21 @@ static int bu21013_do_touch_report(struct bu21013_ts_data 
*data)
}
 
for (i = 0; i < finger_down_count; i++) {
-   if (data->chip->x_flip)
-   pos_x[i] = data->chip->touch_x_max - pos_x[i];
-   if (data->chip->y_flip)
-   pos_y[i] = data->chip->touch_y_max - pos_y[i];
+   if (ts->chip->x_flip)
+   pos_x[i] = ts->chip->touch_x_max - pos_x[i];
+   if (ts->chip->y_flip)
+   pos_y[i] = ts->chip->touch_y_max - pos_y[i];
 
-   input_report_abs(data->in_dev,
+   input_report_abs(ts->in_dev,
 ABS_MT_POSITION_X, pos_x[i]);
-   input_report_abs(data->in_dev,
+   input_report_abs(ts->in_dev,
 ABS_MT_POSITION_Y, pos_y[i]);
-   input_mt_sync(data->in_dev);
+   input_mt_sync(ts->in_dev);
}
} else
-   input_mt_sync(data->in_dev);
+   input_mt_sync(ts->in_dev);
 
-   input_sync(data->in_dev);
+   input_sync(ts->in_dev);
 
return 0;
 }
@@ -255,23 +253,22 @@ static int bu21013_do_touch_report(struct bu21013_ts_data 
*data)
  */
 static irqreturn_t bu21013_gpio_irq(int irq, void *device_data)
 {
-   struct bu21013_ts_data *data = device_data;
-   struct i2c_client *i2c = data->client;
+   struct bu21013_ts *ts = device_data;
int keep_polling;
-   int retval;
+   int error;
 
do {
-   retval = bu21013_do_touch_report(data);
-   if (retval < 0) {
-  

[PATCH 08/11] Input: bu21013_ts - use interrupt from I2C client

2019-08-09 Thread Dmitry Torokhov
Instead of trying to map INT GPIO to interrupt, let's use one supplied by
I2C client. If there is none - bail. This will also allow us to treat INT
GPIO as optional, as per the binding.

Signed-off-by: Dmitry Torokhov 
---
 .../bindings/input/touchscreen/bu21013.txt|  6 +++-
 drivers/input/touchscreen/bu21013_ts.c| 35 ++-
 2 files changed, 24 insertions(+), 17 deletions(-)

diff --git a/Documentation/devicetree/bindings/input/touchscreen/bu21013.txt 
b/Documentation/devicetree/bindings/input/touchscreen/bu21013.txt
index 43899fc36ecf..7ddb5de8343d 100644
--- a/Documentation/devicetree/bindings/input/touchscreen/bu21013.txt
+++ b/Documentation/devicetree/bindings/input/touchscreen/bu21013.txt
@@ -4,6 +4,8 @@ Required properties:
  - compatible  : "rohm,bu21013_tp"
  - reg : I2C device address
  - reset-gpios : GPIO pin enabling (selecting) chip (CS)
+ - interrupt-parent: the phandle for the gpio controller
+ - interrupts  : (gpio) interrupt to which the chip is connected
 
 Optional properties:
  - touch-gpios : GPIO pin registering a touch event
@@ -19,7 +21,9 @@ Example:
bu21013_tp@5c {
compatible = "rohm,bu21013_tp";
reg = <0x5c>;
-   touch-gpio = < 20 0x4>;
+   interrupt-parent = <>;
+   interrupts = <20 IRQ_TYPE_LEVEL_LOW>;
+   touch-gpio = < 20 GPIO_ACTIVE_LOW>;
avdd-supply = <_ldo_aux1_reg>;
 
rohm,touch-max-x = <384>;
diff --git a/drivers/input/touchscreen/bu21013_ts.c 
b/drivers/input/touchscreen/bu21013_ts.c
index 79de7327a460..d745643861cb 100644
--- a/drivers/input/touchscreen/bu21013_ts.c
+++ b/drivers/input/touchscreen/bu21013_ts.c
@@ -141,7 +141,6 @@
  * @regulator: pointer to the Regulator used for touch screen
  * @cs_gpiod: chip select GPIO line
  * @int_gpiod: touch interrupt GPIO line
- * @irq: interrupt number the device is using
  * @touch_x_max: maximum X coordinate reported by the device
  * @touch_y_max: maximum Y coordinate reported by the device
  * @x_flip: indicates that the driver should invert X coordinate before
@@ -158,7 +157,6 @@ struct bu21013_ts {
struct regulator *regulator;
struct gpio_desc *cs_gpiod;
struct gpio_desc *int_gpiod;
-   unsigned int irq;
u32 touch_x_max;
u32 touch_y_max;
bool x_flip;
@@ -252,7 +250,8 @@ static irqreturn_t bu21013_gpio_irq(int irq, void 
*device_data)
if (unlikely(ts->touch_stopped))
break;
 
-   keep_polling = gpiod_get_value(ts->int_gpiod);
+   keep_polling = ts->int_gpiod ?
+   gpiod_get_value(ts->int_gpiod) : false;
if (keep_polling)
usleep_range(2000, 2500);
} while (keep_polling);
@@ -419,6 +418,11 @@ static int bu21013_probe(struct i2c_client *client,
return -EIO;
}
 
+   if (!client->irq) {
+   dev_err(>dev, "No IRQ set up\n");
+   return -EINVAL;
+   }
+
ts = devm_kzalloc(>dev, sizeof(*ts), GFP_KERNEL);
if (!ts)
return -ENOMEM;
@@ -491,14 +495,17 @@ static int bu21013_probe(struct i2c_client *client,
}
 
/* Named "INT" on the chip, DT binding is "touch" */
-   ts->int_gpiod = devm_gpiod_get(>dev, "touch", GPIOD_IN);
+   ts->int_gpiod = devm_gpiod_get_optional(>dev,
+   "touch", GPIOD_IN);
error = PTR_ERR_OR_ZERO(ts->int_gpiod);
if (error) {
if (error != -EPROBE_DEFER)
dev_err(>dev, "failed to get INT GPIO\n");
return error;
}
-   gpiod_set_consumer_name(ts->int_gpiod, "BU21013 INT");
+
+   if (ts->int_gpiod)
+   gpiod_set_consumer_name(ts->int_gpiod, "BU21013 INT");
 
/* configure the touch panel controller */
error = bu21013_init_chip(ts);
@@ -507,16 +514,12 @@ static int bu21013_probe(struct i2c_client *client,
return error;
}
 
-   ts->irq = gpiod_to_irq(ts->int_gpiod);
-   error = devm_request_threaded_irq(>dev, ts->irq,
+   error = devm_request_threaded_irq(>dev, client->irq,
  NULL, bu21013_gpio_irq,
- IRQF_TRIGGER_FALLING |
-   IRQF_SHARED |
-   IRQF_ONESHOT,
- DRIVER_TP, ts);
+ IRQF_ONESHOT, DRIVER_TP, ts);
if (error) {
dev_err(>dev, "request irq %d failed\n",
-   ts->irq);
+   client->irq);
return error;
}
 
@@ -549,9 

[PATCH 05/11] Input: bu21013_ts - remove useless comments

2019-08-09 Thread Dmitry Torokhov
The comments for individual functions in the driver do not provide any
additional information beyond what function names indicate.

Signed-off-by: Dmitry Torokhov 
---
 drivers/input/touchscreen/bu21013_ts.c | 61 +-
 1 file changed, 2 insertions(+), 59 deletions(-)

diff --git a/drivers/input/touchscreen/bu21013_ts.c 
b/drivers/input/touchscreen/bu21013_ts.c
index 0bdadd24296f..a5230f6ea5f0 100644
--- a/drivers/input/touchscreen/bu21013_ts.c
+++ b/drivers/input/touchscreen/bu21013_ts.c
@@ -159,14 +159,6 @@ struct bu21013_ts {
bool touch_stopped;
 };
 
-/**
- * bu21013_read_block_data(): read the touch co-ordinates
- * @data: bu21013_ts structure pointer
- * @buf: byte pointer
- *
- * Read the touch co-ordinates using i2c read block into buffer
- * and returns integer.
- */
 static int bu21013_read_block_data(struct bu21013_ts *ts, u8 *buf)
 {
int ret, i;
@@ -182,13 +174,6 @@ static int bu21013_read_block_data(struct bu21013_ts *ts, 
u8 *buf)
return -EINVAL;
 }
 
-/**
- * bu21013_do_touch_report(): Get the touch co-ordinates
- * @data: bu21013_ts structure pointer
- *
- * Get the touch co-ordinates from touch sensor registers and writes
- * into device structure and returns integer.
- */
 static int bu21013_do_touch_report(struct bu21013_ts *ts)
 {
u8  buf[LENGTH_OF_BUFFER];
@@ -243,14 +228,7 @@ static int bu21013_do_touch_report(struct bu21013_ts *ts)
 
return 0;
 }
-/**
- * bu21013_gpio_irq() - gpio thread function for touch interrupt
- * @irq: irq value
- * @device_data: void pointer
- *
- * This gpio thread function for touch interrupt
- * and returns irqreturn_t.
- */
+
 static irqreturn_t bu21013_gpio_irq(int irq, void *device_data)
 {
struct bu21013_ts *ts = device_data;
@@ -273,13 +251,6 @@ static irqreturn_t bu21013_gpio_irq(int irq, void 
*device_data)
return IRQ_HANDLED;
 }
 
-/**
- * bu21013_init_chip() - power on sequence for the bu21013 controller
- * @data: device structure pointer
- *
- * This function is used to power on
- * the bu21013 controller and returns integer.
- */
 static int bu21013_init_chip(struct bu21013_ts *ts)
 {
struct i2c_client *client = ts->client;
@@ -468,14 +439,6 @@ bu21013_parse_dt(struct device *dev)
 }
 #endif
 
-/**
- * bu21013_probe() - initializes the i2c-client touchscreen driver
- * @client: i2c client structure pointer
- * @id: i2c device id pointer
- *
- * This function used to initializes the i2c-client touchscreen
- * driver and returns integer.
- */
 static int bu21013_probe(struct i2c_client *client,
 const struct i2c_device_id *id)
 {
@@ -606,13 +569,7 @@ static int bu21013_probe(struct i2c_client *client,
 
return error;
 }
-/**
- * bu21013_remove() - removes the i2c-client touchscreen driver
- * @client: i2c client structure pointer
- *
- * This function uses to remove the i2c-client
- * touchscreen driver and returns integer.
- */
+
 static int bu21013_remove(struct i2c_client *client)
 {
struct bu21013_ts *ts = i2c_get_clientdata(client);
@@ -634,13 +591,6 @@ static int bu21013_remove(struct i2c_client *client)
return 0;
 }
 
-/**
- * bu21013_suspend() - suspend the touch screen controller
- * @dev: pointer to device structure
- *
- * This function is used to suspend the
- * touch panel controller and returns integer
- */
 static int __maybe_unused bu21013_suspend(struct device *dev)
 {
struct bu21013_ts *ts = dev_get_drvdata(dev);
@@ -657,13 +607,6 @@ static int __maybe_unused bu21013_suspend(struct device 
*dev)
return 0;
 }
 
-/**
- * bu21013_resume() - resume the touch screen controller
- * @dev: pointer to device structure
- *
- * This function is used to resume the touch panel
- * controller and returns integer.
- */
 static int __maybe_unused bu21013_resume(struct device *dev)
 {
struct bu21013_ts *ts = dev_get_drvdata(dev);
-- 
2.23.0.rc1.153.gdeed80330f-goog



[PATCH 10/11] Input: bu21013_ts - switch to using MT-B (slotted) protocol

2019-08-09 Thread Dmitry Torokhov
MT-B protocol is more efficient and everyone expects it. We use in-kernel
tracking to identify contacts.

Signed-off-by: Dmitry Torokhov 
---
 drivers/input/touchscreen/bu21013_ts.c | 80 ++
 1 file changed, 43 insertions(+), 37 deletions(-)

diff --git a/drivers/input/touchscreen/bu21013_ts.c 
b/drivers/input/touchscreen/bu21013_ts.c
index d7e16e915743..2c534aa61687 100644
--- a/drivers/input/touchscreen/bu21013_ts.c
+++ b/drivers/input/touchscreen/bu21013_ts.c
@@ -9,6 +9,7 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 #include 
 #include 
@@ -181,11 +182,13 @@ static int bu21013_read_block_data(struct bu21013_ts *ts, 
u8 *buf)
 
 static int bu21013_do_touch_report(struct bu21013_ts *ts)
 {
-   u8  buf[LENGTH_OF_BUFFER];
-   unsigned int pos_x[2], pos_y[2];
-   boolhas_x_sensors, has_y_sensors;
-   int finger_down_count = 0;
-   int i;
+   struct input_dev *input = ts->in_dev;
+   struct input_mt_pos pos[MAX_FINGERS];
+   int slots[MAX_FINGERS];
+   u8 buf[LENGTH_OF_BUFFER];
+   bool has_x_sensors, has_y_sensors;
+   int finger_down_count = 0;
+   int i;
 
if (bu21013_read_block_data(ts, buf) < 0)
return -EINVAL;
@@ -197,39 +200,38 @@ static int bu21013_do_touch_report(struct bu21013_ts *ts)
return 0;
 
for (i = 0; i < MAX_FINGERS; i++) {
-   const u8 *p = &buf[4 * i + 3];
-   unsigned int x = p[0] << SHIFT_2 | (p[1] & MASK_BITS);
-   unsigned int y = p[2] << SHIFT_2 | (p[3] & MASK_BITS);
-   if (x == 0 || y == 0)
+   const u8 *data = &buf[4 * i + 3];
+   struct input_mt_pos *p = &pos[finger_down_count];
+
+   p->x = data[0] << SHIFT_2 | (data[1] & MASK_BITS);
+   p->y = data[2] << SHIFT_2 | (data[3] & MASK_BITS);
+   if (p->x == 0 || p->y == 0)
continue;
-   pos_x[finger_down_count] = x;
-   pos_y[finger_down_count] = y;
+
finger_down_count++;
+
+   if (ts->x_flip)
+   p->x = ts->touch_x_max - p->x;
+   if (ts->y_flip)
+   p->y = ts->touch_y_max - p->y;
}
 
-   if (finger_down_count) {
-   if (finger_down_count == 2 &&
-   (abs(pos_x[0] - pos_x[1]) < DELTA_MIN ||
-abs(pos_y[0] - pos_y[1]) < DELTA_MIN)) {
-   return 0;
-   }
+   if (finger_down_count == 2 &&
+   (abs(pos[0].x - pos[1].x) < DELTA_MIN ||
+abs(pos[0].y - pos[1].y) < DELTA_MIN)) {
+   return 0;
+   }
 
-   for (i = 0; i < finger_down_count; i++) {
-   if (ts->x_flip)
-   pos_x[i] = ts->touch_x_max - pos_x[i];
-   if (ts->y_flip)
-   pos_y[i] = ts->touch_y_max - pos_y[i];
-
-   input_report_abs(ts->in_dev,
-ABS_MT_POSITION_X, pos_x[i]);
-   input_report_abs(ts->in_dev,
-ABS_MT_POSITION_Y, pos_y[i]);
-   input_mt_sync(ts->in_dev);
-   }
-   } else
-   input_mt_sync(ts->in_dev);
+   input_mt_assign_slots(input, slots, pos, finger_down_count, DELTA_MIN);
+   for (i = 0; i < finger_down_count; i++) {
+   input_mt_slot(input, slots[i]);
+   input_mt_report_slot_state(input, MT_TOOL_FINGER, true);
+   input_report_abs(input, ABS_MT_POSITION_X, pos[i].x);
+   input_report_abs(input, ABS_MT_POSITION_Y, pos[i].y);
+   }
 
-   input_sync(ts->in_dev);
+   input_mt_sync_frame(input);
+   input_sync(input);
 
return 0;
 }
@@ -443,20 +445,24 @@ static int bu21013_probe(struct i2c_client *client,
return -ENOMEM;
}
ts->in_dev = in_dev;
+   input_set_drvdata(in_dev, ts);
 
/* register the device to input subsystem */
in_dev->name = DRIVER_TP;
in_dev->id.bustype = BUS_I2C;
 
-   __set_bit(EV_SYN, in_dev->evbit);
-   __set_bit(EV_KEY, in_dev->evbit);
-   __set_bit(EV_ABS, in_dev->evbit);
-
input_set_abs_params(in_dev, ABS_MT_POSITION_X,
 0, ts->touch_x_max, 0, 0);
input_set_abs_params(in_dev, ABS_MT_POSITION_Y,
 0, ts->touch_y_max, 0, 0);
-   input_set_drvdata(in_dev, ts);
+
+   error = input_mt_init_slots(in_dev, MAX_FINGERS,
+   INPUT_MT_DIRECT | INPUT_MT_TRACK |
+   INPUT_MT_DROP_UNUSED);
+   if (error) {
+   dev_err(&client->dev, "failed to initialize MT slots");
+   return error;
+   }
 
ts->regulator = devm_regulator_get(&client->dev, "avdd");
if 

[PATCH 09/11] Input: bu21013_ts - fix suspend when wake source

2019-08-09 Thread Dmitry Torokhov
If the touchscreen is configured as wakeup source we should not be cutting
off power to it.

Also, now that the driver relies on I2C client to supply IRQ, we do not
need to explicitly enable and disable IRQ for wakeup: if device is created
as wakeup source, I2C core will mark interrupt as wakeup one.

Signed-off-by: Dmitry Torokhov 
---
 drivers/input/touchscreen/bu21013_ts.c | 48 +-
 1 file changed, 24 insertions(+), 24 deletions(-)

diff --git a/drivers/input/touchscreen/bu21013_ts.c 
b/drivers/input/touchscreen/bu21013_ts.c
index d745643861cb..d7e16e915743 100644
--- a/drivers/input/touchscreen/bu21013_ts.c
+++ b/drivers/input/touchscreen/bu21013_ts.c
@@ -547,44 +547,44 @@ static int bu21013_remove(struct i2c_client *client)
 
 static int __maybe_unused bu21013_suspend(struct device *dev)
 {
-   struct bu21013_ts *ts = dev_get_drvdata(dev);
-   struct i2c_client *client = ts->client;
+   struct i2c_client *client = to_i2c_client(dev);
+   struct bu21013_ts *ts = i2c_get_clientdata(client);
 
ts->touch_stopped = true;
-   if (device_may_wakeup(&client->dev))
-   enable_irq_wake(client->irq);
-   else
-   disable_irq(client->irq);
+   mb();
+   disable_irq(client->irq);
 
-   regulator_disable(ts->regulator);
+   if (!device_may_wakeup(&client->dev))
+   regulator_disable(ts->regulator);
 
return 0;
 }
 
 static int __maybe_unused bu21013_resume(struct device *dev)
 {
-   struct bu21013_ts *ts = dev_get_drvdata(dev);
-   struct i2c_client *client = ts->client;
-   int retval;
+   struct i2c_client *client = to_i2c_client(dev);
+   struct bu21013_ts *ts = i2c_get_clientdata(client);
+   int error;
 
-   retval = regulator_enable(ts->regulator);
-   if (retval < 0) {
-   dev_err(&client->dev, "bu21013 regulator enable failed\n");
-   return retval;
-   }
+   if (!device_may_wakeup(&client->dev)) {
+   error = regulator_enable(ts->regulator);
+   if (error) {
+   dev_err(&client->dev,
+   "failed to re-enable regulator when resuming\n");
+   return error;
+   }
 
-   retval = bu21013_init_chip(ts);
-   if (retval < 0) {
-   dev_err(&client->dev, "bu21013 controller config failed\n");
-   return retval;
+   error = bu21013_init_chip(ts);
+   if (error) {
+   dev_err(&client->dev,
+   "failed to reinitialize chip when resuming\n");
+   return error;
+   }
}
 
ts->touch_stopped = false;
-
-   if (device_may_wakeup(&client->dev))
-   disable_irq_wake(client->irq);
-   else
-   enable_irq(client->irq);
+   mb();
+   enable_irq(client->irq);
 
return 0;
 }
-- 
2.23.0.rc1.153.gdeed80330f-goog



[PATCH 06/11] Input: bu21013_ts - convert to using managed resources

2019-08-09 Thread Dmitry Torokhov
This allows trimming error unwinding and device removal handling.

Signed-off-by: Dmitry Torokhov 
---
 drivers/input/touchscreen/bu21013_ts.c | 182 -
 1 file changed, 84 insertions(+), 98 deletions(-)

diff --git a/drivers/input/touchscreen/bu21013_ts.c 
b/drivers/input/touchscreen/bu21013_ts.c
index a5230f6ea5f0..4b6f9544e94a 100644
--- a/drivers/input/touchscreen/bu21013_ts.c
+++ b/drivers/input/touchscreen/bu21013_ts.c
@@ -137,7 +137,6 @@
 /**
  * struct bu21013_ts - touch panel data structure
  * @client: pointer to the i2c client
- * @wait: variable to wait_queue_head_t structure
  * @touch_stopped: touch stop flag
  * @chip: pointer to the touch panel controller
  * @in_dev: pointer to the input device structure
@@ -149,7 +148,6 @@
  */
 struct bu21013_ts {
struct i2c_client *client;
-   wait_queue_head_t wait;
const struct bu21013_platform_device *chip;
struct input_dev *in_dev;
struct regulator *regulator;
@@ -242,11 +240,13 @@ static irqreturn_t bu21013_gpio_irq(int irq, void 
*device_data)
break;
}
 
+   if (unlikely(ts->touch_stopped))
+   break;
+
keep_polling = gpiod_get_value(ts->int_gpiod);
if (keep_polling)
-   wait_event_timeout(ts->wait, ts->touch_stopped,
-  msecs_to_jiffies(2));
-   } while (keep_polling && !ts->touch_stopped);
+   usleep_range(2000, 2500);
+   } while (keep_polling);
 
return IRQ_HANDLED;
 }
@@ -388,20 +388,6 @@ static int bu21013_init_chip(struct bu21013_ts *ts)
return 0;
 }
 
-/**
- * bu21013_free_irq() - frees IRQ registered for touchscreen
- * @ts: device structure pointer
- *
- * This function signals interrupt thread to stop processing and
- * frees interrupt.
- */
-static void bu21013_free_irq(struct bu21013_ts *ts)
-{
-   ts->touch_stopped = true;
-   wake_up(&ts->wait);
-   free_irq(ts->irq, ts);
-}
-
 #ifdef CONFIG_OF
 static const struct bu21013_platform_device *
 bu21013_parse_dt(struct device *dev)
@@ -439,6 +425,20 @@ bu21013_parse_dt(struct device *dev)
 }
 #endif
 
+static void bu21013_power_off(void *_ts)
+{
+   struct bu21013_ts *ts = ts;
+
+   regulator_disable(ts->regulator);
+}
+
+static void bu21013_disable_chip(void *_ts)
+{
+   struct bu21013_ts *ts = ts;
+
+   gpiod_set_value(ts->cs_gpiod, 0);
+}
+
 static int bu21013_probe(struct i2c_client *client,
 const struct i2c_device_id *id)
 {
@@ -460,133 +460,119 @@ static int bu21013_probe(struct i2c_client *client,
return PTR_ERR(pdata);
}
 
-   ts = kzalloc(sizeof(*ts), GFP_KERNEL);
-   in_dev = input_allocate_device();
-   if (!ts || !in_dev) {
+   ts = devm_kzalloc(&client->dev, sizeof(*ts), GFP_KERNEL);
+   if (!ts)
+   return -ENOMEM;
+
+   ts->chip = pdata;
+   ts->client = client;
+
+   in_dev = devm_input_allocate_device(&client->dev);
+   if (!in_dev) {
dev_err(&client->dev, "device memory alloc failed\n");
-   error = -ENOMEM;
-   goto err_free_mem;
+   return -ENOMEM;
}
+   ts->in_dev = in_dev;
 
-   /* Named "INT" on the chip, DT binding is "touch" */
-   ts->int_gpiod = gpiod_get(&client->dev, "touch", GPIOD_IN);
-   error = PTR_ERR_OR_ZERO(ts->int_gpiod);
-   if (error) {
-   if (error != -EPROBE_DEFER)
-   dev_err(&client->dev, "failed to get INT GPIO\n");
-   goto err_free_mem;
-   }
-   gpiod_set_consumer_name(ts->int_gpiod, "BU21013 INT");
+   /* register the device to input subsystem */
+   in_dev->name = DRIVER_TP;
+   in_dev->id.bustype = BUS_I2C;
 
-   ts->in_dev = in_dev;
-   ts->chip = pdata;
-   ts->client = client;
-   ts->irq = gpiod_to_irq(ts->int_gpiod);
+   __set_bit(EV_SYN, in_dev->evbit);
+   __set_bit(EV_KEY, in_dev->evbit);
+   __set_bit(EV_ABS, in_dev->evbit);
+
+   input_set_abs_params(in_dev, ABS_MT_POSITION_X,
+0, pdata->touch_x_max, 0, 0);
+   input_set_abs_params(in_dev, ABS_MT_POSITION_Y,
+0, pdata->touch_y_max, 0, 0);
+   input_set_drvdata(in_dev, ts);
 
-   ts->regulator = regulator_get(&client->dev, "avdd");
+   ts->regulator = devm_regulator_get(&client->dev, "avdd");
if (IS_ERR(ts->regulator)) {
dev_err(&client->dev, "regulator_get failed\n");
-   error = PTR_ERR(ts->regulator);
-   goto err_put_int_gpio;
+   return PTR_ERR(ts->regulator);
}
 
error = regulator_enable(ts->regulator);
-   if (error < 0) {
+   if (error) {
dev_err(&client->dev, "regulator enable failed\n");
-   goto err_put_regulator;
+   return error;
}
 
-   ts->touch_stopped = false;
-  

Re: [RFC PATCH v2 11/19] mm/gup: Pass follow_page_context further down the call stack

2019-08-09 Thread John Hubbard
On 8/9/19 3:58 PM, ira.we...@intel.com wrote:
> From: Ira Weiny 
> 
> In preparation for passing more information (vaddr_pin) into
> follow_page_pte(), follow_devmap_pud(), and follow_devmap_pmd().
> 
> Signed-off-by: Ira Weiny 
> ---
>  include/linux/huge_mm.h | 17 -
>  mm/gup.c| 31 +++
>  mm/huge_memory.c|  6 --
>  mm/internal.h   | 28 
>  4 files changed, 47 insertions(+), 35 deletions(-)
> 
> diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h
> index 45ede62aa85b..b01a20ce0bb9 100644
> --- a/include/linux/huge_mm.h
> +++ b/include/linux/huge_mm.h
> @@ -233,11 +233,6 @@ static inline int hpage_nr_pages(struct page *page)
>   return 1;
>  }
>  
> -struct page *follow_devmap_pmd(struct vm_area_struct *vma, unsigned long 
> addr,
> - pmd_t *pmd, int flags, struct dev_pagemap **pgmap);
> -struct page *follow_devmap_pud(struct vm_area_struct *vma, unsigned long 
> addr,
> - pud_t *pud, int flags, struct dev_pagemap **pgmap);
> -
>  extern vm_fault_t do_huge_pmd_numa_page(struct vm_fault *vmf, pmd_t 
> orig_pmd);
>  
>  extern struct page *huge_zero_page;
> @@ -375,18 +370,6 @@ static inline void mm_put_huge_zero_page(struct 
> mm_struct *mm)
>   return;
>  }
>  
> -static inline struct page *follow_devmap_pmd(struct vm_area_struct *vma,
> - unsigned long addr, pmd_t *pmd, int flags, struct dev_pagemap **pgmap)
> -{
> - return NULL;
> -}
> -
> -static inline struct page *follow_devmap_pud(struct vm_area_struct *vma,
> - unsigned long addr, pud_t *pud, int flags, struct dev_pagemap **pgmap)
> -{
> - return NULL;
> -}
> -
>  static inline bool thp_migration_supported(void)
>  {
>   return false;
> diff --git a/mm/gup.c b/mm/gup.c
> index 504af3e9a942..a7a9d2f5278c 100644
> --- a/mm/gup.c
> +++ b/mm/gup.c
> @@ -24,11 +24,6 @@
>  
>  #include "internal.h"
>  
> -struct follow_page_context {
> - struct dev_pagemap *pgmap;
> - unsigned int page_mask;
> -};
> -
>  /**
>   * put_user_pages_dirty_lock() - release and optionally dirty gup-pinned 
> pages
>   * @pages:  array of pages to be maybe marked dirty, and definitely released.
> @@ -172,8 +167,9 @@ static inline bool can_follow_write_pte(pte_t pte, 
> unsigned int flags)
>  
>  static struct page *follow_page_pte(struct vm_area_struct *vma,
>   unsigned long address, pmd_t *pmd, unsigned int flags,
> - struct dev_pagemap **pgmap)
> + struct follow_page_context *ctx)
>  {
> + struct dev_pagemap **pgmap = &ctx->pgmap;
>   struct mm_struct *mm = vma->vm_mm;
>   struct page *page;
>   spinlock_t *ptl;
> @@ -363,13 +359,13 @@ static struct page *follow_pmd_mask(struct 
> vm_area_struct *vma,
>   }
>   if (pmd_devmap(pmdval)) {
>   ptl = pmd_lock(mm, pmd);
> - page = follow_devmap_pmd(vma, address, pmd, flags, &ctx->pgmap);
> + page = follow_devmap_pmd(vma, address, pmd, flags, ctx);
>   spin_unlock(ptl);
>   if (page)
>   return page;
>   }
>   if (likely(!pmd_trans_huge(pmdval)))
> - return follow_page_pte(vma, address, pmd, flags, &ctx->pgmap);
> + return follow_page_pte(vma, address, pmd, flags, ctx);
>  
>   if ((flags & FOLL_NUMA) && pmd_protnone(pmdval))
>   return no_page_table(vma, flags);
> @@ -389,7 +385,7 @@ static struct page *follow_pmd_mask(struct vm_area_struct 
> *vma,
>   }
>   if (unlikely(!pmd_trans_huge(*pmd))) {
>   spin_unlock(ptl);
> - return follow_page_pte(vma, address, pmd, flags, &ctx->pgmap);
> + return follow_page_pte(vma, address, pmd, flags, ctx);
>   }
>   if (flags & (FOLL_SPLIT | FOLL_SPLIT_PMD)) {
>   int ret;
> @@ -419,7 +415,7 @@ static struct page *follow_pmd_mask(struct vm_area_struct 
> *vma,
>   }
>  
>   return ret ? ERR_PTR(ret) :
> - follow_page_pte(vma, address, pmd, flags, &ctx->pgmap);
> + follow_page_pte(vma, address, pmd, flags, ctx);
>   }
>   page = follow_trans_huge_pmd(vma, address, pmd, flags);
>   spin_unlock(ptl);
> @@ -456,7 +452,7 @@ static struct page *follow_pud_mask(struct vm_area_struct 
> *vma,
>   }
>   if (pud_devmap(*pud)) {
>   ptl = pud_lock(mm, pud);
> - page = follow_devmap_pud(vma, address, pud, flags, &ctx->pgmap);
> + page = follow_devmap_pud(vma, address, pud, flags, ctx);
>   spin_unlock(ptl);
>   if (page)
>   return page;
> @@ -786,7 +782,8 @@ static int check_vma_flags(struct vm_area_struct *vma, 
> unsigned long gup_flags)
>  static long __get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
>   unsigned long start, unsigned long nr_pages,
>   unsigned int gup_flags, struct page **pages,
> -   

Re: [PATCH 0/3] Collapse vimc into single monolithic driver

2019-08-09 Thread Shuah Khan

Hi Andre,

On 8/9/19 5:52 PM, André Almeida wrote:

Hello Shuah,

Thanks for the patch, I did some comments below.

On 8/9/19 6:45 PM, Shuah Khan wrote:

vimc uses Component API to split the driver into functional components.
The real hardware resembles a monolithic structure more than a component
one, and the component structure added a level of complexity, making it hard
to maintain without adding any real benefit.
 
The sensor is one vimc component that would make sense to be a separate

module to closely align with the real hardware. It would be easier to
collapse vimc into single monolithic driver first and then split the
sensor off as a separate module.

This patch series removes the component API and makes minimal changes to
the code base preserving the functional division of the code structure.
Preserving the functional structure allows us to split the sensor off
as a separate module in the future.

Major design elements in this change are:
 - Use existing struct vimc_ent_config and struct vimc_pipeline_config
   to drive the initialization of the functional components.
 - Make vimc_ent_config global by moving it to vimc.h
 - Add two new hooks add and rm to initialize and register, unregister
   and free subdevs.
 - All component API is now gone and bind and unbind hooks are modified
   to do "add" and "rm" with minimal changes to just add and rm subdevs.
 - vimc-core's bind and unbind are now register and unregister.
 - vimc-core invokes "add" hooks from its vimc_register_devices().
   The "add" hooks remain the same and register subdevs. They don't
   create platform devices of their own and use vimc's pdev.dev as
   their reference device. The "add" hooks save their vimc_ent_device(s)
   in the corresponding vimc_ent_config.
 - vimc-core invokes "rm" hooks from its unregister to unregister subdevs
   and cleanup.
 - vimc-core invokes "add" and "rm" hooks with pointer to struct vimc_device
   and the corresponding struct vimc_ent_config pointer.
 
The following configure and stream test works on all devices.
 
 media-ctl -d platform:vimc -V '"Sensor A":0[fmt:SBGGR8_1X8/640x480]'

 media-ctl -d platform:vimc -V '"Debayer A":0[fmt:SBGGR8_1X8/640x480]'
 media-ctl -d platform:vimc -V '"Sensor B":0[fmt:SBGGR8_1X8/640x480]'
 media-ctl -d platform:vimc -V '"Debayer B":0[fmt:SBGGR8_1X8/640x480]'
 
 v4l2-ctl -z platform:vimc -d "RGB/YUV Capture" -v width=1920,height=1440

 v4l2-ctl -z platform:vimc -d "Raw Capture 0" -v pixelformat=BA81
 v4l2-ctl -z platform:vimc -d "Raw Capture 1" -v pixelformat=BA81
 
 v4l2-ctl --stream-mmap --stream-count=100 -d /dev/video1

 v4l2-ctl --stream-mmap --stream-count=100 -d /dev/video2
 v4l2-ctl --stream-mmap --stream-count=100 -d /dev/video3

The third patch in the series fixes a general protection fault found
when rmmod is done while stream is active.


I applied your patch on top of media_tree/master and I did some testing.
Not sure if I did something wrong, but just adding and removing the
module generated a kernel panic:


Thanks for testing.

Odd. I tested modprobe and rmmod both. I was working on Linux 5.3-rc2.
I will apply these to media latest and work from there. I have to
rebase these on top of the reverts from Lucas and Helen


~# modprobe vimc
~# rmmod vimc
[   16.452974] stack segment:  [#1] SMP PTI
[   16.453688] CPU: 0 PID: 2038 Comm: rmmod Not tainted 5.3.0-rc2+ #36
[   16.454678] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996),
BIOS 1.12.0-20181126_142135-anatol 04/01/2014
[   16.456191] RIP: 0010:kfree+0x4d/0x240



[   16.469188] Call Trace:
[   16.469666]  vimc_remove+0x35/0x90 [vimc]
[   16.470436]  platform_drv_remove+0x1f/0x40
[   16.471233]  device_release_driver_internal+0xd3/0x1b0
[   16.472184]  driver_detach+0x37/0x6b
[   16.472882]  bus_remove_driver+0x50/0xc1
[   16.473569]  vimc_exit+0xc/0xca0 [vimc]
[   16.474231]  __x64_sys_delete_module+0x18d/0x240
[   16.475036]  do_syscall_64+0x43/0x110
[   16.475656]  entry_SYSCALL_64_after_hwframe+0x44/0xa9
[   16.476504] RIP: 0033:0x7fceb8dafa4b



[   16.484853] Modules linked in: vimc(-) videobuf2_vmalloc
videobuf2_memops v4l2_tpg videobuf2_v4l2 videobuf2_common
[   16.486187] ---[ end trace 91e5e0894e254d49 ]---
[   16.486758] RIP: 0010:kfree+0x4d/0x240



fish: “rmmod vimc” terminated by signal SIGSEGV (Address boundary error)

I just added the module after booting, no other action was made. Here is
how my `git log --oneline` looks like:

897d708e922b media: vimc: Fix gpf in rmmod path when stream is active
2e4a5ad8ad6d media: vimc: Collapse component structure into a single
monolithic driver
7c8da1687e92 media: vimc: move private defines to a common header
97299a303532 media: Remove dev_err() usage after platform_get_irq()
25a3d6bac6b9 media: adv7511/cobalt: rename driver name to adv7511-v4l2
...



vimc_print_dot (--print-dot) topology after this change:
digraph board {
   

Re: [PATCH v3 0/6] Add support of New Amlogic temperature sensor for G12 SoCs

2019-08-09 Thread Kevin Hilman
Guillaume La Roque  writes:

> This patch series adds support for the new Amlogic temperature sensor and a minimal
> thermal zone for the SEI510 and ODROID-N2 boards.
>
> The first implementation was done on IIO[1], but after comments I moved to the thermal
> framework.
> Formulas and calibration values come from Amlogic.
>
> Changes since v2:
>   - fix yaml documention 
>   - remove unneeded status variable for temperature-sensor node
>   - rework driver after Martin review
>   - add some information in commit message
>
> Changes since v1:
>   - fix enum vs const in documentation
>   - fix error with thermal-sensor-cells value set to 1 instead of 0
>   - add some dependencies needed to add cooling-maps
>
> Dependencies :
> - patch 3,4 & 5: depends on Neil's patch and series :
>   - missing dwc2 phy-names[2]
>   - patchsets to add DVFS on G12a[3] which have deps on [4] and 
> [5]
>
> [1] 
> https://lore.kernel.org/linux-amlogic/20190604144714.2009-1-glaro...@baylibre.com/
> [2] 
> https://lore.kernel.org/linux-amlogic/20190625123647.26117-1-narmstr...@baylibre.com/
> [3] 
> https://lore.kernel.org/linux-amlogic/20190729132622.7566-1-narmstr...@baylibre.com/
> [4] 
> https://lore.kernel.org/linux-amlogic/20190731084019.8451-5-narmstr...@baylibre.com/
> [5] 
> https://lore.kernel.org/linux-amlogic/20190729132622.7566-3-narmstr...@baylibre.com/

Thank you for the detailed list of dependencies!  Much appreciated.

With all the deps, I tested this on sei510 and odroid-n2, and basic
functionality seems to work.

As discussed off-list: it would be nice to have an example of how
cpufreq could be used as a cooling device for hot temperatures.  The
vendor kernel has some trip points that could be included as examples,
or even included as extra patches.

Also the driver patch is missing the two main thermal maintainers, so
please resend at least the driver and bindings including them.


Kevin


Re: [RFC PATCH v2 15/19] mm/gup: Introduce vaddr_pin_pages()

2019-08-09 Thread John Hubbard
On 8/9/19 3:58 PM, ira.we...@intel.com wrote:
> From: Ira Weiny 
> 
> The addition of FOLL_LONGTERM has taken on additional meaning for CMA
> pages.
> 
> In addition subsystems such as RDMA require new information to be passed
> to the GUP interface to track file owning information.  As such a simple
> FOLL_LONGTERM flag is no longer sufficient for these users to pin pages.
> 
> Introduce a new GUP like call which takes the newly introduced vaddr_pin
> information.  Failure to pass the vaddr_pin object back to a vaddr_put*
> call will result in a failure if pins were created on files during the
> pin operation.
> 
> Signed-off-by: Ira Weiny 
> 
> ---
> Changes from list:
>   Change to vaddr_put_pages_dirty_lock
>   Change to vaddr_unpin_pages_dirty_lock
> 
>  include/linux/mm.h |  5 
>  mm/gup.c   | 59 ++
>  2 files changed, 64 insertions(+)
> 
> diff --git a/include/linux/mm.h b/include/linux/mm.h
> index 657c947bda49..90c5802866df 100644
> --- a/include/linux/mm.h
> +++ b/include/linux/mm.h
> @@ -1603,6 +1603,11 @@ int account_locked_vm(struct mm_struct *mm, unsigned 
> long pages, bool inc);
>  int __account_locked_vm(struct mm_struct *mm, unsigned long pages, bool inc,
>   struct task_struct *task, bool bypass_rlim);
>  
> +long vaddr_pin_pages(unsigned long addr, unsigned long nr_pages,
> +  unsigned int gup_flags, struct page **pages,
> +  struct vaddr_pin *vaddr_pin);
> +void vaddr_unpin_pages_dirty_lock(struct page **pages, unsigned long 
> nr_pages,
> +   struct vaddr_pin *vaddr_pin, bool make_dirty);

Hi Ira,

OK, the API seems fine to me, anyway. :)

A bit more below...

>  bool mapping_inode_has_layout(struct vaddr_pin *vaddr_pin, struct page 
> *page);
>  
>  /* Container for pinned pfns / pages */
> diff --git a/mm/gup.c b/mm/gup.c
> index eeaa0ddd08a6..6d23f70d7847 100644
> --- a/mm/gup.c
> +++ b/mm/gup.c
> @@ -2536,3 +2536,62 @@ int get_user_pages_fast(unsigned long start, int 
> nr_pages,
>   return ret;
>  }
>  EXPORT_SYMBOL_GPL(get_user_pages_fast);
> +
> +/**
> + * vaddr_pin_pages pin pages by virtual address and return the pages to the
> + * user.
> + *
> + * @addr, start address

What's with the commas? I thought kernel-doc wants colons, like this, right?

@addr: start address


> + * @nr_pages, number of pages to pin
> + * @gup_flags, flags to use for the pin
> + * @pages, array of pages returned
> + * @vaddr_pin, initalized meta information this pin is to be associated
> + * with.
> + *
> + * NOTE regarding vaddr_pin:
> + *
> + * Some callers can share pins via file descriptors to other processes.
> + * Callers such as this should use the f_owner field of vaddr_pin to indicate
> + * the file the fd points to.  All other callers should use the mm this pin 
> is
> + * being made against.  Usually "current->mm".
> + *
> + * Expects mmap_sem to be read locked.
> + */
> +long vaddr_pin_pages(unsigned long addr, unsigned long nr_pages,
> +  unsigned int gup_flags, struct page **pages,
> +  struct vaddr_pin *vaddr_pin)
> +{
> + long ret;
> +
> + gup_flags |= FOLL_LONGTERM;


Is now the right time to introduce and use FOLL_PIN? If not, then I can always
add it on top of this later, as part of gup-tracking patches. But you did point
out that FOLL_LONGTERM is taking on additional meaning, and so maybe it's better
to split that meaning up right from the start.


> +
> + if (!vaddr_pin || (!vaddr_pin->mm && !vaddr_pin->f_owner))
> + return -EINVAL;
> +
> + ret = __gup_longterm_locked(current,
> + vaddr_pin->mm,
> + addr, nr_pages,
> + pages, NULL, gup_flags,
> + vaddr_pin);
> + return ret;
> +}
> +EXPORT_SYMBOL(vaddr_pin_pages);
> +
> +/**
> + * vaddr_unpin_pages_dirty_lock - counterpart to vaddr_pin_pages
> + *
> + * @pages, array of pages returned
> + * @nr_pages, number of pages in pages
> + * @vaddr_pin, same information passed to vaddr_pin_pages
> + * @make_dirty: whether to mark the pages dirty
> + *
> + * The semantics are similar to put_user_pages_dirty_lock but a vaddr_pin 
> used
> + * in vaddr_pin_pages should be passed back into this call for propper

Typo:
  proper

> + * tracking.
> + */
> +void vaddr_unpin_pages_dirty_lock(struct page **pages, unsigned long 
> nr_pages,
> +   struct vaddr_pin *vaddr_pin, bool make_dirty)
> +{
> + __put_user_pages_dirty_lock(vaddr_pin, pages, nr_pages, make_dirty);
> +}
> +EXPORT_SYMBOL(vaddr_unpin_pages_dirty_lock);
> 

OK, whew, I'm glad to see the updated _dirty_lock() API used here. :)

thanks,
-- 
John Hubbard
NVIDIA


Re: [RFC PATCH v2 10/19] mm/gup: Pass a NULL vaddr_pin through GUP fast

2019-08-09 Thread John Hubbard
On 8/9/19 3:58 PM, ira.we...@intel.com wrote:
> From: Ira Weiny 
> 
> Internally GUP fast needs to know that fast users will not support file
> pins.  Pass NULL for vaddr_pin through the fast call stack so that the
> pin code can return an error if it encounters file backed memory within
> the address range.
> 

Reviewed-by: John Hubbard 

thanks,
-- 
John Hubbard
NVIDIA

> Signed-off-by: Ira Weiny 
> ---
>  mm/gup.c | 65 ++--
>  1 file changed, 40 insertions(+), 25 deletions(-)
> 
> diff --git a/mm/gup.c b/mm/gup.c
> index 7a449500f0a6..504af3e9a942 100644
> --- a/mm/gup.c
> +++ b/mm/gup.c
> @@ -1813,7 +1813,8 @@ static inline struct page *try_get_compound_head(struct 
> page *page, int refs)
>  
>  #ifdef CONFIG_ARCH_HAS_PTE_SPECIAL
>  static int gup_pte_range(pmd_t pmd, unsigned long addr, unsigned long end,
> -  unsigned int flags, struct page **pages, int *nr)
> +  unsigned int flags, struct page **pages, int *nr,
> +  struct vaddr_pin *vaddr_pin)
>  {
>   struct dev_pagemap *pgmap = NULL;
>   int nr_start = *nr, ret = 0;
> @@ -1894,7 +1895,8 @@ static int gup_pte_range(pmd_t pmd, unsigned long addr, 
> unsigned long end,
>   * useful to have gup_huge_pmd even if we can't operate on ptes.
>   */
>  static int gup_pte_range(pmd_t pmd, unsigned long addr, unsigned long end,
> -  unsigned int flags, struct page **pages, int *nr)
> +  unsigned int flags, struct page **pages, int *nr,
> +  struct vaddr_pin *vaddr_pin)
>  {
>   return 0;
>  }
> @@ -1903,7 +1905,7 @@ static int gup_pte_range(pmd_t pmd, unsigned long addr, 
> unsigned long end,
>  #if defined(CONFIG_ARCH_HAS_PTE_DEVMAP) && 
> defined(CONFIG_TRANSPARENT_HUGEPAGE)
>  static int __gup_device_huge(unsigned long pfn, unsigned long addr,
>   unsigned long end, struct page **pages, int *nr,
> - unsigned int flags)
> + unsigned int flags, struct vaddr_pin *vaddr_pin)
>  {
>   int nr_start = *nr;
>   struct dev_pagemap *pgmap = NULL;
> @@ -1938,13 +1940,14 @@ static int __gup_device_huge(unsigned long pfn, 
> unsigned long addr,
>  
>  static int __gup_device_huge_pmd(pmd_t orig, pmd_t *pmdp, unsigned long addr,
>   unsigned long end, struct page **pages, int *nr,
> - unsigned int flags)
> + unsigned int flags, struct vaddr_pin *vaddr_pin)
>  {
>   unsigned long fault_pfn;
>   int nr_start = *nr;
>  
>   fault_pfn = pmd_pfn(orig) + ((addr & ~PMD_MASK) >> PAGE_SHIFT);
> - if (!__gup_device_huge(fault_pfn, addr, end, pages, nr, flags))
> + if (!__gup_device_huge(fault_pfn, addr, end, pages, nr, flags,
> +vaddr_pin))
>   return 0;
>  
>   if (unlikely(pmd_val(orig) != pmd_val(*pmdp))) {
> @@ -1957,13 +1960,14 @@ static int __gup_device_huge_pmd(pmd_t orig, pmd_t 
> *pmdp, unsigned long addr,
>  
>  static int __gup_device_huge_pud(pud_t orig, pud_t *pudp, unsigned long addr,
>   unsigned long end, struct page **pages, int *nr,
> - unsigned int flags)
> + unsigned int flags, struct vaddr_pin *vaddr_pin)
>  {
>   unsigned long fault_pfn;
>   int nr_start = *nr;
>  
>   fault_pfn = pud_pfn(orig) + ((addr & ~PUD_MASK) >> PAGE_SHIFT);
> - if (!__gup_device_huge(fault_pfn, addr, end, pages, nr, flags))
> + if (!__gup_device_huge(fault_pfn, addr, end, pages, nr, flags,
> +vaddr_pin))
>   return 0;
>  
>   if (unlikely(pud_val(orig) != pud_val(*pudp))) {
> @@ -1975,7 +1979,7 @@ static int __gup_device_huge_pud(pud_t orig, pud_t 
> *pudp, unsigned long addr,
>  #else
>  static int __gup_device_huge_pmd(pmd_t orig, pmd_t *pmdp, unsigned long addr,
>   unsigned long end, struct page **pages, int *nr,
> - unsigned int flags)
> + unsigned int flags, struct vaddr_pin *vaddr_pin)
>  {
>   BUILD_BUG();
>   return 0;
> @@ -1983,7 +1987,7 @@ static int __gup_device_huge_pmd(pmd_t orig, pmd_t 
> *pmdp, unsigned long addr,
>  
>  static int __gup_device_huge_pud(pud_t pud, pud_t *pudp, unsigned long addr,
>   unsigned long end, struct page **pages, int *nr,
> - unsigned int flags)
> + unsigned int flags, struct vaddr_pin *vaddr_pin)
>  {
>   BUILD_BUG();
>   return 0;
> @@ -2075,7 +2079,8 @@ static inline int gup_huge_pd(hugepd_t hugepd, unsigned 
> long addr,
>  #endif /* CONFIG_ARCH_HAS_HUGEPD */
>  
>  static int gup_huge_pmd(pmd_t orig, pmd_t *pmdp, unsigned long addr,
> - unsigned long end, unsigned int flags, struct page **pages, int 
> *nr)
> + unsigned long end, unsigned int flags, struct page **pages,
> + int *nr, struct vaddr_pin *vaddr_pin)
>  {
>   struct page *head, *page;
>   int refs;
> @@ -2087,7 +2092,7 @@ 

Re: [RFC PATCH v2 09/19] mm/gup: Introduce vaddr_pin structure

2019-08-09 Thread John Hubbard
On 8/9/19 3:58 PM, ira.we...@intel.com wrote:
> From: Ira Weiny 
> 
> Some subsystems need to pass owning file information to GUP calls to
> allow for GUP to associate the "owning file" to any files being pinned
> within the GUP call.
> 
> Introduce an object to specify this information and pass it down through
> some of the GUP call stack.
> 
> Signed-off-by: Ira Weiny 
> ---
>  include/linux/mm.h |  9 +
>  mm/gup.c   | 36 ++--
>  2 files changed, 31 insertions(+), 14 deletions(-)
> 

Looks good, although you may want to combine it with the next patch. 
Otherwise it feels like a "to be continued" when you're reading them.

Either way, though:

Reviewed-by: John Hubbard 


thanks,
-- 
John Hubbard
NVIDIA

> diff --git a/include/linux/mm.h b/include/linux/mm.h
> index 04f22722b374..befe150d17be 100644
> --- a/include/linux/mm.h
> +++ b/include/linux/mm.h
> @@ -971,6 +971,15 @@ static inline bool is_zone_device_page(const struct page 
> *page)
>  }
>  #endif
>  
> +/**
> + * @f_owner The file who "owns this GUP"
> + * @mm The mm who "owns this GUP"
> + */
> +struct vaddr_pin {
> + struct file *f_owner;
> + struct mm_struct *mm;
> +};
> +
>  #ifdef CONFIG_DEV_PAGEMAP_OPS
>  void __put_devmap_managed_page(struct page *page);
>  DECLARE_STATIC_KEY_FALSE(devmap_managed_key);
> diff --git a/mm/gup.c b/mm/gup.c
> index 0b05e22ac05f..7a449500f0a6 100644
> --- a/mm/gup.c
> +++ b/mm/gup.c
> @@ -1005,7 +1005,8 @@ static __always_inline long 
> __get_user_pages_locked(struct task_struct *tsk,
>   struct page **pages,
>   struct vm_area_struct **vmas,
>   int *locked,
> - unsigned int flags)
> + unsigned int flags,
> + struct vaddr_pin *vaddr_pin)
>  {
>   long ret, pages_done;
>   bool lock_dropped;
> @@ -1165,7 +1166,8 @@ long get_user_pages_remote(struct task_struct *tsk, 
> struct mm_struct *mm,
>  
>   return __get_user_pages_locked(tsk, mm, start, nr_pages, pages, vmas,
>  locked,
> -gup_flags | FOLL_TOUCH | FOLL_REMOTE);
> +gup_flags | FOLL_TOUCH | FOLL_REMOTE,
> +NULL);
>  }
>  EXPORT_SYMBOL(get_user_pages_remote);
>  
> @@ -1320,7 +1322,8 @@ static long __get_user_pages_locked(struct task_struct 
> *tsk,
>   struct mm_struct *mm, unsigned long start,
>   unsigned long nr_pages, struct page **pages,
>   struct vm_area_struct **vmas, int *locked,
> - unsigned int foll_flags)
> + unsigned int foll_flags,
> + struct vaddr_pin *vaddr_pin)
>  {
>   struct vm_area_struct *vma;
>   unsigned long vm_flags;
> @@ -1504,7 +1507,7 @@ static long check_and_migrate_cma_pages(struct 
> task_struct *tsk,
>*/
>   nr_pages = __get_user_pages_locked(tsk, mm, start, nr_pages,
>  pages, vmas, NULL,
> -gup_flags);
> +gup_flags, NULL);
>  
>   if ((nr_pages > 0) && migrate_allow) {
>   drain_allow = true;
> @@ -1537,7 +1540,8 @@ static long __gup_longterm_locked(struct task_struct 
> *tsk,
> unsigned long nr_pages,
> struct page **pages,
> struct vm_area_struct **vmas,
> -   unsigned int gup_flags)
> +   unsigned int gup_flags,
> +   struct vaddr_pin *vaddr_pin)
>  {
>   struct vm_area_struct **vmas_tmp = vmas;
>   unsigned long flags = 0;
> @@ -1558,7 +1562,7 @@ static long __gup_longterm_locked(struct task_struct 
> *tsk,
>   }
>  
>   rc = __get_user_pages_locked(tsk, mm, start, nr_pages, pages,
> -  vmas_tmp, NULL, gup_flags);
> +  vmas_tmp, NULL, gup_flags, vaddr_pin);
>  
>   if (gup_flags & FOLL_LONGTERM) {
>   memalloc_nocma_restore(flags);
> @@ -1588,10 +1592,11 @@ static __always_inline long 
> __gup_longterm_locked(struct task_struct *tsk,
> unsigned long nr_pages,
> struct page **pages,
> struct vm_area_struct **vmas,
> -   unsigned int flags)
> +   unsigned int flags,
> +   struct vaddr_pin *vaddr_pin)
>  {
>   return 

Re: [RESEND PATCH 1/2 -mm] mm: account lazy free pages separately

2019-08-09 Thread Yang Shi




On 8/9/19 11:26 AM, Yang Shi wrote:



On 8/9/19 11:02 AM, Michal Hocko wrote:

On Fri 09-08-19 09:19:13, Yang Shi wrote:


On 8/9/19 1:32 AM, Michal Hocko wrote:

On Fri 09-08-19 07:57:44, Yang Shi wrote:
When doing partial unmap to THP, the pages in the affected range 
would

be considered to be reclaimable when memory pressure comes in.  And,
such pages would be put on deferred split queue and get minus from 
the

memory statistics (i.e. /proc/meminfo).

For example, when doing THP split test, /proc/meminfo would show:

Before put on lazy free list:
MemTotal:   45288336 kB
MemFree:    43281376 kB
MemAvailable:   43254048 kB
...
Active(anon):    1096296 kB
Inactive(anon): 8372 kB
...
AnonPages:   1096264 kB
...
AnonHugePages:   1056768 kB

After put on lazy free list:
MemTotal:   45288336 kB
MemFree:    43282612 kB
MemAvailable:   43255284 kB
...
Active(anon):    1094228 kB
Inactive(anon): 8372 kB
...
AnonPages: 49668 kB
...
AnonHugePages: 10240 kB

The THPs confusingly look disappeared although they are still on 
LRU if

you are not familiar with the tricks done by the kernel.

Is this a fallout of the recent deferred freeing work?
This series follows up the discussion that happened when reviewing "Make 
deferred

split shrinker memcg aware".

OK, so it is a pre-existing problem. Thanks!


David Rientjes suggested deferred split THP should be accounted into
available memory since they would be shrunk when memory pressure 
comes in,

just like MADV_FREE pages. For the discussion, please refer to:
https://www.mail-archive.com/linux-kernel@vger.kernel.org/msg2010115.html 


Thanks for the reference.

Accounted the lazy free pages to NR_LAZYFREE, and show them in 
meminfo

and other places.  With the change the /proc/meminfo would look like:
Before put on lazy free list:
The name is really confusing because I have thought of MADV_FREE 
immediately.

Yes, I agree. We may use a more specific name, i.e. DeferredSplitTHP.

+LazyFreePages: Cleanly freeable pages under memory pressure (i.e. 
deferred

+   split THP).

What does that mean actually? I have a hard time imagining what cleanly
freeable pages mean.
Like deferred split THP and MADV_FREE pages, they could be reclaimed 
during

memory pressure.

If you just go with "DeferredSplitTHP", this ambiguity would go away.

I have to study the code some more but is there any reason why those
pages are not accounted as proper THPs anymore? Sure they are partially
unmapped but they are still THPs so why cannot we keep them accounted
like that. Having a new counter to reflect that sounds like papering
over the problem to me. But as I've said I might be missing something
important here.


I think we could keep those pages accounted for NR_ANON_THPS since 
they are still THP although they are unmapped as you mentioned if we 
just want to fix the improper accounting.


By double checking what NR_ANON_THPS really means, 
Documentation/filesystems/proc.txt says "Non-file backed huge pages 
mapped into userspace page tables". Then it makes some sense to dec 
NR_ANON_THPS when removing rmap even though they are still THPs.


I don't think we would like to change the definition, if so a new 
counter may make more sense.




Here the new counter is introduced for patch 2/2 to account deferred 
split THPs into available memory since NR_ANON_THPS may contain 
non-deferred split THPs.


I could use an internal counter for deferred split THPs, but if it is 
accounted by mod_node_page_state, why not just show it in 
/proc/meminfo? Or we fix NR_ANON_THPS and show deferred split THPs in 
/proc/meminfo?










Re: [RFC PATCH v2 01/19] fs/locks: Export F_LAYOUT lease to user space

2019-08-09 Thread Dave Chinner
On Fri, Aug 09, 2019 at 03:58:15PM -0700, ira.we...@intel.com wrote:
> From: Ira Weiny 
> 
> In order to support an opt-in policy for users to allow long term pins
> of FS DAX pages we need to export the LAYOUT lease to user space.
> 
> This is the first of 2 new lease flags which must be used to allow a
> long term pin to be made on a file.
> 
> After the complete series:
> 
> 0) Registrations to Device DAX char devs are not affected
> 
> 1) The user has to opt in to allowing page pins on a file with an exclusive
>layout lease.  Both exclusive and layout lease flags are user visible now.
> 
> 2) page pins will fail if the lease is not active when the file back page is
>encountered.
> 
> 3) Any truncate or hole punch operation on a pinned DAX page will fail.
> 
> 4) The user has the option of holding the lease or releasing it.  If they
>release it no other pin calls will work on the file.
> 
> 5) Closing the file is ok.
> 
> 6) Unmapping the file is ok
> 
> 7) Pins against the files are tracked back to an owning file or an owning mm
>depending on the internal subsystem needs.  With RDMA there is an owning
>file which is related to the pined file.
> 
> 8) Only RDMA is currently supported
> 
> 9) Truncation of pages which are not actively pinned nor covered by a lease
>will succeed.

This has nothing to do with layout leases or what they provide
access arbitration over. Layout leases have _nothing_ to do with
page pinning or RDMA - they arbitrate behaviour of the file offset ->
physical block device mapping within the filesystem and the
behaviour that will occur when a specific lease is held.

The commit description needs to describe what F_LAYOUT actually
protects, when they'll get broken, etc, not how RDMA is going to use
it.

> @@ -2022,8 +2030,26 @@ static int do_fcntl_add_lease(unsigned int fd, struct 
> file *filp, long arg)
>   struct file_lock *fl;
>   struct fasync_struct *new;
>   int error;
> + unsigned int flags = 0;
> +
> + /*
> +  * NOTE on F_LAYOUT lease
> +  *
> +  * LAYOUT lease types are taken on files which the user knows that
> +  * they will be pinning in memory for some indeterminate amount of
> +  * time.

Indeed, layout leases have nothing to do with pinning of memory.
That's something an application that uses layout leases might do,
but it is largely irrelevant to the functionality layout leases
provide. What needs to be done here is explain what the layout lease
API actually guarantees w.r.t. the physical file layout, not what
some application is going to do with a lease. e.g.

The layout lease F_RDLCK guarantees that the holder will be
notified that the physical file layout is about to be
changed, and that it needs to release any resources it has
over the range of this lease, drop the lease and then
request it again to wait for the kernel to finish whatever
it is doing on that range.

The layout lease F_RDLCK also allows the holder to modify
the physical layout of the file. If an operation from the
lease holder occurs that would modify the layout, that lease
holder does not get notification that a change will occur,
but it will block until all other F_RDLCK leases have been
released by their holders before going ahead.

If there is a F_WRLCK lease held on the file, then a F_RDLCK
holder will fail any operation that may modify the physical
layout of the file. F_WRLCK provides exclusive physical
modification access to the holder, guaranteeing nothing else
will change the layout of the file while it holds the lease.

The F_WRLCK holder can change the physical layout of the
file if it so desires, this will block while F_RDLCK holders
are notified and release their leases before the
modification will take place.

We need to define the semantics we expose to userspace first.

Cheers,

Dave.
-- 
Dave Chinner
da...@fromorbit.com


Re: [PATCH 0/3] Collapse vimc into single monolithic driver

2019-08-09 Thread André Almeida
Hello Shuah,

Thanks for the patch, I made some comments below.

On 8/9/19 6:45 PM, Shuah Khan wrote:
> vimc uses Component API to split the driver into functional components.
> The real hardware resembles a monolith structure than component and
> component structure added a level of complexity making it hard to
> maintain without adding any real benefit.
> 
> The sensor is one vimc component that would makes sense to be a separate
> module to closely align with the real hardware. It would be easier to
> collapse vimc into single monolithic driver first and then split the
> sensor off as a separate module.
> 
> This patch series emoves the component API and makes minimal changes to
> the code base preserving the functional division of the code structure.
> Preserving the functional structure allows us to split the sensor off
> as a separate module in the future.
> 
> Major design elements in this change are:
> - Use existing struct vimc_ent_config and struct vimc_pipeline_config
>   to drive the initialization of the functional components.
> - Make vimc_ent_config global by moving it to vimc.h
> - Add two new hooks add and rm to initialize and register, unregister
>   and free subdevs.
> - All component API is now gone and bind and unbind hooks are modified
>   to do "add" and "rm" with minimal changes to just add and rm subdevs.
> - vimc-core's bind and unbind are now register and unregister.
> - vimc-core invokes "add" hooks from its vimc_register_devices().
>   The "add" hooks remain the same and register subdevs. They don't
>   create platform devices of their own and use vimc's pdev.dev as
>   their reference device. The "add" hooks save their vimc_ent_device(s)
>   in the corresponding vimc_ent_config.
> - vimc-core invokes "rm" hooks from its unregister to unregister subdevs
>   and cleanup.
> - vimc-core invokes "add" and "rm" hooks with pointer to struct 
> vimc_device
>   and the corresponding struct vimc_ent_config pointer.
> 
> The following configure and stream test works on all devices.
> 
> media-ctl -d platform:vimc -V '"Sensor A":0[fmt:SBGGR8_1X8/640x480]'
> media-ctl -d platform:vimc -V '"Debayer A":0[fmt:SBGGR8_1X8/640x480]'
> media-ctl -d platform:vimc -V '"Sensor B":0[fmt:SBGGR8_1X8/640x480]'
> media-ctl -d platform:vimc -V '"Debayer B":0[fmt:SBGGR8_1X8/640x480]'
> 
> v4l2-ctl -z platform:vimc -d "RGB/YUV Capture" -v width=1920,height=1440
> v4l2-ctl -z platform:vimc -d "Raw Capture 0" -v pixelformat=BA81
> v4l2-ctl -z platform:vimc -d "Raw Capture 1" -v pixelformat=BA81
> 
> v4l2-ctl --stream-mmap --stream-count=100 -d /dev/video1
> v4l2-ctl --stream-mmap --stream-count=100 -d /dev/video2
> v4l2-ctl --stream-mmap --stream-count=100 -d /dev/video3
> 
> The third patch in the series fixes a general protection fault found
> when rmmod is done while stream is active.

I applied your patch on top of media_tree/master and I did some testing.
Not sure if I did something wrong, but just adding and removing the
module generated a kernel panic:

~# modprobe vimc
~# rmmod vimc
[   16.452974] stack segment:  [#1] SMP PTI
[   16.453688] CPU: 0 PID: 2038 Comm: rmmod Not tainted 5.3.0-rc2+ #36
[   16.454678] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996),
BIOS 1.12.0-20181126_142135-anatol 04/01/2014
[   16.456191] RIP: 0010:kfree+0x4d/0x240



[   16.469188] Call Trace:
[   16.469666]  vimc_remove+0x35/0x90 [vimc]
[   16.470436]  platform_drv_remove+0x1f/0x40
[   16.471233]  device_release_driver_internal+0xd3/0x1b0
[   16.472184]  driver_detach+0x37/0x6b
[   16.472882]  bus_remove_driver+0x50/0xc1
[   16.473569]  vimc_exit+0xc/0xca0 [vimc]
[   16.474231]  __x64_sys_delete_module+0x18d/0x240
[   16.475036]  do_syscall_64+0x43/0x110
[   16.475656]  entry_SYSCALL_64_after_hwframe+0x44/0xa9
[   16.476504] RIP: 0033:0x7fceb8dafa4b



[   16.484853] Modules linked in: vimc(-) videobuf2_vmalloc
videobuf2_memops v4l2_tpg videobuf2_v4l2 videobuf2_common
[   16.486187] ---[ end trace 91e5e0894e254d49 ]---
[   16.486758] RIP: 0010:kfree+0x4d/0x240



fish: “rmmod vimc” terminated by signal SIGSEGV (Address boundary error)

I just added the module after booting; no other action was taken. Here is
what my `git log --oneline` looks like:

897d708e922b media: vimc: Fix gpf in rmmod path when stream is active
2e4a5ad8ad6d media: vimc: Collapse component structure into a single
monolithic driver
7c8da1687e92 media: vimc: move private defines to a common header
97299a303532 media: Remove dev_err() usage after platform_get_irq()
25a3d6bac6b9 media: adv7511/cobalt: rename driver name to adv7511-v4l2
...

> 
> vimc_print_dot (--print-dot) topology after this change:
> digraph board {
>   rankdir=TB
>   n0001 [label="{{} | Sensor A\n/dev/v4l-subdev0 | { 0}}", 
> shape=Mrecord, style=filled, fillcolor=green]
>   n0001:port0 -> n0005:port0 [style=bold]
>   

Re: [RFC PATCH v2 07/19] fs/xfs: Teach xfs to use new dax_layout_busy_page()

2019-08-09 Thread Dave Chinner
On Fri, Aug 09, 2019 at 03:58:21PM -0700, ira.we...@intel.com wrote:
> From: Ira Weiny 
> 
> dax_layout_busy_page() can now operate on a sub-range of the
> address_space provided.
> 
> Have xfs specify the sub range to dax_layout_busy_page()

Hmmm. I've got patches that change all these XFS interfaces to
support range locks. I'm not sure the way the ranges are passed here
is the best way to do it, and I suspect they aren't correct in some
cases, either

> diff --git a/fs/xfs/xfs_iops.c b/fs/xfs/xfs_iops.c
> index ff3c1fae5357..f0de5486f6c1 100644
> --- a/fs/xfs/xfs_iops.c
> +++ b/fs/xfs/xfs_iops.c
> @@ -1042,10 +1042,16 @@ xfs_vn_setattr(
>   xfs_ilock(ip, XFS_MMAPLOCK_EXCL);
>   iolock = XFS_IOLOCK_EXCL | XFS_MMAPLOCK_EXCL;
>  
> - error = xfs_break_layouts(inode, &iolock, BREAK_UNMAP);
> - if (error) {
> - xfs_iunlock(ip, XFS_MMAPLOCK_EXCL);
> - return error;
> + if (iattr->ia_size < inode->i_size) {
> + loff_t  off = iattr->ia_size;
> + loff_t  len = inode->i_size - 
> iattr->ia_size;
> +
> + error = xfs_break_layouts(inode, &iolock, off, len,
> +   BREAK_UNMAP);
> + if (error) {
> + xfs_iunlock(ip, XFS_MMAPLOCK_EXCL);
> + return error;
> + }

This isn't right - truncate up still needs to break the layout on
the last filesystem block of the file, and truncate down needs to
extend to "maximum file offset" because we remove all extents beyond
EOF on a truncate down.

i.e. when we use preallocation, the extent map extends beyond EOF,
and layout leases need to be able to extend beyond the current EOF
to allow the lease owner to do extending writes, extending truncate,
preallocation beyond EOF, etc safely without having to get a new
lease to cover the new region in the extended file...

Cheers,

Dave.
-- 
Dave Chinner
da...@fromorbit.com


[PATCH v3] perf diff: Report noisy for cycles diff

2019-08-09 Thread Jin Yao
This patch prints the stddev and hist for the cycles diff of
program block. It can help us to understand if the cycles diff
is noisy or not.

This patch is inspired by Andi Kleen's patch
https://lwn.net/Articles/600471/

We create new option '-n or --noisy'.

Example:

perf record -b ./div
perf record -b ./div
perf diff -c cycles

 # Baseline   [Program Block Range] Cycles 
Diff  Shared Object  Symbol
 #   
..  
.  
 #
 46.72% [div.c:40 -> div.c:40]  
  0  div[.] main
 46.72% [div.c:42 -> div.c:44]  
  0  div[.] main
 46.72% [div.c:42 -> div.c:39]  
  0  div[.] main
 20.54% [random_r.c:357 -> random_r.c:394]  
  1  libc-2.27.so   [.] __random_r
 20.54% [random_r.c:357 -> random_r.c:380]  
  0  libc-2.27.so   [.] __random_r
 20.54% [random_r.c:388 -> random_r.c:388]  
  0  libc-2.27.so   [.] __random_r
 20.54% [random_r.c:388 -> random_r.c:391]  
  0  libc-2.27.so   [.] __random_r
 17.04% [random.c:288 -> random.c:291]  
  0  libc-2.27.so   [.] __random
 17.04% [random.c:291 -> random.c:291]  
  0  libc-2.27.so   [.] __random
 17.04% [random.c:293 -> random.c:293]  
  0  libc-2.27.so   [.] __random
 17.04% [random.c:295 -> random.c:295]  
  0  libc-2.27.so   [.] __random
 17.04% [random.c:295 -> random.c:295]  
  0  libc-2.27.so   [.] __random
 17.04% [random.c:298 -> random.c:298]  
  0  libc-2.27.so   [.] __random
  8.40% [div.c:22 -> div.c:25]  
  0  div[.] compute_flag
  8.40% [div.c:27 -> div.c:28]  
  0  div[.] compute_flag
  5.14%   [rand.c:26 -> rand.c:27]  
  0  libc-2.27.so   [.] rand
  5.14%   [rand.c:28 -> rand.c:28]  
  0  libc-2.27.so   [.] rand
  2.15% [rand@plt+0 -> rand@plt+0]  
  0  div[.] rand@plt
  0.00% 
 [kernel.kallsyms]  [k] __x86_indirect_thunk_rax
  0.00%   [do_mmap+714 -> do_mmap+732]  
-10  [kernel.kallsyms]  [k] do_mmap
  0.00%   [do_mmap+737 -> do_mmap+765]  
  1  [kernel.kallsyms]  [k] do_mmap
  0.00%   [do_mmap+262 -> do_mmap+299]  
  0  [kernel.kallsyms]  [k] do_mmap
  0.00% [__x86_indirect_thunk_r15+0 -> __x86_indirect_thunk_r15+0]  
  7  [kernel.kallsyms]  [k] __x86_indirect_thunk_r15
  0.00%   [native_sched_clock+0 -> native_sched_clock+119]  
 -1  [kernel.kallsyms]  [k] native_sched_clock
  0.00%[native_write_msr+0 -> native_write_msr+16]  
-13  [kernel.kallsyms]  [k] native_write_msr

When we enable the option '-n' or '--noisy', the output is

perf diff -c cycles -n or perf diff -c cycles --noisy

 # Baseline   [Program Block Range] Cycles 
Diffstddev/Hist  Shared Object  Symbol
 #   
..  
.  .  
 #
 46.72% [div.c:40 -> div.c:40]  
  0  ± 37.8% ▁█▁▁██▁█   div[.] main
 46.72% [div.c:42 -> div.c:44]  
  0  ± 49.4% ▁▁▂█   div[.] main
 46.72% [div.c:42 -> div.c:39]  
  0  ± 24.1% ▃█▂▄▁▃▂▁   div[.] main
 20.54% [random_r.c:357 -> random_r.c:394]  
  1  ± 33.5% ▅▂▁█▃▁▂▁   libc-2.27.so   [.] __random_r
 20.54% [random_r.c:357 -> random_r.c:380]  
  0  ± 39.4% ▁▁█▁██▅▁   libc-2.27.so   [.] __random_r
 20.54% [random_r.c:388 -> random_r.c:388]  
  0 libc-2.27.so   [.] __random_r
 20.54% [random_r.c:388 -> random_r.c:391]  
  0  ± 41.2% ▁▃▁▂█▄▃▁   libc-2.27.so   [.] __random_r
 17.04% [random.c:288 

Re: [RFC PATCH v2 08/19] fs/xfs: Fail truncate if page lease can't be broken

2019-08-09 Thread Dave Chinner
On Fri, Aug 09, 2019 at 03:58:22PM -0700, ira.we...@intel.com wrote:
> From: Ira Weiny 
> 
> If pages are under a lease fail the truncate operation.  We change the order 
> of
> lease breaks to directly fail the operation if the lease exists.
> 
> Select EXPORT_BLOCK_OPS for FS_DAX to ensure that xfs_break_lease_layouts() is
> defined for FS_DAX as well as pNFS.
> 
> Signed-off-by: Ira Weiny 
> ---
>  fs/Kconfig| 1 +
>  fs/xfs/xfs_file.c | 5 +++--
>  2 files changed, 4 insertions(+), 2 deletions(-)
> 
> diff --git a/fs/Kconfig b/fs/Kconfig
> index 14cd4abdc143..c10b91f92528 100644
> --- a/fs/Kconfig
> +++ b/fs/Kconfig
> @@ -48,6 +48,7 @@ config FS_DAX
>   select DEV_PAGEMAP_OPS if (ZONE_DEVICE && !FS_DAX_LIMITED)
>   select FS_IOMAP
>   select DAX
> + select EXPORTFS_BLOCK_OPS
>   help
> Direct Access (DAX) can be used on memory-backed block devices.
> If the block device supports DAX and the filesystem supports DAX,

That looks wrong. If you require xfs_break_lease_layouts() outside
of pnfs context, then move the function in the XFS code base to a
file that is built in. Its only external dependency is on the
break_layout() function, and XFS already has other unconditional
direct calls to break_layout()...

Cheers,

Dave.
-- 
Dave Chinner
da...@fromorbit.com


Re: checkpatch.pl should suggest __section

2019-08-09 Thread Joe Perches
On Fri, 2019-08-09 at 16:04 -0700, Nick Desaulniers wrote:
> > how about:
> > ---
> >  scripts/checkpatch.pl | 9 +
> >  1 file changed, 9 insertions(+)
> > 
> > diff --git a/scripts/checkpatch.pl b/scripts/checkpatch.pl
> > index 1cdacb4fd207..8e6693ca772c 100755
> > --- a/scripts/checkpatch.pl
> > +++ b/scripts/checkpatch.pl
> > @@ -5901,6 +5901,15 @@ sub process {
> >  "__aligned(size) is preferred over 
> > __attribute__((aligned(size)))\n" . $herecurr);
> > }
> > 
> > +# Check for __attribute__ section, prefer __section (without quotes)
> > +   if ($realfile !~ m@\binclude/uapi/@ &&
> > +   $line =~ 
> > /\b__attribute__\s*\(\s*\(.*_*section_*\s*\(\s*("[^"]*")/) {
> > +   my $old = substr($rawline, $-[1], $+[1] - $-[1]);
> > +   my $new = substr($old, 1, -1);
> > +   WARN("PREFER_SECTION",
> > +"__section($new) is preferred over 
> > __attribute__((section($old)))\n" . $herecurr);
> > +   }
> > +
> 
> I can't read Perl, but this looks pretty good.
> Acked-by: Nick Desaulniers 

I'll add a Suggested-by: for you.

But a Tested-by would be more valuable than an Acked-by if you
don't actually know how it works.




Re: [PATCH 1/2] genirq/affinity: improve __irq_build_affinity_masks()

2019-08-09 Thread Ming Lei
On Fri, Aug 9, 2019 at 10:44 PM Keith Busch  wrote:
>
> On Fri, Aug 09, 2019 at 06:23:09PM +0800, Ming Lei wrote:
> > One invariant of __irq_build_affinity_masks() is that all CPUs in the
> > specified masks( cpu_mask AND node_to_cpumask for each node) should be
> > covered during the spread. Even though all requested vectors have been
> > reached, we still need to spread vectors among left CPUs. The similar
> > policy has been taken in case of 'numvecs <= nodes'.
> >
> > So remove the following check inside the loop:
> >
> >   if (done >= numvecs)
> >   break;
> >
> > Meantime assign at least 1 vector for left nodes if 'numvecs' vectors
> > have been spread.
> >
> > Also, if the specified cpumask for one numa node is empty, simply not
> > spread vectors on this node.
> >
> > Cc: Christoph Hellwig 
> > Cc: Keith Busch 
> > Cc: linux-n...@lists.infradead.org,
> > Cc: Jon Derrick 
> > Signed-off-by: Ming Lei 
> > ---
> >  kernel/irq/affinity.c | 33 +
> >  1 file changed, 21 insertions(+), 12 deletions(-)
> >
> > diff --git a/kernel/irq/affinity.c b/kernel/irq/affinity.c
> > index 6fef48033f96..bc3652a2c61b 100644
> > --- a/kernel/irq/affinity.c
> > +++ b/kernel/irq/affinity.c
> > @@ -129,21 +129,32 @@ static int __irq_build_affinity_masks(unsigned int 
> > startvec,
> >   for_each_node_mask(n, nodemsk) {
> >   unsigned int ncpus, v, vecs_to_assign, vecs_per_node;
> >
> > - /* Spread the vectors per node */
> > - vecs_per_node = (numvecs - (curvec - firstvec)) / nodes;
> > -
> >   /* Get the cpus on this node which are in the mask */
> >   cpumask_and(nmsk, cpu_mask, node_to_cpumask[n]);
> > -
> > - /* Calculate the number of cpus per vector */
> >   ncpus = cpumask_weight(nmsk);
> > + if (!ncpus)
> > + continue;
>
> This shouldn't be possible, right? The nodemsk we're looping  wouldn't
> have had that node set if no CPUs intersect the node_to_cpu_mask for
> that node, so the resulting cpumask should always have a non-zero weight.

 cpumask_and(nmsk, cpu_mask, node_to_cpumask[n]);

It is often true, see the following cases:

1) all CPUs in one node are not present

OR

2) all CPUs in one node are present

>
> > @@ -153,16 +164,14 @@ static int __irq_build_affinity_masks(unsigned int 
> > startvec,
> >   }
> >   irq_spread_init_one([curvec].mask, nmsk,
> >   cpus_per_vec);
> > + if (++curvec >= last_affv)
> > + curvec = firstvec;
>
> I'm not so sure about wrapping the vector to share it across nodes. We

The wrapping is always there, not added by this patch.

Most of the time it won't happen, since we spread vectors on the remaining
(unassigned) nodes.  It only happens when there are remaining
nodes not yet spread. We have to make sure all nodes are spread.

And the similar policy is applied on the branch of 'numvecs <= nodes' too.

> have enough vectors in this path to ensure each compute node can have
> a unique one, and it's much cheaper to share these within nodes than
> across them.

The patch just moves the wrapping from loop outside into the loop, then
all 'extra_vecs' can be covered because it is always < 'vecs_to_assign'.

What matters is that the following check is removed:

   if (done >= numvecs)
break;

then all nodes can be covered.

Thanks,
Ming Lei


[rcu:dev.2019.08.01b 66/72] ERROR: "rcu_momentary_dyntick_idle" [kernel/rcu/rcutorture.ko] undefined!

2019-08-09 Thread kbuild test robot
tree:   
https://kernel.googlesource.com/pub/scm/linux/kernel/git/paulmck/linux-rcu.git 
dev.2019.08.01b
head:   6c92be8b1b81158f48ab0cb00d34d451dae1fa3c
commit: 5f4264e33ca4e7cee035cee5bfa62f6d1bbf2cda [66/72] rcutorture: Emulate 
dyntick aspect of userspace nohz_full sojourn
config: x86_64-randconfig-g001-201931 (attached as .config)
compiler: gcc-7 (Debian 7.4.0-10) 7.4.0
reproduce:
git checkout 5f4264e33ca4e7cee035cee5bfa62f6d1bbf2cda
# save the attached .config to linux build tree
make ARCH=x86_64 

If you fix the issue, kindly add following tag
Reported-by: kbuild test robot 

All errors (new ones prefixed by >>):

   ERROR: "tick_nohz_dep_clear_task" [kernel/rcu/rcutorture.ko] undefined!
   ERROR: "tick_nohz_dep_set_task" [kernel/rcu/rcutorture.ko] undefined!
   ERROR: "tick_nohz_full_running" [kernel/rcu/rcutorture.ko] undefined!
>> ERROR: "rcu_momentary_dyntick_idle" [kernel/rcu/rcutorture.ko] undefined!

---
0-DAY kernel test infrastructureOpen Source Technology Center
https://lists.01.org/pipermail/kbuild-all   Intel Corporation


.config.gz
Description: application/gzip


Re: checkpatch.pl should suggest __section

2019-08-09 Thread Nick Desaulniers
On Fri, Aug 9, 2019 at 3:58 PM Joe Perches  wrote:
>
> On Fri, 2019-08-09 at 15:21 -0700, Nick Desaulniers wrote:
> > Hi Joe,
> > While debugging:
> > https://github.com/ClangBuiltLinux/linux/issues/619
> > we found a bunch of places where __section is not used but could be,
> > and uses a string literal when it probably should not be.
> >
> > Just a thought that maybe checkpatch.pl could warn if
> > `__attribute__((section` appeared in the added diff, and suggest
> > __section? Then further warn to not use `""` for the section name?
>
> Hmm, that makes me wonder about the existing __section uses
> _with_ a quote are actually in the proper sections.
>
> $ git grep -n -P '\b__section\s*\(\s*"'
> arch/arm64/kernel/smp_spin_table.c:22:volatile unsigned long 
> __section(".mmuoff.data.read")
> arch/s390/boot/startup.c:49:static struct diag210 _diag210_tmp_dma 
> __section(".dma.data");
> include/linux/compiler.h:27:
> __section("_ftrace_annotated_branch")   \
> include/linux/compiler.h:63:__section("_ftrace_branch")   
>   \
> include/linux/compiler.h:121:#define __annotate_jump_table 
> __section(".rodata..c_jump_table")
> include/linux/compiler.h:158:   __section("___kentry" "+" #sym )  
>   \
> include/linux/compiler.h:301:   static void * 
> __section(".discard.addressable") __used \
> include/linux/export.h:107: static int __ksym_marker_##sym[0] 
> __section(".discard.ksym") __used
> include/linux/srcutree.h:127:   __section("___srcu_struct_ptrs") = 
> 

I'm going through and fixing all of these now.  Thinking about sending
one treewide fix to akpm.

>
> Maybe there should also be a __section("") test too.

I think so.  Some of the trickier ones are ones that use the
stringification C preprocessor operator.  I need to think more about
these.

>
> Anyway, how about:
> ---
>  scripts/checkpatch.pl | 9 +
>  1 file changed, 9 insertions(+)
>
> diff --git a/scripts/checkpatch.pl b/scripts/checkpatch.pl
> index 1cdacb4fd207..8e6693ca772c 100755
> --- a/scripts/checkpatch.pl
> +++ b/scripts/checkpatch.pl
> @@ -5901,6 +5901,15 @@ sub process {
>  "__aligned(size) is preferred over 
> __attribute__((aligned(size)))\n" . $herecurr);
> }
>
> +# Check for __attribute__ section, prefer __section (without quotes)
> +   if ($realfile !~ m@\binclude/uapi/@ &&
> +   $line =~ 
> /\b__attribute__\s*\(\s*\(.*_*section_*\s*\(\s*("[^"]*")/) {
> +   my $old = substr($rawline, $-[1], $+[1] - $-[1]);
> +   my $new = substr($old, 1, -1);
> +   WARN("PREFER_SECTION",
> +"__section($new) is preferred over 
> __attribute__((section($old)))\n" . $herecurr);
> +   }
> +

I can't read Perl, but this looks pretty good.
Acked-by: Nick Desaulniers 
-- 
Thanks,
~Nick Desaulniers


[RFC PATCH v2 00/19] RDMA/FS DAX truncate proposal V1,000,002 ;-)

2019-08-09 Thread ira . weiny
From: Ira Weiny 

Pre-requisites
==============
Based on mmotm tree.

Based on the feedback from LSFmm, the LWN article, the RFC series since
then, and a ton of scenarios I've worked in my mind and/or tested...[1]

Solution summary
================

The real issue is that there is no use case for a user to have RDMA pinned
memory which is then truncated.  So really any solution we present which:

A) Prevents file system corruption or data leaks
...and...
B) Informs the user that they did something wrong

Should be an acceptable solution.

Because this is slightly new behavior, and because it is going to be
specific to DAX (because of the lack of a page cache), we have made the user
"opt in" to this behavior.

The following patches implement the following solution.

0) Registrations to Device DAX char devs are not affected

1) The user has to opt in to allowing page pins on a file with an exclusive
   layout lease.  Both exclusive and layout lease flags are user visible now.

2) page pins will fail if the lease is not active when the file back page is
   encountered.

3) Any truncate or hole punch operation on a pinned DAX page will fail.

4) The user has the option of holding the lease or releasing it.  If they
   release it no other pin calls will work on the file.

5) Closing the file is ok.

6) Unmapping the file is ok

7) Pins against the files are tracked back to an owning file or an owning mm
   depending on the internal subsystem needs.  With RDMA there is an owning
   file which is related to the pinned file.

8) Only RDMA is currently supported

9) Truncation of pages which are not actively pinned nor covered by a lease
   will succeed.


Reporting of pinned files in procfs
===================================

A number of alternatives were explored for how to report the file pins within
procfs.  The following incorporates ideas from Jan Kara, Jason Gunthorpe, Dave
Chinner, Dan Williams and myself.

A new entry is added to procfs

/proc/<pid>/file_pins

For processes which have pinned DAX file memory, file_pins references come in 2
flavors:  those which are attached to another open file descriptor (for example,
what is done in the RDMA subsystem) and those which are attached to a process
mm.

For those which are attached to another open file descriptor (such as RDMA)
the file pin references go through the 'struct file' associated with that pin.
In RDMA this is the RDMA context struct file.

The resulting output from proc fs is something like.

$ cat /proc/<pid>/file_pins
3: /dev/infiniband/uverbs0
/mnt/pmem/foo

Where '3' is the file descriptor (and file path) of the rdma context within the
process.  The paths of the files pinned using that context are then listed.

RDMA contexts may have multiple MR each of which may have multiple files pinned
within them.  So an output like the following is possible.

$ cat /proc/<pid>/file_pins
4: /dev/infiniband/uverbs0
/mnt/pmem/foo
/mnt/pmem/bar
/mnt/pmem/another
/mnt/pmem/one

The actual memory regions associated with the file pins are not reported.

For processes which are pinning memory which is not associated with a specific
file descriptor memory pins are reported directly as paths to the file.

$ cat /proc/<pid>/file_pins
/mnt/pmem/foo

Putting the above together if a process was using RDMA and another subsystem
the output could be something like:


$ cat /proc/<pid>/file_pins
4: /dev/infiniband/uverbs0
/mnt/pmem/foo
/mnt/pmem/bar
/mnt/pmem/another
/mnt/pmem/one
/mnt/pmem/foo
/mnt/pmem/another
/mnt/pmem/mm_mapped_file


[1] https://lkml.org/lkml/2019/6/5/1046


Background
==========

It should be noted that one solution for this problem is to use RDMA's On
Demand Paging (ODP).  There are 2 big reasons this may not work.

1) The hardware being used for RDMA may not support ODP
2) ODP may be detrimental to the over all network (cluster or cloud)
   performance

Therefore, in order to support RDMA to File system pages without On Demand
Paging (ODP) a number of things need to be done.

1) "longterm" GUP users need to inform other subsystems that they have taken a
   pin on a page which may remain pinned for a very "long time".  The
   definition of long time is debatable but it has been established that RDMA's
   use of pages for minutes, hours, or even days after the pin is the extreme
   case which makes this problem most severe.

2) Any page which is "controlled" by a file system needs to have special
   handling.  The details of the handling depends on if the page is page cache
   fronted or not.

   2a) A page cache fronted page which has been pinned by GUP long term can use
   a bounce buffer to allow the file system to write back snap shots of the page.
   This is handled by the FS recognizing the GUP long term pin and making a copy
   of the page to be written back.
NOTE: this patch set does not address this path.

   2b) A FS "controlled" page which is not page 

[RFC PATCH v2 05/19] fs/ext4: Teach ext4 to break layout leases

2019-08-09 Thread ira . weiny
From: Ira Weiny 

ext4 must attempt to break a layout lease if it is held to know if the
layout can be modified.

Split out the logic to determine if a mapping is DAX, export it, and then
break layout leases if a mapping is DAX.

Signed-off-by: Ira Weiny 

---
Changes from RFC v1:

Based on feedback from Dave Chinner, add support to fail all
other layout breaks when a lease is held.

 fs/dax.c| 23 ---
 fs/ext4/inode.c |  7 +++
 include/linux/dax.h |  6 ++
 3 files changed, 29 insertions(+), 7 deletions(-)

diff --git a/fs/dax.c b/fs/dax.c
index b64964ef44f6..a14ec32255d8 100644
--- a/fs/dax.c
+++ b/fs/dax.c
@@ -557,6 +557,21 @@ static void *grab_mapping_entry(struct xa_state *xas,
return xa_mk_internal(VM_FAULT_FALLBACK);
 }
 
+bool dax_mapping_is_dax(struct address_space *mapping)
+{
+   /*
+* In the 'limited' case get_user_pages() for dax is disabled.
+*/
+   if (IS_ENABLED(CONFIG_FS_DAX_LIMITED))
+   return false;
+
+   if (!dax_mapping(mapping) || !mapping_mapped(mapping))
+   return false;
+
+   return true;
+}
+EXPORT_SYMBOL_GPL(dax_mapping_is_dax);
+
 /**
  * dax_layout_busy_page - find first pinned page in @mapping
  * @mapping: address space to scan for a page with ref count > 1
@@ -579,13 +594,7 @@ struct page *dax_layout_busy_page(struct address_space 
*mapping)
unsigned int scanned = 0;
struct page *page = NULL;
 
-   /*
-* In the 'limited' case get_user_pages() for dax is disabled.
-*/
-   if (IS_ENABLED(CONFIG_FS_DAX_LIMITED))
-   return NULL;
-
-   if (!dax_mapping(mapping) || !mapping_mapped(mapping))
+   if (!dax_mapping_is_dax(mapping))
return NULL;
 
/*
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index b2c8d09acf65..f08f48de52c5 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -4271,6 +4271,13 @@ int ext4_break_layouts(struct inode *inode)
if (WARN_ON_ONCE(!rwsem_is_locked(>i_mmap_sem)))
return -EINVAL;
 
+   /* Break layout leases if active */
+   if (dax_mapping_is_dax(inode->i_mapping)) {
+   error = break_layout(inode, true);
+   if (error)
+   return error;
+   }
+
do {
page = dax_layout_busy_page(inode->i_mapping);
if (!page)
diff --git a/include/linux/dax.h b/include/linux/dax.h
index 9bd8528bd305..da0768b34b48 100644
--- a/include/linux/dax.h
+++ b/include/linux/dax.h
@@ -143,6 +143,7 @@ struct dax_device *fs_dax_get_by_bdev(struct block_device 
*bdev);
 int dax_writeback_mapping_range(struct address_space *mapping,
struct block_device *bdev, struct writeback_control *wbc);
 
+bool dax_mapping_is_dax(struct address_space *mapping);
 struct page *dax_layout_busy_page(struct address_space *mapping);
 dax_entry_t dax_lock_page(struct page *page);
 void dax_unlock_page(struct page *page, dax_entry_t cookie);
@@ -174,6 +175,11 @@ static inline struct dax_device *fs_dax_get_by_bdev(struct 
block_device *bdev)
return NULL;
 }
 
+static inline bool dax_mapping_is_dax(struct address_space *mapping)
+{
+   return false;
+}
+
 static inline struct page *dax_layout_busy_page(struct address_space *mapping)
 {
return NULL;
-- 
2.20.1



[RFC PATCH v2 03/19] mm/gup: Pass flags down to __gup_device_huge* calls

2019-08-09 Thread ira . weiny
From: Ira Weiny 

In order to support checking for a layout lease on a FS DAX inode these
calls need to know if FOLL_LONGTERM was specified.

Signed-off-by: Ira Weiny 
---
 mm/gup.c | 26 +-
 1 file changed, 17 insertions(+), 9 deletions(-)

diff --git a/mm/gup.c b/mm/gup.c
index b6a293bf1267..80423779a50a 100644
--- a/mm/gup.c
+++ b/mm/gup.c
@@ -1881,7 +1881,8 @@ static int gup_pte_range(pmd_t pmd, unsigned long addr, 
unsigned long end,
 
 #if defined(CONFIG_ARCH_HAS_PTE_DEVMAP) && defined(CONFIG_TRANSPARENT_HUGEPAGE)
 static int __gup_device_huge(unsigned long pfn, unsigned long addr,
-   unsigned long end, struct page **pages, int *nr)
+   unsigned long end, struct page **pages, int *nr,
+   unsigned int flags)
 {
int nr_start = *nr;
struct dev_pagemap *pgmap = NULL;
@@ -1907,30 +1908,33 @@ static int __gup_device_huge(unsigned long pfn, 
unsigned long addr,
 }
 
 static int __gup_device_huge_pmd(pmd_t orig, pmd_t *pmdp, unsigned long addr,
-   unsigned long end, struct page **pages, int *nr)
+   unsigned long end, struct page **pages, int *nr,
+   unsigned int flags)
 {
unsigned long fault_pfn;
int nr_start = *nr;
 
fault_pfn = pmd_pfn(orig) + ((addr & ~PMD_MASK) >> PAGE_SHIFT);
-   if (!__gup_device_huge(fault_pfn, addr, end, pages, nr))
+   if (!__gup_device_huge(fault_pfn, addr, end, pages, nr, flags))
return 0;
 
if (unlikely(pmd_val(orig) != pmd_val(*pmdp))) {
undo_dev_pagemap(nr, nr_start, pages);
return 0;
}
+
return 1;
 }
 
 static int __gup_device_huge_pud(pud_t orig, pud_t *pudp, unsigned long addr,
-   unsigned long end, struct page **pages, int *nr)
+   unsigned long end, struct page **pages, int *nr,
+   unsigned int flags)
 {
unsigned long fault_pfn;
int nr_start = *nr;
 
fault_pfn = pud_pfn(orig) + ((addr & ~PUD_MASK) >> PAGE_SHIFT);
-   if (!__gup_device_huge(fault_pfn, addr, end, pages, nr))
+   if (!__gup_device_huge(fault_pfn, addr, end, pages, nr, flags))
return 0;
 
if (unlikely(pud_val(orig) != pud_val(*pudp))) {
@@ -1941,14 +1945,16 @@ static int __gup_device_huge_pud(pud_t orig, pud_t 
*pudp, unsigned long addr,
 }
 #else
 static int __gup_device_huge_pmd(pmd_t orig, pmd_t *pmdp, unsigned long addr,
-   unsigned long end, struct page **pages, int *nr)
+   unsigned long end, struct page **pages, int *nr,
+   unsigned int flags)
 {
BUILD_BUG();
return 0;
 }
 
 static int __gup_device_huge_pud(pud_t pud, pud_t *pudp, unsigned long addr,
-   unsigned long end, struct page **pages, int *nr)
+   unsigned long end, struct page **pages, int *nr,
+   unsigned int flags)
 {
BUILD_BUG();
return 0;
@@ -2051,7 +2057,8 @@ static int gup_huge_pmd(pmd_t orig, pmd_t *pmdp, unsigned 
long addr,
if (pmd_devmap(orig)) {
if (unlikely(flags & FOLL_LONGTERM))
return 0;
-   return __gup_device_huge_pmd(orig, pmdp, addr, end, pages, nr);
+   return __gup_device_huge_pmd(orig, pmdp, addr, end, pages, nr,
+flags);
}
 
refs = 0;
@@ -2092,7 +2099,8 @@ static int gup_huge_pud(pud_t orig, pud_t *pudp, unsigned 
long addr,
if (pud_devmap(orig)) {
if (unlikely(flags & FOLL_LONGTERM))
return 0;
-   return __gup_device_huge_pud(orig, pudp, addr, end, pages, nr);
+   return __gup_device_huge_pud(orig, pudp, addr, end, pages, nr,
+flags);
}
 
refs = 0;
-- 
2.20.1



[RFC PATCH v2 02/19] fs/locks: Add Exclusive flag to user Layout lease

2019-08-09 Thread ira . weiny
From: Ira Weiny 

Add an exclusive lease flag which indicates that the layout mechanism
can not be broken.

Exclusive layout leases allow the file system to know that pages may be
GUP pinned and that attempts to change the layout, i.e. truncate, should be
failed.

A process which attempts to break its own exclusive lease gets an
EDEADLOCK return to help determine that this is likely a programming bug
vs someone else holding a resource.

Signed-off-by: Ira Weiny 
---
 fs/locks.c   | 23 +--
 include/linux/fs.h   |  1 +
 include/uapi/asm-generic/fcntl.h |  2 ++
 3 files changed, 24 insertions(+), 2 deletions(-)

diff --git a/fs/locks.c b/fs/locks.c
index ad17c6ffca06..0c7359cdab92 100644
--- a/fs/locks.c
+++ b/fs/locks.c
@@ -626,6 +626,8 @@ static int lease_init(struct file *filp, long type, 
unsigned int flags,
fl->fl_flags = FL_LEASE;
if (flags & FL_LAYOUT)
fl->fl_flags |= FL_LAYOUT;
+   if (flags & FL_EXCLUSIVE)
+   fl->fl_flags |= FL_EXCLUSIVE;
fl->fl_start = 0;
fl->fl_end = OFFSET_MAX;
fl->fl_ops = NULL;
@@ -1619,6 +1621,14 @@ int __break_lease(struct inode *inode, unsigned int 
mode, unsigned int type)
list_for_each_entry_safe(fl, tmp, >flc_lease, fl_list) {
if (!leases_conflict(fl, new_fl))
continue;
+   if (fl->fl_flags & FL_EXCLUSIVE) {
+   error = -ETXTBSY;
+   if (new_fl->fl_pid == fl->fl_pid) {
+   error = -EDEADLOCK;
+   goto out;
+   }
+   continue;
+   }
if (want_write) {
if (fl->fl_flags & FL_UNLOCK_PENDING)
continue;
@@ -1634,6 +1644,13 @@ int __break_lease(struct inode *inode, unsigned int 
mode, unsigned int type)
locks_delete_lock_ctx(fl, );
}
 
+   /* We differentiate between -EDEADLOCK and -ETXTBSY so the above loop
+* continues with -ETXTBSY looking for a potential deadlock instead.
+* If deadlock is not found go ahead and return -ETXTBSY.
+*/
+   if (error == -ETXTBSY)
+   goto out;
+
if (list_empty(>flc_lease))
goto out;
 
@@ -2044,9 +2061,11 @@ static int do_fcntl_add_lease(unsigned int fd, struct 
file *filp, long arg)
 * to revoke the lease in break_layout()  And this is done by using
 * F_WRLCK in the break code.
 */
-   if (arg == F_LAYOUT) {
+   if ((arg & F_LAYOUT) == F_LAYOUT) {
+   if ((arg & F_EXCLUSIVE) == F_EXCLUSIVE)
+   flags |= FL_EXCLUSIVE;
arg = F_RDLCK;
-   flags = FL_LAYOUT;
+   flags |= FL_LAYOUT;
}
 
fl = lease_alloc(filp, arg, flags);
diff --git a/include/linux/fs.h b/include/linux/fs.h
index dd60d5be9886..2e41ce547913 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1005,6 +1005,7 @@ static inline struct file *get_file(struct file *f)
 #define FL_UNLOCK_PENDING  512 /* Lease is being broken */
 #define FL_OFDLCK  1024/* lock is "owned" by struct file */
 #define FL_LAYOUT  2048/* outstanding pNFS layout or user held pin */
+#define FL_EXCLUSIVE   4096/* Layout lease is exclusive */
 
 #define FL_CLOSE_POSIX (FL_POSIX | FL_CLOSE)
 
diff --git a/include/uapi/asm-generic/fcntl.h b/include/uapi/asm-generic/fcntl.h
index baddd54f3031..88b175ceccbc 100644
--- a/include/uapi/asm-generic/fcntl.h
+++ b/include/uapi/asm-generic/fcntl.h
@@ -176,6 +176,8 @@ struct f_owner_ex {
 
 #define F_LAYOUT   16  /* layout lease to allow longterm pins such as
   RDMA */
+#define F_EXCLUSIVE32  /* layout lease is exclusive */
+   /* FIXME or shoudl this be F_EXLCK??? */
 
 /* operations for bsd flock(), also used by the kernel implementation */
 #define LOCK_SH1   /* shared lock */
-- 
2.20.1



Re: checkpatch.pl should suggest __section

2019-08-09 Thread Joe Perches
On Fri, 2019-08-09 at 15:21 -0700, Nick Desaulniers wrote:
> Hi Joe,
> While debugging:
> https://github.com/ClangBuiltLinux/linux/issues/619
> we found a bunch of places where __section is not used but could be,
> and uses a string literal when it probably should not be.
> 
> Just a thought that maybe checkpatch.pl could warn if
> `__attribute__((section` appeared in the added diff, and suggest
> __section? Then further warn to not use `""` for the section name?

Hmm, that makes me wonder about the existing __section uses
_with_ a quote are actually in the proper sections.

$ git grep -n -P '\b__section\s*\(\s*"'
arch/arm64/kernel/smp_spin_table.c:22:volatile unsigned long 
__section(".mmuoff.data.read")
arch/s390/boot/startup.c:49:static struct diag210 _diag210_tmp_dma 
__section(".dma.data");
include/linux/compiler.h:27:
__section("_ftrace_annotated_branch")   \
include/linux/compiler.h:63:__section("_ftrace_branch") 
\
include/linux/compiler.h:121:#define __annotate_jump_table 
__section(".rodata..c_jump_table")
include/linux/compiler.h:158:   __section("___kentry" "+" #sym )
\
include/linux/compiler.h:301:   static void * __section(".discard.addressable") 
__used \
include/linux/export.h:107: static int __ksym_marker_##sym[0] 
__section(".discard.ksym") __used
include/linux/srcutree.h:127:   __section("___srcu_struct_ptrs") = 

Maybe there should also be a __section("") test too.

Anyway, how about:
---
 scripts/checkpatch.pl | 9 +
 1 file changed, 9 insertions(+)

diff --git a/scripts/checkpatch.pl b/scripts/checkpatch.pl
index 1cdacb4fd207..8e6693ca772c 100755
--- a/scripts/checkpatch.pl
+++ b/scripts/checkpatch.pl
@@ -5901,6 +5901,15 @@ sub process {
 "__aligned(size) is preferred over 
__attribute__((aligned(size)))\n" . $herecurr);
}
 
+# Check for __attribute__ section, prefer __section (without quotes)
+   if ($realfile !~ m@\binclude/uapi/@ &&
+   $line =~ 
/\b__attribute__\s*\(\s*\(.*_*section_*\s*\(\s*("[^"]*")/) {
+   my $old = substr($rawline, $-[1], $+[1] - $-[1]);
+   my $new = substr($old, 1, -1);
+   WARN("PREFER_SECTION",
+"__section($new) is preferred over 
__attribute__((section($old)))\n" . $herecurr);
+   }
+
 # Check for __attribute__ format(printf, prefer __printf
if ($realfile !~ m@\binclude/uapi/@ &&
$line =~ 
/\b__attribute__\s*\(\s*\(\s*format\s*\(\s*printf/) {





[RFC PATCH v2 06/19] fs/ext4: Teach dax_layout_busy_page() to operate on a sub-range

2019-08-09 Thread ira . weiny
From: Ira Weiny 

Callers of dax_layout_busy_page() are only rarely operating on the
entire file of concern.

Teach dax_layout_busy_page() to operate on a sub-range of the
address_space provided.  Specifying 0 - ULONG_MAX however, will continue
to operate on the "entire file" and XFS is split out to a separate patch
by this method.

This could potentially speed up dax_layout_busy_page() as well.

Signed-off-by: Ira Weiny 

---
Changes from RFC v1
Fix 0-day build errors

 fs/dax.c| 15 +++
 fs/ext4/ext4.h  |  2 +-
 fs/ext4/extents.c   |  6 +++---
 fs/ext4/inode.c | 19 ---
 fs/xfs/xfs_file.c   |  3 ++-
 include/linux/dax.h |  6 --
 6 files changed, 33 insertions(+), 18 deletions(-)

diff --git a/fs/dax.c b/fs/dax.c
index a14ec32255d8..3ad19c384454 100644
--- a/fs/dax.c
+++ b/fs/dax.c
@@ -573,8 +573,11 @@ bool dax_mapping_is_dax(struct address_space *mapping)
 EXPORT_SYMBOL_GPL(dax_mapping_is_dax);
 
 /**
- * dax_layout_busy_page - find first pinned page in @mapping
+ * dax_layout_busy_page - find first pinned page in @mapping within
+ *the range @off - @off + @len
  * @mapping: address space to scan for a page with ref count > 1
+ * @off: offset to start at
+ * @len: length to scan through
  *
  * DAX requires ZONE_DEVICE mapped pages. These pages are never
  * 'onlined' to the page allocator so they are considered idle when
@@ -587,9 +590,13 @@ EXPORT_SYMBOL_GPL(dax_mapping_is_dax);
  * to be able to run unmap_mapping_range() and subsequently not race
  * mapping_mapped() becoming true.
  */
-struct page *dax_layout_busy_page(struct address_space *mapping)
+struct page *dax_layout_busy_page(struct address_space *mapping,
+ loff_t off, loff_t len)
 {
-   XA_STATE(xas, >i_pages, 0);
+   unsigned long start_idx = off >> PAGE_SHIFT;
+   unsigned long end_idx = (len == ULONG_MAX) ? ULONG_MAX
+   : start_idx + (len >> PAGE_SHIFT);
+   XA_STATE(xas, >i_pages, start_idx);
void *entry;
unsigned int scanned = 0;
struct page *page = NULL;
@@ -612,7 +619,7 @@ struct page *dax_layout_busy_page(struct address_space 
*mapping)
unmap_mapping_range(mapping, 0, 0, 1);
 
xas_lock_irq();
-   xas_for_each(, entry, ULONG_MAX) {
+   xas_for_each(, entry, end_idx) {
if (WARN_ON_ONCE(!xa_is_value(entry)))
continue;
if (unlikely(dax_is_locked(entry)))
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index 9c7f4036021b..32738ccdac1d 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -2578,7 +2578,7 @@ extern int ext4_get_inode_loc(struct inode *, struct 
ext4_iloc *);
 extern int ext4_inode_attach_jinode(struct inode *inode);
 extern int ext4_can_truncate(struct inode *inode);
 extern int ext4_truncate(struct inode *);
-extern int ext4_break_layouts(struct inode *);
+extern int ext4_break_layouts(struct inode *inode, loff_t offset, loff_t len);
 extern int ext4_punch_hole(struct inode *inode, loff_t offset, loff_t length);
 extern int ext4_truncate_restart_trans(handle_t *, struct inode *, int 
nblocks);
 extern void ext4_set_inode_flags(struct inode *);
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index 92266a2da7d6..ded4b1d92299 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -4736,7 +4736,7 @@ static long ext4_zero_range(struct file *file, loff_t 
offset,
 */
down_write(_I(inode)->i_mmap_sem);
 
-   ret = ext4_break_layouts(inode);
+   ret = ext4_break_layouts(inode, offset, len);
if (ret) {
up_write(_I(inode)->i_mmap_sem);
goto out_mutex;
@@ -5419,7 +5419,7 @@ int ext4_collapse_range(struct inode *inode, loff_t 
offset, loff_t len)
 */
down_write(_I(inode)->i_mmap_sem);
 
-   ret = ext4_break_layouts(inode);
+   ret = ext4_break_layouts(inode, offset, len);
if (ret)
goto out_mmap;
 
@@ -5572,7 +5572,7 @@ int ext4_insert_range(struct inode *inode, loff_t offset, 
loff_t len)
 */
down_write(_I(inode)->i_mmap_sem);
 
-   ret = ext4_break_layouts(inode);
+   ret = ext4_break_layouts(inode, offset, len);
if (ret)
goto out_mmap;
 
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index f08f48de52c5..d3fc6035428c 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -4262,7 +4262,7 @@ static void ext4_wait_dax_page(struct ext4_inode_info *ei)
down_write(>i_mmap_sem);
 }
 
-int ext4_break_layouts(struct inode *inode)
+int ext4_break_layouts(struct inode *inode, loff_t offset, loff_t len)
 {
struct ext4_inode_info *ei = EXT4_I(inode);
struct page *page;
@@ -4279,7 +4279,7 @@ int ext4_break_layouts(struct inode *inode)
}
 
do {
-   page = dax_layout_busy_page(inode->i_mapping);
+   page = 

[RFC PATCH v2 18/19] {mm,procfs}: Add display file_pins proc

2019-08-09 Thread ira . weiny
From: Ira Weiny 

Now that we have the file pins information stored add a new procfs entry
to display them to the user.

NOTE output will be dependent on what the file pin is tied to.  Some
processes may have the pin associated with a file descriptor in which
case that file is reported as well.

Others are associated directly with the process mm and are reported as
such.

For example of a file pinned to an RDMA open context (fd 4) and a file
pinned to the mm of that process:

4: /dev/infiniband/uverbs0
   /mnt/pmem/foo
/mnt/pmem/bar

Signed-off-by: Ira Weiny 
---
 fs/proc/base.c | 214 +
 1 file changed, 214 insertions(+)

diff --git a/fs/proc/base.c b/fs/proc/base.c
index ebea9501afb8..f4d219172235 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -2995,6 +2995,7 @@ static int proc_stack_depth(struct seq_file *m, struct 
pid_namespace *ns,
  */
 static const struct file_operations proc_task_operations;
 static const struct inode_operations proc_task_inode_operations;
+static const struct file_operations proc_pid_file_pins_operations;
 
 static const struct pid_entry tgid_base_stuff[] = {
DIR("task",   S_IRUGO|S_IXUGO, proc_task_inode_operations, 
proc_task_operations),
@@ -3024,6 +3025,7 @@ static const struct pid_entry tgid_base_stuff[] = {
ONE("stat",   S_IRUGO, proc_tgid_stat),
ONE("statm",  S_IRUGO, proc_pid_statm),
REG("maps",   S_IRUGO, proc_pid_maps_operations),
+   REG("file_pins",  S_IRUGO, proc_pid_file_pins_operations),
 #ifdef CONFIG_NUMA
REG("numa_maps",  S_IRUGO, proc_pid_numa_maps_operations),
 #endif
@@ -3422,6 +3424,7 @@ static const struct pid_entry tid_base_stuff[] = {
ONE("stat",  S_IRUGO, proc_tid_stat),
ONE("statm", S_IRUGO, proc_pid_statm),
REG("maps",  S_IRUGO, proc_pid_maps_operations),
+   REG("file_pins", S_IRUGO, proc_pid_file_pins_operations),
 #ifdef CONFIG_PROC_CHILDREN
REG("children",  S_IRUGO, proc_tid_children_operations),
 #endif
@@ -3718,3 +3721,214 @@ void __init set_proc_pid_nlink(void)
nlink_tid = pid_entry_nlink(tid_base_stuff, ARRAY_SIZE(tid_base_stuff));
nlink_tgid = pid_entry_nlink(tgid_base_stuff, 
ARRAY_SIZE(tgid_base_stuff));
 }
+
+/**
+ * file_pin information below.
+ */
+
+struct proc_file_pins_private {
+   struct inode *inode;
+   struct task_struct *task;
+   struct mm_struct *mm;
+   struct files_struct *files;
+   unsigned int nr_pins;
+   struct xarray fps;
+} __randomize_layout;
+
+static void release_fp(struct proc_file_pins_private *priv)
+{
+   up_read(&priv->mm->mmap_sem);
+   mmput(priv->mm);
+}
+
+static void print_fd_file_pin(struct seq_file *m, struct file *file,
+   unsigned long i)
+{
+   struct file_file_pin *fp;
+   struct file_file_pin *tmp;
+
+   if (list_empty_careful(&file->file_pins))
+   return;
+
+   seq_printf(m, "%lu: ", i);
+   seq_file_path(m, file, "\n");
+   seq_putc(m, '\n');
+
+   list_for_each_entry_safe(fp, tmp, &file->file_pins, list) {
+   seq_puts(m, "   ");
+   seq_file_path(m, fp->file, "\n");
+   seq_putc(m, '\n');
+   }
+}
+
+/* We are storing the index's within the FD table for later retrieval */
+static int store_fd(const void *priv , struct file *file, unsigned i)
+{
+   struct proc_file_pins_private *fp_priv;
+
+   /* cast away const... */
+   fp_priv = (struct proc_file_pins_private *)priv;
+
+   if (list_empty_careful(&file->file_pins))
+   return 0;
+
+   /* can't sleep in the iterate of the fd table */
+   xa_store(&fp_priv->fps, fp_priv->nr_pins, xa_mk_value(i), GFP_ATOMIC);
+   fp_priv->nr_pins++;
+
+   return 0;
+}
+
+static void store_mm_pins(struct proc_file_pins_private *priv)
+{
+   struct mm_file_pin *fp;
+   struct mm_file_pin *tmp;
+
+   list_for_each_entry_safe(fp, tmp, &priv->mm->file_pins, list) {
+   xa_store(&priv->fps, priv->nr_pins, fp, GFP_KERNEL);
+   priv->nr_pins++;
+   }
+}
+
+
+static void *fp_start(struct seq_file *m, loff_t *ppos)
+{
+   struct proc_file_pins_private *priv = m->private;
+   unsigned int pos = *ppos;
+
+   priv->task = get_proc_task(priv->inode);
+   if (!priv->task)
+   return ERR_PTR(-ESRCH);
+
+   if (!priv->mm || !mmget_not_zero(priv->mm))
+   return NULL;
+
+   priv->files = get_files_struct(priv->task);
+   down_read(&priv->mm->mmap_sem);
+
+   xa_destroy(&priv->fps);
+   priv->nr_pins = 0;
+
+   /* grab fds of "files" which have pins and store as xa values */
+   if (priv->files)
+   iterate_fd(priv->files, 0, store_fd, priv);
+
+   /* store mm_file_pins as xa entries */
+   store_mm_pins(priv);
+
+   if (pos >= priv->nr_pins) {
+   release_fp(priv);
+   return NULL;
+   }
+
+   return xa_load(>fps, 

  1   2   3   4   5   6   7   8   9   10   >