date:20200512

[PATCH v5 3/5] remoteproc: qcom: Update PIL relocation info on load

2020-05-12 Thread Bjorn Andersson

Update the PIL relocation information in IMEM with information about
where the firmware for various remoteprocs is loaded.

Signed-off-by: Bjorn Andersson 
---

Changes since v4:
- Dropped unnecessary comment about ignoring return value.

 drivers/remoteproc/Kconfig  |  3 +++
 drivers/remoteproc/qcom_q6v5_adsp.c | 16 +---
 drivers/remoteproc/qcom_q6v5_mss.c  |  3 +++
 drivers/remoteproc/qcom_q6v5_pas.c  | 15 ---
 drivers/remoteproc/qcom_q6v5_wcss.c | 14 +++---
 drivers/remoteproc/qcom_wcnss.c | 14 +++---
 6 files changed, 53 insertions(+), 12 deletions(-)

diff --git a/drivers/remoteproc/Kconfig b/drivers/remoteproc/Kconfig
index 8088ca4dd6dc..6bd42a411ca8 100644
--- a/drivers/remoteproc/Kconfig
+++ b/drivers/remoteproc/Kconfig
@@ -126,6 +126,7 @@ config QCOM_Q6V5_ADSP
depends on RPMSG_QCOM_GLINK_SMEM || RPMSG_QCOM_GLINK_SMEM=n
depends on QCOM_SYSMON || QCOM_SYSMON=n
select MFD_SYSCON
+   select QCOM_PIL_INFO
select QCOM_MDT_LOADER
select QCOM_Q6V5_COMMON
select QCOM_RPROC_COMMON
@@ -158,6 +159,7 @@ config QCOM_Q6V5_PAS
depends on RPMSG_QCOM_GLINK_SMEM || RPMSG_QCOM_GLINK_SMEM=n
depends on QCOM_SYSMON || QCOM_SYSMON=n
select MFD_SYSCON
+   select QCOM_PIL_INFO
select QCOM_MDT_LOADER
select QCOM_Q6V5_COMMON
select QCOM_RPROC_COMMON
@@ -209,6 +211,7 @@ config QCOM_WCNSS_PIL
depends on QCOM_SMEM
depends on QCOM_SYSMON || QCOM_SYSMON=n
select QCOM_MDT_LOADER
+   select QCOM_PIL_INFO
select QCOM_RPROC_COMMON
select QCOM_SCM
help
diff --git a/drivers/remoteproc/qcom_q6v5_adsp.c 
b/drivers/remoteproc/qcom_q6v5_adsp.c
index d2a2574dcf35..c539e89664cb 100644
--- a/drivers/remoteproc/qcom_q6v5_adsp.c
+++ b/drivers/remoteproc/qcom_q6v5_adsp.c
@@ -26,6 +26,7 @@
 #include 
 
 #include "qcom_common.h"
+#include "qcom_pil_info.h"
 #include "qcom_q6v5.h"
 #include "remoteproc_internal.h"
 
@@ -82,6 +83,7 @@ struct qcom_adsp {
unsigned int halt_lpass;
 
int crash_reason_smem;
+   const char *info_name;
 
struct completion start_done;
struct completion stop_done;
@@ -164,10 +166,17 @@ static int qcom_adsp_shutdown(struct qcom_adsp *adsp)
 static int adsp_load(struct rproc *rproc, const struct firmware *fw)
 {
struct qcom_adsp *adsp = (struct qcom_adsp *)rproc->priv;
+   int ret;
+
+   ret = qcom_mdt_load_no_init(adsp->dev, fw, rproc->firmware, 0,
+   adsp->mem_region, adsp->mem_phys,
+   adsp->mem_size, &adsp->mem_reloc);
+   if (ret)
+   return ret;
+
+   qcom_pil_info_store(adsp->info_name, adsp->mem_reloc, adsp->mem_size);
 
-   return qcom_mdt_load_no_init(adsp->dev, fw, rproc->firmware, 0,
-adsp->mem_region, adsp->mem_phys, adsp->mem_size,
-&adsp->mem_reloc);
+   return 0;
 }
 
 static int adsp_start(struct rproc *rproc)
@@ -436,6 +445,7 @@ static int adsp_probe(struct platform_device *pdev)
adsp = (struct qcom_adsp *)rproc->priv;
adsp->dev = &pdev->dev;
adsp->rproc = rproc;
+   adsp->info_name = desc->sysmon_name;
platform_set_drvdata(pdev, adsp);
 
ret = adsp_alloc_memory_region(adsp);
diff --git a/drivers/remoteproc/qcom_q6v5_mss.c 
b/drivers/remoteproc/qcom_q6v5_mss.c
index c4936f4d1e80..fdbcae11ae64 100644
--- a/drivers/remoteproc/qcom_q6v5_mss.c
+++ b/drivers/remoteproc/qcom_q6v5_mss.c
@@ -29,6 +29,7 @@
 
 #include "remoteproc_internal.h"
 #include "qcom_common.h"
+#include "qcom_pil_info.h"
 #include "qcom_q6v5.h"
 
 #include 
@@ -1221,6 +1222,8 @@ static int q6v5_mpss_load(struct q6v5 *qproc)
else if (ret < 0)
dev_err(qproc->dev, "MPSS authentication failed: %d\n", ret);
 
+   qcom_pil_info_store("modem", mpss_reloc, qproc->mpss_size);
+
 release_firmware:
release_firmware(fw);
 out:
diff --git a/drivers/remoteproc/qcom_q6v5_pas.c 
b/drivers/remoteproc/qcom_q6v5_pas.c
index 3bb69f58e086..84cb19231c35 100644
--- a/drivers/remoteproc/qcom_q6v5_pas.c
+++ b/drivers/remoteproc/qcom_q6v5_pas.c
@@ -25,6 +25,7 @@
 #include 
 
 #include "qcom_common.h"
+#include "qcom_pil_info.h"
 #include "qcom_q6v5.h"
 #include "remoteproc_internal.h"
 
@@ -64,6 +65,7 @@ struct qcom_adsp {
int pas_id;
int crash_reason_smem;
bool has_aggre2_clk;
+   const char *info_name;
 
struct completion start_done;
struct completion stop_done;
@@ -117,11 +119,17 @@ static void adsp_pds_disable(struct qcom_adsp *adsp, 
struct device **pds,
 static int adsp_load(struct rproc *rproc, const struct firmware *fw)
 {
struct qcom_adsp *adsp = (struct qcom_adsp *)rproc->priv;
+   int ret;
 
-   return qcom_mdt_load(adsp->dev, fw, rproc->firmware, adsp->pas_id,
-adsp->mem_region, adsp->mem_phys,

[PATCH v5 4/5] arm64: dts: qcom: qcs404: Add IMEM and PIL info region

2020-05-12 Thread Bjorn Andersson

Add a simple-mfd representing IMEM on QCS404 and define the PIL
relocation info region, so that post mortem tools will be able to locate
the loaded remoteprocs.

Reviewed-by: Stephen Boyd 
Signed-off-by: Bjorn Andersson 
---

Changes since v4:
- imem is no longer compatible with "syscon"

 arch/arm64/boot/dts/qcom/qcs404.dtsi | 15 +++
 1 file changed, 15 insertions(+)

diff --git a/arch/arm64/boot/dts/qcom/qcs404.dtsi 
b/arch/arm64/boot/dts/qcom/qcs404.dtsi
index c685a1664810..b654b802e95c 100644
--- a/arch/arm64/boot/dts/qcom/qcs404.dtsi
+++ b/arch/arm64/boot/dts/qcom/qcs404.dtsi
@@ -1097,6 +1097,21 @@ blsp2_spi0: spi@7af5000 {
status = "disabled";
};
 
+   imem@860 {
+   compatible = "simple-mfd";
+   reg = <0x0860 0x1000>;
+
+   #address-cells = <1>;
+   #size-cells = <1>;
+
+   ranges = <0 0x0860 0x1000>;
+
+   pil-reloc@94c {
+   compatible = "qcom,pil-reloc-info";
+   reg = <0x94c 0xc8>;
+   };
+   };
+
intc: interrupt-controller@b00 {
compatible = "qcom,msm-qgic2";
interrupt-controller;
-- 
2.26.2

[PATCH v5 5/5] arm64: dts: qcom: sdm845: Add IMEM and PIL info region

2020-05-12 Thread Bjorn Andersson

Add a simple-mfd representing IMEM on SDM845 and define the PIL
relocation info region, so that post mortem tools will be able to locate
the loaded remoteprocs.

Reviewed-by: Stephen Boyd 
Signed-off-by: Bjorn Andersson 
---

Changes since v4:
- imem is no longer compatible with "syscon"

 arch/arm64/boot/dts/qcom/sdm845.dtsi | 15 +++
 1 file changed, 15 insertions(+)

diff --git a/arch/arm64/boot/dts/qcom/sdm845.dtsi 
b/arch/arm64/boot/dts/qcom/sdm845.dtsi
index 7cce6f1b7c9e..1abbbe7a43a0 100644
--- a/arch/arm64/boot/dts/qcom/sdm845.dtsi
+++ b/arch/arm64/boot/dts/qcom/sdm845.dtsi
@@ -3716,6 +3716,21 @@ spmi_bus: spmi@c44 {
cell-index = <0>;
};
 
+   imem@146bf000 {
+   compatible = "simple-mfd";
+   reg = <0 0x146bf000 0 0x1000>;
+
+   #address-cells = <1>;
+   #size-cells = <1>;
+
+   ranges = <0 0 0x146bf000 0x1000>;
+
+   pil-reloc@94c {
+   compatible = "qcom,pil-reloc-info";
+   reg = <0x94c 0xc8>;
+   };
+   };
+
apps_smmu: iommu@1500 {
compatible = "qcom,sdm845-smmu-500", "arm,mmu-500";
reg = <0 0x1500 0 0x8>;
-- 
2.26.2

[PATCH v5 1/5] dt-bindings: remoteproc: Add Qualcomm PIL info binding

2020-05-12 Thread Bjorn Andersson

Add a devicetree binding for the Qualcomm peripheral image loader
relocation information region found in the IMEM.

Reviewed-by: Stephen Boyd 
Signed-off-by: Bjorn Andersson 
---

Changes since v4:
- Fixed reg in example to make it compile

 .../bindings/remoteproc/qcom,pil-info.yaml| 44 +++
 1 file changed, 44 insertions(+)
 create mode 100644 
Documentation/devicetree/bindings/remoteproc/qcom,pil-info.yaml

diff --git a/Documentation/devicetree/bindings/remoteproc/qcom,pil-info.yaml 
b/Documentation/devicetree/bindings/remoteproc/qcom,pil-info.yaml
new file mode 100644
index 000000000000..87c52316ddbd
--- /dev/null
+++ b/Documentation/devicetree/bindings/remoteproc/qcom,pil-info.yaml
@@ -0,0 +1,44 @@
+# SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/remoteproc/qcom,pil-info.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Qualcomm peripheral image loader relocation info binding
+
+maintainers:
+  - Bjorn Andersson 
+
+description:
+  The Qualcomm peripheral image loader relocation memory region, in IMEM, is
+  used for communicating remoteproc relocation information to post mortem
+  debugging tools.
+
+properties:
+  compatible:
+const: qcom,pil-reloc-info
+
+  reg:
+maxItems: 1
+
+required:
+  - compatible
+  - reg
+
+examples:
+  - |
+imem@146bf000 {
+  compatible = "syscon", "simple-mfd";
+  reg = <0x146bf000 0x1000>;
+
+  #address-cells = <1>;
+  #size-cells = <1>;
+
+  ranges = <0 0x146bf000 0x1000>;
+
+  pil-reloc@94c {
+compatible = "qcom,pil-reloc-info";
+reg = <0x94c 0xc8>;
+  };
+};
+...
-- 
2.26.2

Re: [PATCH V2] ifcvf: move IRQ request/free to status change handlers

2020-05-12 Thread Jason Wang




On 2020/5/13 下午12:42, Zhu, Lingshan wrote:



On 5/13/2020 12:12 PM, Jason Wang wrote:


On 2020/5/12 下午4:00, Zhu Lingshan wrote:

This commit moves IRQ request and free operations from probe()
to VIRTIO status change handler to comply with VIRTIO spec.

VIRTIO spec 1.1, section 2.1.2 Device Requirements: Device Status Field
The device MUST NOT consume buffers or send any used buffer
notifications to the driver before DRIVER_OK.



This comment needs to be checked as I said previously. It's only 
needed if we're sure ifcvf can generate interrupt before DRIVER_OK.





Signed-off-by: Zhu Lingshan 
---
changes from V1:
remove ifcvf_stop_datapath() in status == 0 handler, we don't need 
to do this
twice; handle status == 0 after DRIVER_OK -> !DRIVER_OK handler 
(Jason Wang)



Patch looks good to me, but with this patch ping cannot work on my 
machine. (It works without this patch).


Thanks

This is strange, it works on my machines, let's have a check offline.

Thanks,
BR
Zhu Lingshan



Note that I tested the patch with vhost-vdpa.

Thanks.

[PATCH v5 2/5] remoteproc: qcom: Introduce helper to store pil info in IMEM

2020-05-12 Thread Bjorn Andersson

A region in IMEM is used to communicate load addresses of remoteproc to
post mortem debug tools. Implement a helper function that can be used to
store this information in order to enable these tools to process
collected ramdumps.

Signed-off-by: Bjorn Andersson 
---

Changes since v4:
- Replaced platform_driver by just a single helper function
- Lazy initialization of mapping
- Cleaned up search loop
- Replaced regmap access of IMEM with ioremap and normal accessors

 drivers/remoteproc/Kconfig |   3 +
 drivers/remoteproc/Makefile|   1 +
 drivers/remoteproc/qcom_pil_info.c | 124 +
 drivers/remoteproc/qcom_pil_info.h |   7 ++
 4 files changed, 135 insertions(+)
 create mode 100644 drivers/remoteproc/qcom_pil_info.c
 create mode 100644 drivers/remoteproc/qcom_pil_info.h

diff --git a/drivers/remoteproc/Kconfig b/drivers/remoteproc/Kconfig
index fbaed079b299..8088ca4dd6dc 100644
--- a/drivers/remoteproc/Kconfig
+++ b/drivers/remoteproc/Kconfig
@@ -107,6 +107,9 @@ config KEYSTONE_REMOTEPROC
  It's safe to say N here if you're not interested in the Keystone
  DSPs or just want to use a bare minimum kernel.
 
+config QCOM_PIL_INFO
+   tristate
+
 config QCOM_RPROC_COMMON
tristate
 
diff --git a/drivers/remoteproc/Makefile b/drivers/remoteproc/Makefile
index 0effd3825035..cc0f631adb3b 100644
--- a/drivers/remoteproc/Makefile
+++ b/drivers/remoteproc/Makefile
@@ -15,6 +15,7 @@ obj-$(CONFIG_OMAP_REMOTEPROC) += omap_remoteproc.o
 obj-$(CONFIG_WKUP_M3_RPROC)+= wkup_m3_rproc.o
 obj-$(CONFIG_DA8XX_REMOTEPROC) += da8xx_remoteproc.o
 obj-$(CONFIG_KEYSTONE_REMOTEPROC)  += keystone_remoteproc.o
+obj-$(CONFIG_QCOM_PIL_INFO)+= qcom_pil_info.o
 obj-$(CONFIG_QCOM_RPROC_COMMON)+= qcom_common.o
 obj-$(CONFIG_QCOM_Q6V5_COMMON) += qcom_q6v5.o
 obj-$(CONFIG_QCOM_Q6V5_ADSP)   += qcom_q6v5_adsp.o
diff --git a/drivers/remoteproc/qcom_pil_info.c 
b/drivers/remoteproc/qcom_pil_info.c
new file mode 100644
index 000000000000..0785c7cde2d3
--- /dev/null
+++ b/drivers/remoteproc/qcom_pil_info.c
@@ -0,0 +1,124 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (c) 2019-2020 Linaro Ltd.
+ */
+#include 
+#include 
+#include 
+#include 
+
+#define PIL_RELOC_NAME_LEN 8
+
+struct pil_reloc_entry {
+   char name[PIL_RELOC_NAME_LEN];
+   __le64 base;
+   __le32 size;
+} __packed;
+
+struct pil_reloc {
+   struct device *dev;
+   void __iomem *base;
+   size_t num_entries;
+};
+
+static struct pil_reloc _reloc __read_mostly;
+static DEFINE_MUTEX(reloc_mutex);
+
+static int qcom_pil_info_init(void)
+{
+   struct device_node *np;
+   struct resource imem;
+   void __iomem *base;
+   int ret;
+
+   /* Already initialized? */
+   if (_reloc.base)
+   return 0;
+
+   np = of_find_compatible_node(NULL, NULL, "qcom,pil-reloc-info");
+   if (!np)
+   return -ENOENT;
+
+   ret = of_address_to_resource(np, 0, &imem);
+   of_node_put(np);
+   if (ret < 0)
+   return ret;
+
+   base = ioremap(imem.start, resource_size(&imem));
+   if (!base) {
+   pr_err("failed to map PIL relocation info region\n");
+   return -ENOMEM;
+   }
+
+   memset_io(base, 0, resource_size(&imem));
+
+   _reloc.base = base;
+   _reloc.num_entries = resource_size(&imem) / sizeof(struct pil_reloc_entry);
+
+   return 0;
+}
+
+/**
+ * qcom_pil_info_store() - store PIL information of image in IMEM
+ * @image: name of the image
+ * @base:  base address of the loaded image
+ * @size:  size of the loaded image
+ *
+ * Return: 0 on success, negative errno on failure
+ */
+int qcom_pil_info_store(const char *image, phys_addr_t base, size_t size)
+{
+   char buf[PIL_RELOC_NAME_LEN];
+   void __iomem *entry;
+   int ret;
+   int i;
+
+   mutex_lock(&reloc_mutex);
+   ret = qcom_pil_info_init();
+   if (ret < 0) {
+   mutex_unlock(&reloc_mutex);
+   return ret;
+   }
+
+   for (i = 0; i < _reloc.num_entries; i++) {
+   entry = _reloc.base + i * sizeof(struct pil_reloc_entry);
+
+   memcpy_fromio(buf, entry, PIL_RELOC_NAME_LEN);
+
+   /*
+* An empty record means we didn't find it, given that the
+* records are packed.
+*/
+   if (!buf[0])
+   goto found_unused;
+
+   if (!strncmp(buf, image, PIL_RELOC_NAME_LEN))
+   goto found_existing;
+   }
+
+   pr_warn("insufficient PIL info slots\n");
+   mutex_unlock(&reloc_mutex);
+   return -ENOMEM;
+
+found_unused:
+   memcpy_toio(entry, image, PIL_RELOC_NAME_LEN);
+found_existing:
+   writel(base, entry + offsetof(struct pil_reloc_entry, base));
+   writel(size, entry + offsetof(struct pil_reloc_entry, size));
+

[PATCH v5 0/5] remoteproc: qcom: PIL info support

2020-05-12 Thread Bjorn Andersson

Introduce support for filling out the relocation information in IMEM, to aid
post mortem debug tools to locate the various remoteprocs.

Bjorn Andersson (5):
  dt-bindings: remoteproc: Add Qualcomm PIL info binding
  remoteproc: qcom: Introduce helper to store pil info in IMEM
  remoteproc: qcom: Update PIL relocation info on load
  arm64: dts: qcom: qcs404: Add IMEM and PIL info region
  arm64: dts: qcom: sdm845: Add IMEM and PIL info region

 .../bindings/remoteproc/qcom,pil-info.yaml|  44 +++
 arch/arm64/boot/dts/qcom/qcs404.dtsi  |  15 +++
 arch/arm64/boot/dts/qcom/sdm845.dtsi  |  15 +++
 drivers/remoteproc/Kconfig|   6 +
 drivers/remoteproc/Makefile   |   1 +
 drivers/remoteproc/qcom_pil_info.c| 124 ++
 drivers/remoteproc/qcom_pil_info.h|   7 +
 drivers/remoteproc/qcom_q6v5_adsp.c   |  16 ++-
 drivers/remoteproc/qcom_q6v5_mss.c|   3 +
 drivers/remoteproc/qcom_q6v5_pas.c|  15 ++-
 drivers/remoteproc/qcom_q6v5_wcss.c   |  14 +-
 drivers/remoteproc/qcom_wcnss.c   |  14 +-
 12 files changed, 262 insertions(+), 12 deletions(-)
 create mode 100644 
Documentation/devicetree/bindings/remoteproc/qcom,pil-info.yaml
 create mode 100644 drivers/remoteproc/qcom_pil_info.c
 create mode 100644 drivers/remoteproc/qcom_pil_info.h

-- 
2.26.2

Re: [PATCH 2/2] clk: qcom: gcc: Add missing UFS clocks for SM8150

2020-05-12 Thread Vinod Koul

On 12-05-20, 18:25, Stephen Boyd wrote:
> Quoting Vinod Koul (2020-04-26 21:55:34)
> > On 25-04-20, 12:11, Stephen Boyd wrote:
> > > Quoting Vinod Koul (2020-04-23 21:43:11)
> > > > Add the missing ufs card and ufs phy clocks for SM8150. They were missed
> > > > in earlier addition of clock driver.
> > > > 
> > > > Signed-off-by: Vinod Koul 
> > > > ---
> > > >  drivers/clk/qcom/gcc-sm8150.c | 84 +++
> > > >  1 file changed, 84 insertions(+)
> > > > 
> > > > diff --git a/drivers/clk/qcom/gcc-sm8150.c 
> > > > b/drivers/clk/qcom/gcc-sm8150.c
> > > > index 5c3dc34c955e..4354620fa12d 100644
> > > > --- a/drivers/clk/qcom/gcc-sm8150.c
> > > > +++ b/drivers/clk/qcom/gcc-sm8150.c
> > > > @@ -2881,6 +2881,45 @@ static struct clk_branch 
> > > > gcc_ufs_card_phy_aux_hw_ctl_clk = {
> > > > },
> > > >  };
> > > >  
> > > > +/* external clocks so add BRANCH_HALT_SKIP */
> > > > +static struct clk_branch gcc_ufs_card_rx_symbol_0_clk = {
> > > > +   .halt_check = BRANCH_HALT_SKIP,
> > > > +   .clkr = {
> > > > +   .enable_reg = 0x7501c,
> > > > +   .enable_mask = BIT(0),
> > > > +   .hw.init = &(struct clk_init_data){
> > > > +   .name = "gcc_ufs_card_rx_symbol_0_clk",
> > > 
> > > Any reason to not use .fw_name?
> > 
> > Did i understand it correct that you would like these to have .fw_name
> > for parent? Should we start adding these clocks in DT description?
> 
> Sorry I misread the patch. This isn't a parent name description so .name
> is correct here.

No worries, I will add fixes and send the update

Thanks
-- 
~Vinod

Re: [PATCH v2 2/2] fs: avoid fdput() after failed fdget() in kernel_read_file_from_fd()

2020-05-12 Thread Al Viro

On Tue, May 12, 2020 at 01:43:05PM -0600, Shuah Khan wrote:
> Fix kernel_read_file_from_fd() to avoid fdput() after a failed fdget().
> fdput() doesn't do fput() on this file since FDPUT_FPUT isn't set
> in fd.flags. Fix it anyway since failed fdget() doesn't require
> a fdput().
> 
> This was introduced in a commit that added kernel_read_file_from_fd() as
> a wrapper for the VFS common kernel_read_file().
> 
> Fixes: b844f0ecbc56 ("vfs: define kernel_copy_file_from_fd()")
> Signed-off-by: Shuah Khan 
> ---
>  fs/exec.c | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
> 
> diff --git a/fs/exec.c b/fs/exec.c
> index 06b4c550af5d..ea24bdce939d 100644
> --- a/fs/exec.c
> +++ b/fs/exec.c
> @@ -1021,8 +1021,8 @@ int kernel_read_file_from_fd(int fd, void **buf, loff_t 
> *size, loff_t max_size,
>   goto out;
>  
>   ret = kernel_read_file(f.file, buf, size, max_size, id);
> -out:
>   fdput(f);
> +out:
>   return ret;

Again, that goto is a pointless obfuscation; just return -EBADF
and be done with that.

Incidentally, why is that thing exported?

Re: [PATCH v2 1/2] fs: avoid fdput() after failed fdget() in ksys_sync_file_range()

2020-05-12 Thread Al Viro

On Tue, May 12, 2020 at 01:43:04PM -0600, Shuah Khan wrote:

> @@ -364,15 +364,15 @@ int sync_file_range(struct file *file, loff_t offset, 
> loff_t nbytes,
>  int ksys_sync_file_range(int fd, loff_t offset, loff_t nbytes,
>unsigned int flags)
>  {
> - int ret;
> - struct fd f;
> + int ret = -EBADF;
> + struct fd f = fdget(fd);
>  
> - ret = -EBADF;
> - f = fdget(fd);
> - if (f.file)
> - ret = sync_file_range(f.file, offset, nbytes, flags);
> + if (!f.file)
> + goto out;
>  
> + ret = sync_file_range(f.file, offset, nbytes, flags);
>   fdput(f);
> +out:
>   return ret;

IDGI...  What's the point of that goto out, when it leads straight to return?

[PATCH 0/9] Enable ext4 support for per-file/directory DAX operations

2020-05-12 Thread ira . weiny

From: Ira Weiny 

Enable the same per file DAX support in ext4 as was done for xfs.  This series
builds and depends on the V11 series for xfs.[1]

This passes the same xfstests test as XFS.

The only issue is that this modifies the old mount option parsing code rather
than waiting for the new parsing code to be finalized.

This series starts with 3 fixes which include making Verity and Encrypt truly
mutually exclusive from DAX.  I think these first 3 patches should be picked up
for 5.8 regardless of what is decided regarding the mount parsing.

[1] https://lore.kernel.org/lkml/20200428002142.404144-1-ira.we...@intel.com/

To: linux-kernel@vger.kernel.org
Cc: "Darrick J. Wong" 
Cc: Dan Williams 
Cc: Dave Chinner 
Cc: Christoph Hellwig 
Cc: "Theodore Y. Ts'o" 
Cc: Jan Kara 
Cc: linux-e...@vger.kernel.org
Cc: linux-...@vger.kernel.org
Cc: linux-fsde...@vger.kernel.org

Ira Weiny (9):
  fs/ext4: Narrow scope of DAX check in setflags
  fs/ext4: Disallow verity if inode is DAX
  fs/ext4: Disallow encryption if inode is DAX
  fs/ext4: Change EXT4_MOUNT_DAX to EXT4_MOUNT_DAX_ALWAYS
  fs/ext4: Update ext4_should_use_dax()
  fs/ext4: Only change S_DAX on inode load
  fs/ext4: Make DAX mount option a tri-state
  fs/ext4: Introduce DAX inode flag
  Documentation/dax: Update DAX enablement for ext4

 Documentation/filesystems/dax.txt |  6 +-
 Documentation/filesystems/ext4/verity.rst |  7 +++
 Documentation/filesystems/fscrypt.rst |  4 +-
 fs/ext4/ext4.h| 20 ---
 fs/ext4/ialloc.c  |  2 +-
 fs/ext4/inode.c   | 27 +++--
 fs/ext4/ioctl.c   | 32 +--
 fs/ext4/super.c   | 67 +++
 fs/ext4/verity.c  |  5 +-
 9 files changed, 125 insertions(+), 45 deletions(-)

-- 
2.25.1

[PATCH 5/9] fs/ext4: Update ext4_should_use_dax()

2020-05-12 Thread ira . weiny

From: Ira Weiny 

S_DAX should only be enabled when the underlying block device supports
dax.

Change ext4_should_use_dax() to check for device support prior to the
overriding mount option.

While we are at it change the function to ext4_should_enable_dax() as
this better reflects the ask as well as matches xfs.

Signed-off-by: Ira Weiny 

---
Changes from RFC
Change function name to 'should enable'
Clean up bool conversion
Reorder this for better bisect-ability
---
 fs/ext4/inode.c | 14 +-
 1 file changed, 9 insertions(+), 5 deletions(-)

diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index a10ff12194db..d3a4c2ed7a1c 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -4398,10 +4398,8 @@ int ext4_get_inode_loc(struct inode *inode, struct 
ext4_iloc *iloc)
!ext4_test_inode_state(inode, EXT4_STATE_XATTR));
 }
 
-static bool ext4_should_use_dax(struct inode *inode)
+static bool ext4_should_enable_dax(struct inode *inode)
 {
-   if (!test_opt(inode->i_sb, DAX_ALWAYS))
-   return false;
if (!S_ISREG(inode->i_mode))
return false;
if (ext4_should_journal_data(inode))
@@ -4412,7 +4410,13 @@ static bool ext4_should_use_dax(struct inode *inode)
return false;
if (ext4_test_inode_flag(inode, EXT4_INODE_VERITY))
return false;
-   return true;
+   if (!bdev_dax_supported(inode->i_sb->s_bdev,
+   inode->i_sb->s_blocksize))
+   return false;
+   if (test_opt(inode->i_sb, DAX_ALWAYS))
+   return true;
+
+   return false;
 }
 
 void ext4_set_inode_flags(struct inode *inode)
@@ -4430,7 +4434,7 @@ void ext4_set_inode_flags(struct inode *inode)
new_fl |= S_NOATIME;
if (flags & EXT4_DIRSYNC_FL)
new_fl |= S_DIRSYNC;
-   if (ext4_should_use_dax(inode))
+   if (ext4_should_enable_dax(inode))
new_fl |= S_DAX;
if (flags & EXT4_ENCRYPT_FL)
new_fl |= S_ENCRYPTED;
-- 
2.25.1

[PATCH 9/9] Documentation/dax: Update DAX enablement for ext4

2020-05-12 Thread ira . weiny

From: Ira Weiny 

Update the document to reflect ext4 and xfs now behave the same.

Signed-off-by: Ira Weiny 

---
Changes from RFC:
Update with ext2 text...
---
 Documentation/filesystems/dax.txt | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/Documentation/filesystems/dax.txt 
b/Documentation/filesystems/dax.txt
index 735fb4b54117..265c4f808dbf 100644
--- a/Documentation/filesystems/dax.txt
+++ b/Documentation/filesystems/dax.txt
@@ -25,7 +25,7 @@ size when creating the filesystem.
 Currently 3 filesystems support DAX: ext2, ext4 and xfs.  Enabling DAX on them
 is different.
 
-Enabling DAX on ext4 and ext2
+Enabling DAX on ext2
 -
 
 When mounting the filesystem, use the "-o dax" option on the command line or
@@ -33,8 +33,8 @@ add 'dax' to the options in /etc/fstab.  This works to enable 
DAX on all files
 within the filesystem.  It is equivalent to the '-o dax=always' behavior below.
 
 
-Enabling DAX on xfs

+Enabling DAX on xfs and ext4
+
 
 Summary
 ---
-- 
2.25.1

[PATCH 8/9] fs/ext4: Introduce DAX inode flag

2020-05-12 Thread ira . weiny

From: Ira Weiny 

Add a flag to preserve FS_XFLAG_DAX in the ext4 inode.

Set the flag to be user visible and changeable.  Set the flag to be
inherited.  Allow applications to change the flag at any time.

Finally, on regular files, flag the inode to not be cached to facilitate
changing S_DAX on the next creation of the inode.

Signed-off-by: Ira Weiny 

---
Change from RFC:
use new d_mark_dontcache()
Allow caching if ALWAYS/NEVER is set
Rebased to latest Linus master
Change flag to unused 0x01000000
update ext4_should_enable_dax()
---
 fs/ext4/ext4.h  | 13 +
 fs/ext4/inode.c |  4 +++-
 fs/ext4/ioctl.c | 25 -
 3 files changed, 36 insertions(+), 6 deletions(-)

diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index 01d1de838896..715f8f2029b2 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -415,13 +415,16 @@ struct flex_groups {
 #define EXT4_VERITY_FL 0x0010 /* Verity protected inode */
 #define EXT4_EA_INODE_FL   0x0020 /* Inode used for large EA */
 /* 0x0040 was formerly EXT4_EOFBLOCKS_FL */
+
+#define EXT4_DAX_FL                    0x01000000 /* Inode is DAX */
+
 #define EXT4_INLINE_DATA_FL0x1000 /* Inode has inline data. */
 #define EXT4_PROJINHERIT_FL0x2000 /* Create with parents 
projid */
 #define EXT4_CASEFOLD_FL   0x4000 /* Casefolded file */
 #define EXT4_RESERVED_FL   0x8000 /* reserved for ext4 lib */
 
-#define EXT4_FL_USER_VISIBLE   0x705BDFFF /* User visible flags */
-#define EXT4_FL_USER_MODIFIABLE0x604BC0FF /* User modifiable 
flags */
+#define EXT4_FL_USER_VISIBLE   0x715BDFFF /* User visible flags */
+#define EXT4_FL_USER_MODIFIABLE0x614BC0FF /* User modifiable 
flags */
 
 /* Flags we can manipulate with through EXT4_IOC_FSSETXATTR */
 #define EXT4_FL_XFLAG_VISIBLE  (EXT4_SYNC_FL | \
@@ -429,14 +432,16 @@ struct flex_groups {
 EXT4_APPEND_FL | \
 EXT4_NODUMP_FL | \
 EXT4_NOATIME_FL | \
-EXT4_PROJINHERIT_FL)
+EXT4_PROJINHERIT_FL | \
+EXT4_DAX_FL)
 
 /* Flags that should be inherited by new inodes from their parent. */
 #define EXT4_FL_INHERITED (EXT4_SECRM_FL | EXT4_UNRM_FL | EXT4_COMPR_FL |\
   EXT4_SYNC_FL | EXT4_NODUMP_FL | EXT4_NOATIME_FL |\
   EXT4_NOCOMPR_FL | EXT4_JOURNAL_DATA_FL |\
   EXT4_NOTAIL_FL | EXT4_DIRSYNC_FL |\
-  EXT4_PROJINHERIT_FL | EXT4_CASEFOLD_FL)
+  EXT4_PROJINHERIT_FL | EXT4_CASEFOLD_FL |\
+  EXT4_DAX_FL)
 
 /* Flags that are appropriate for regular files (all but dir-specific ones). */
 #define EXT4_REG_FLMASK (~(EXT4_DIRSYNC_FL | EXT4_TOPDIR_FL | EXT4_CASEFOLD_FL 
|\
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 140b1930e2f4..105cf04f7940 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -4400,6 +4400,8 @@ int ext4_get_inode_loc(struct inode *inode, struct 
ext4_iloc *iloc)
 
 static bool ext4_should_enable_dax(struct inode *inode)
 {
+   unsigned int flags = EXT4_I(inode)->i_flags;
+
if (test_opt2(inode->i_sb, DAX_NEVER))
return false;
if (!S_ISREG(inode->i_mode))
@@ -4418,7 +4420,7 @@ static bool ext4_should_enable_dax(struct inode *inode)
if (test_opt(inode->i_sb, DAX_ALWAYS))
return true;
 
-   return false;
+   return flags & EXT4_DAX_FL;
 }
 
 void ext4_set_inode_flags(struct inode *inode, bool init)
diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c
index 145083e8cd1e..6996a5c3e101 100644
--- a/fs/ext4/ioctl.c
+++ b/fs/ext4/ioctl.c
@@ -528,12 +528,15 @@ static inline __u32 ext4_iflags_to_xflags(unsigned long 
iflags)
xflags |= FS_XFLAG_NOATIME;
if (iflags & EXT4_PROJINHERIT_FL)
xflags |= FS_XFLAG_PROJINHERIT;
+   if (iflags & EXT4_DAX_FL)
+   xflags |= FS_XFLAG_DAX;
return xflags;
 }
 
 #define EXT4_SUPPORTED_FS_XFLAGS (FS_XFLAG_SYNC | FS_XFLAG_IMMUTABLE | \
  FS_XFLAG_APPEND | FS_XFLAG_NODUMP | \
- FS_XFLAG_NOATIME | FS_XFLAG_PROJINHERIT)
+ FS_XFLAG_NOATIME | FS_XFLAG_PROJINHERIT | \
+ FS_XFLAG_DAX)
 
 /* Transfer xflags flags to internal */
 static inline unsigned long ext4_xflags_to_iflags(__u32 xflags)
@@ -552,6 +555,8 @@ static inline unsigned long ext4_xflags_to_iflags(__u32 
xflags)
iflags |= EXT4_NOATIME_FL;
if (xflags & FS_XFLAG_PROJINHERIT)
iflags |= EXT4_PROJINHERIT_FL;
+   if (xflags & FS_XFLAG_DAX)
+   iflags |=

[PATCH 3/9] fs/ext4: Disallow encryption if inode is DAX

2020-05-12 Thread ira . weiny

From: Ira Weiny 

Encryption and DAX are incompatible.  Changing the DAX mode due to a
change in Encryption mode is wrong without a corresponding
address_space_operations update.

Make the 2 options mutually exclusive by returning an error if DAX was
set first.

Furthermore, clarify the documentation of the exclusivity and how that
will work.

Signed-off-by: Ira Weiny 

---
Changes:
remove WARN_ON_ONCE
Add documentation to the encrypt doc WRT DAX
---
 Documentation/filesystems/fscrypt.rst |  4 +++-
 fs/ext4/super.c   | 10 +-
 2 files changed, 4 insertions(+), 10 deletions(-)

diff --git a/Documentation/filesystems/fscrypt.rst 
b/Documentation/filesystems/fscrypt.rst
index aa072112cfff..1475b8d52fef 100644
--- a/Documentation/filesystems/fscrypt.rst
+++ b/Documentation/filesystems/fscrypt.rst
@@ -1038,7 +1038,9 @@ astute users may notice some differences in behavior:
 - The ext4 filesystem does not support data journaling with encrypted
   regular files.  It will fall back to ordered data mode instead.
 
-- DAX (Direct Access) is not supported on encrypted files.
+- DAX (Direct Access) is not supported on encrypted files.  Attempts to enable
+  DAX on an encrypted file will fail.  Mount options will _not_ enable DAX on
+  encrypted files.
 
 - The st_size of an encrypted symlink will not necessarily give the
   length of the symlink target as required by POSIX.  It will actually
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index bf5fcb477f66..9873ab27e3fa 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -1320,7 +1320,7 @@ static int ext4_set_context(struct inode *inode, const 
void *ctx, size_t len,
if (inode->i_ino == EXT4_ROOT_INO)
return -EPERM;
 
-   if (WARN_ON_ONCE(IS_DAX(inode) && i_size_read(inode)))
+   if (IS_DAX(inode))
return -EINVAL;
 
res = ext4_convert_inline_data(inode);
@@ -1344,10 +1344,6 @@ static int ext4_set_context(struct inode *inode, const 
void *ctx, size_t len,
ext4_set_inode_flag(inode, EXT4_INODE_ENCRYPT);
ext4_clear_inode_state(inode,
EXT4_STATE_MAY_INLINE_DATA);
-   /*
-* Update inode->i_flags - S_ENCRYPTED will be enabled,
-* S_DAX may be disabled
-*/
ext4_set_inode_flags(inode);
}
return res;
@@ -1371,10 +1367,6 @@ static int ext4_set_context(struct inode *inode, const 
void *ctx, size_t len,
ctx, len, 0);
if (!res) {
ext4_set_inode_flag(inode, EXT4_INODE_ENCRYPT);
-   /*
-* Update inode->i_flags - S_ENCRYPTED will be enabled,
-* S_DAX may be disabled
-*/
ext4_set_inode_flags(inode);
res = ext4_mark_inode_dirty(handle, inode);
if (res)
-- 
2.25.1

[PATCH 4/9] fs/ext4: Change EXT4_MOUNT_DAX to EXT4_MOUNT_DAX_ALWAYS

2020-05-12 Thread ira . weiny

From: Ira Weiny 

In prep for the new tri-state mount option which then introduces
EXT4_MOUNT_DAX_NEVER.

Signed-off-by: Ira Weiny 

---
Changes:
New patch
---
 fs/ext4/ext4.h  |  4 ++--
 fs/ext4/inode.c |  2 +-
 fs/ext4/super.c | 12 ++--
 3 files changed, 9 insertions(+), 9 deletions(-)

diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index 91eb4381cae5..1a3daf2d18ef 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -1123,9 +1123,9 @@ struct ext4_inode_info {
 #define EXT4_MOUNT_MINIX_DF0x00080 /* Mimics the Minix statfs */
 #define EXT4_MOUNT_NOLOAD  0x00100 /* Don't use existing journal*/
 #ifdef CONFIG_FS_DAX
-#define EXT4_MOUNT_DAX 0x00200 /* Direct Access */
+#define EXT4_MOUNT_DAX_ALWAYS  0x00200 /* Direct Access */
 #else
-#define EXT4_MOUNT_DAX 0
+#define EXT4_MOUNT_DAX_ALWAYS  0
 #endif
 #define EXT4_MOUNT_DATA_FLAGS  0x00C00 /* Mode for data writes: */
 #define EXT4_MOUNT_JOURNAL_DATA0x00400 /* Write data to 
journal */
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 2a4aae6acdcb..a10ff12194db 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -4400,7 +4400,7 @@ int ext4_get_inode_loc(struct inode *inode, struct 
ext4_iloc *iloc)
 
 static bool ext4_should_use_dax(struct inode *inode)
 {
-   if (!test_opt(inode->i_sb, DAX))
+   if (!test_opt(inode->i_sb, DAX_ALWAYS))
return false;
if (!S_ISREG(inode->i_mode))
return false;
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 9873ab27e3fa..d0434b513919 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -1767,7 +1767,7 @@ static const struct mount_opts {
{Opt_min_batch_time, 0, MOPT_GTE0},
{Opt_inode_readahead_blks, 0, MOPT_GTE0},
{Opt_init_itable, 0, MOPT_GTE0},
-   {Opt_dax, EXT4_MOUNT_DAX, MOPT_SET},
+   {Opt_dax, EXT4_MOUNT_DAX_ALWAYS, MOPT_SET},
{Opt_stripe, 0, MOPT_GTE0},
{Opt_resuid, 0, MOPT_GTE0},
{Opt_resgid, 0, MOPT_GTE0},
@@ -3974,7 +3974,7 @@ static int ext4_fill_super(struct super_block *sb, void 
*data, int silent)
 "both data=journal and dioread_nolock");
goto failed_mount;
}
-   if (test_opt(sb, DAX)) {
+   if (test_opt(sb, DAX_ALWAYS)) {
ext4_msg(sb, KERN_ERR, "can't mount with "
 "both data=journal and dax");
goto failed_mount;
@@ -4084,7 +4084,7 @@ static int ext4_fill_super(struct super_block *sb, void 
*data, int silent)
goto failed_mount;
}
 
-   if (sbi->s_mount_opt & EXT4_MOUNT_DAX) {
+   if (sbi->s_mount_opt & EXT4_MOUNT_DAX_ALWAYS) {
if (ext4_has_feature_inline_data(sb)) {
ext4_msg(sb, KERN_ERR, "Cannot use DAX on a filesystem"
" that may contain inline data");
@@ -5404,7 +5404,7 @@ static int ext4_remount(struct super_block *sb, int 
*flags, char *data)
err = -EINVAL;
goto restore_opts;
}
-   if (test_opt(sb, DAX)) {
+   if (test_opt(sb, DAX_ALWAYS)) {
ext4_msg(sb, KERN_ERR, "can't mount with "
 "both data=journal and dax");
err = -EINVAL;
@@ -5425,10 +5425,10 @@ static int ext4_remount(struct super_block *sb, int 
*flags, char *data)
goto restore_opts;
}
 
-   if ((sbi->s_mount_opt ^ old_opts.s_mount_opt) & EXT4_MOUNT_DAX) {
+   if ((sbi->s_mount_opt ^ old_opts.s_mount_opt) & EXT4_MOUNT_DAX_ALWAYS) {
ext4_msg(sb, KERN_WARNING, "warning: refusing change of "
"dax flag with busy inodes while remounting");
-   sbi->s_mount_opt ^= EXT4_MOUNT_DAX;
+   sbi->s_mount_opt ^= EXT4_MOUNT_DAX_ALWAYS;
}
 
if (sbi->s_mount_flags & EXT4_MF_FS_ABORTED)
-- 
2.25.1

[PATCH 1/9] fs/ext4: Narrow scope of DAX check in setflags

2020-05-12 Thread ira . weiny

From: Ira Weiny 

When preventing DAX and journaling on an inode, use the effective DAX
check rather than the mount option.

This will be required to support per inode DAX flags.

Reviewed-by: Jan Kara 
Signed-off-by: Ira Weiny 
---
 fs/ext4/ioctl.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c
index bfc1281fc4cb..5813e5e73eab 100644
--- a/fs/ext4/ioctl.c
+++ b/fs/ext4/ioctl.c
@@ -393,9 +393,9 @@ static int ext4_ioctl_setflags(struct inode *inode,
if ((jflag ^ oldflags) & (EXT4_JOURNAL_DATA_FL)) {
/*
 * Changes to the journaling mode can cause unsafe changes to
-* S_DAX if we are using the DAX mount option.
+* S_DAX if the inode is DAX
 */
-   if (test_opt(inode->i_sb, DAX)) {
+   if (IS_DAX(inode)) {
err = -EBUSY;
goto flags_out;
}
-- 
2.25.1

[PATCH 2/9] fs/ext4: Disallow verity if inode is DAX

2020-05-12 Thread ira . weiny

From: Ira Weiny 

Verity and DAX are incompatible.  Changing the DAX mode due to a verity
flag change is wrong without a corresponding address_space_operations
update.

Make the 2 options mutually exclusive by returning an error if DAX was
set first.

(Setting DAX is already disabled if Verity is set first.)

Signed-off-by: Ira Weiny 

---
Changes:
remove WARN_ON_ONCE
Add documentation for DAX/Verity exclusivity
---
 Documentation/filesystems/ext4/verity.rst | 7 +++
 fs/ext4/verity.c  | 3 +++
 2 files changed, 10 insertions(+)

diff --git a/Documentation/filesystems/ext4/verity.rst 
b/Documentation/filesystems/ext4/verity.rst
index 3e4c0ee0e068..51ab1aa17e59 100644
--- a/Documentation/filesystems/ext4/verity.rst
+++ b/Documentation/filesystems/ext4/verity.rst
@@ -39,3 +39,10 @@ is encrypted as well as the data itself.
 
 Verity files cannot have blocks allocated past the end of the verity
 metadata.
+
+Verity and DAX
+--------------
+
+Verity and DAX are not compatible and attempts to set both of these flags on a
+file will fail.
+
diff --git a/fs/ext4/verity.c b/fs/ext4/verity.c
index dc5ec724d889..f05a09fb2ae4 100644
--- a/fs/ext4/verity.c
+++ b/fs/ext4/verity.c
@@ -113,6 +113,9 @@ static int ext4_begin_enable_verity(struct file *filp)
handle_t *handle;
int err;
 
+   if (IS_DAX(inode))
+   return -EINVAL;
+
if (ext4_verity_in_progress(inode))
return -EBUSY;
 
-- 
2.25.1

[PATCH 6/9] fs/ext4: Only change S_DAX on inode load

2020-05-12 Thread ira . weiny

From: Ira Weiny 

To prevent complications with in memory inodes we only set S_DAX on
inode load.  FS_XFLAG_DAX can be changed at any time and S_DAX will
change after inode eviction and reload.

Add init bool to ext4_set_inode_flags() to indicate if the inode is
being newly initialized.

Assert that S_DAX is not set on an inode which is just being loaded.

Signed-off-by: Ira Weiny 

---
Changes from RFC:
Change J_ASSERT() to WARN_ON_ONCE()
Fix bug which would clear S_DAX incorrectly
---
 fs/ext4/ext4.h   |  2 +-
 fs/ext4/ialloc.c |  2 +-
 fs/ext4/inode.c  | 13 ++---
 fs/ext4/ioctl.c  |  3 ++-
 fs/ext4/super.c  |  4 ++--
 fs/ext4/verity.c |  2 +-
 6 files changed, 17 insertions(+), 9 deletions(-)

diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index 1a3daf2d18ef..86a0994332ce 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -2692,7 +2692,7 @@ extern int ext4_can_truncate(struct inode *inode);
 extern int ext4_truncate(struct inode *);
 extern int ext4_break_layouts(struct inode *);
 extern int ext4_punch_hole(struct inode *inode, loff_t offset, loff_t length);
-extern void ext4_set_inode_flags(struct inode *);
+extern void ext4_set_inode_flags(struct inode *, bool init);
 extern int ext4_alloc_da_blocks(struct inode *inode);
 extern void ext4_set_aops(struct inode *inode);
 extern int ext4_writepage_trans_blocks(struct inode *);
diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c
index 4b8c9a9bdf0c..7941c140723f 100644
--- a/fs/ext4/ialloc.c
+++ b/fs/ext4/ialloc.c
@@ -1116,7 +1116,7 @@ struct inode *__ext4_new_inode(handle_t *handle, struct 
inode *dir,
ei->i_block_group = group;
ei->i_last_alloc_group = ~0;
 
-   ext4_set_inode_flags(inode);
+   ext4_set_inode_flags(inode, true);
if (IS_DIRSYNC(inode))
ext4_handle_sync(handle);
if (insert_inode_locked(inode) < 0) {
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index d3a4c2ed7a1c..23e42a223235 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -4419,11 +4419,13 @@ static bool ext4_should_enable_dax(struct inode *inode)
return false;
 }
 
-void ext4_set_inode_flags(struct inode *inode)
+void ext4_set_inode_flags(struct inode *inode, bool init)
 {
unsigned int flags = EXT4_I(inode)->i_flags;
unsigned int new_fl = 0;
 
+   WARN_ON_ONCE(IS_DAX(inode) && init);
+
if (flags & EXT4_SYNC_FL)
new_fl |= S_SYNC;
if (flags & EXT4_APPEND_FL)
@@ -4434,8 +4436,13 @@ void ext4_set_inode_flags(struct inode *inode)
new_fl |= S_NOATIME;
if (flags & EXT4_DIRSYNC_FL)
new_fl |= S_DIRSYNC;
-   if (ext4_should_enable_dax(inode))
+
+   /* Because of the way inode_set_flags() works we must preserve S_DAX
+* here if already set. */
+   new_fl |= (inode->i_flags & S_DAX);
+   if (init && ext4_should_enable_dax(inode))
new_fl |= S_DAX;
+
if (flags & EXT4_ENCRYPT_FL)
new_fl |= S_ENCRYPTED;
if (flags & EXT4_CASEFOLD_FL)
@@ -4649,7 +4656,7 @@ struct inode *__ext4_iget(struct super_block *sb, 
unsigned long ino,
 * not initialized on a new filesystem. */
}
ei->i_flags = le32_to_cpu(raw_inode->i_flags);
-   ext4_set_inode_flags(inode);
+   ext4_set_inode_flags(inode, true);
inode->i_blocks = ext4_inode_blocks(raw_inode, ei);
ei->i_file_acl = le32_to_cpu(raw_inode->i_file_acl_lo);
if (ext4_has_feature_64bit(sb))
diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c
index 5813e5e73eab..145083e8cd1e 100644
--- a/fs/ext4/ioctl.c
+++ b/fs/ext4/ioctl.c
@@ -381,7 +381,8 @@ static int ext4_ioctl_setflags(struct inode *inode,
ext4_clear_inode_flag(inode, i);
}
 
-   ext4_set_inode_flags(inode);
+   ext4_set_inode_flags(inode, false);
+
inode->i_ctime = current_time(inode);
 
err = ext4_mark_iloc_dirty(handle, inode, );
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index d0434b513919..5ec900fdf73c 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -1344,7 +1344,7 @@ static int ext4_set_context(struct inode *inode, const 
void *ctx, size_t len,
ext4_set_inode_flag(inode, EXT4_INODE_ENCRYPT);
ext4_clear_inode_state(inode,
EXT4_STATE_MAY_INLINE_DATA);
-   ext4_set_inode_flags(inode);
+   ext4_set_inode_flags(inode, false);
}
return res;
}
@@ -1367,7 +1367,7 @@ static int ext4_set_context(struct inode *inode, const 
void *ctx, size_t len,
ctx, len, 0);
if (!res) {
ext4_set_inode_flag(inode, EXT4_INODE_ENCRYPT);
-   ext4_set_inode_flags(inode);
+   ext4_set_inode_flags(inode, false);
res = ext4_mark_inode_dirty(handle, inode);
if (res)

[PATCH 7/9] fs/ext4: Make DAX mount option a tri-state

2020-05-12 Thread ira . weiny

From: Ira Weiny 

We add 'always', 'never', and 'inode' (default).  '-o dax' continues to
operate the same.

Specifically we introduce a 2nd DAX mount flag EXT4_MOUNT2_DAX_NEVER and set
it and EXT4_MOUNT_DAX_ALWAYS appropriately.

We also force EXT4_MOUNT2_DAX_NEVER if !CONFIG_FS_DAX.

https://lore.kernel.org/lkml/20200405061945.ga94...@iweiny-desk2.sc.intel.com/

Signed-off-by: Ira Weiny 

---
Changes from RFC:
Combine remount check for DAX_NEVER with DAX_ALWAYS
Update ext4_should_enable_dax()
---
 fs/ext4/ext4.h  |  1 +
 fs/ext4/inode.c |  2 ++
 fs/ext4/super.c | 43 +--
 3 files changed, 40 insertions(+), 6 deletions(-)

diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index 86a0994332ce..01d1de838896 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -1168,6 +1168,7 @@ struct ext4_inode_info {
  blocks */
 #define EXT4_MOUNT2_HURD_COMPAT0x0004 /* Support 
HURD-castrated
  file systems */
+#define EXT4_MOUNT2_DAX_NEVER  0x0008 /* Do not allow Direct 
Access */
 
 #define EXT4_MOUNT2_EXPLICIT_JOURNAL_CHECKSUM  0x0008 /* User explicitly
specified journal checksum */
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 23e42a223235..140b1930e2f4 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -4400,6 +4400,8 @@ int ext4_get_inode_loc(struct inode *inode, struct 
ext4_iloc *iloc)
 
 static bool ext4_should_enable_dax(struct inode *inode)
 {
+   if (test_opt2(inode->i_sb, DAX_NEVER))
+   return false;
if (!S_ISREG(inode->i_mode))
return false;
if (ext4_should_journal_data(inode))
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 5ec900fdf73c..e01a040a58a9 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -1505,6 +1505,7 @@ enum {
Opt_jqfmt_vfsold, Opt_jqfmt_vfsv0, Opt_jqfmt_vfsv1, Opt_quota,
Opt_noquota, Opt_barrier, Opt_nobarrier, Opt_err,
Opt_usrquota, Opt_grpquota, Opt_prjquota, Opt_i_version, Opt_dax,
+   Opt_dax_str,
Opt_stripe, Opt_delalloc, Opt_nodelalloc, Opt_warn_on_error,
Opt_nowarn_on_error, Opt_mblk_io_submit,
Opt_lazytime, Opt_nolazytime, Opt_debug_want_extra_isize,
@@ -1570,6 +1571,7 @@ static const match_table_t tokens = {
{Opt_barrier, "barrier"},
{Opt_nobarrier, "nobarrier"},
{Opt_i_version, "i_version"},
+   {Opt_dax_str, "dax=%s"},
{Opt_dax, "dax"},
{Opt_stripe, "stripe=%u"},
{Opt_delalloc, "delalloc"},
@@ -1767,6 +1769,7 @@ static const struct mount_opts {
{Opt_min_batch_time, 0, MOPT_GTE0},
{Opt_inode_readahead_blks, 0, MOPT_GTE0},
{Opt_init_itable, 0, MOPT_GTE0},
+   {Opt_dax_str, 0, MOPT_STRING},
{Opt_dax, EXT4_MOUNT_DAX_ALWAYS, MOPT_SET},
{Opt_stripe, 0, MOPT_GTE0},
{Opt_resuid, 0, MOPT_GTE0},
@@ -2076,13 +2079,32 @@ static int handle_mount_opt(struct super_block *sb, 
char *opt, int token,
}
sbi->s_jquota_fmt = m->mount_opt;
 #endif
-   } else if (token == Opt_dax) {
+   } else if (token == Opt_dax || token == Opt_dax_str) {
 #ifdef CONFIG_FS_DAX
-   ext4_msg(sb, KERN_WARNING,
-   "DAX enabled. Warning: EXPERIMENTAL, use at your own risk");
-   sbi->s_mount_opt |= m->mount_opt;
+   char *tmp = match_strdup(&args[0]);
+
+   if (!tmp || !strcmp(tmp, "always")) {
+   ext4_msg(sb, KERN_WARNING,
+   "DAX enabled. Warning: EXPERIMENTAL, use at 
your own risk");
+   sbi->s_mount_opt |= EXT4_MOUNT_DAX_ALWAYS;
+   sbi->s_mount_opt2 &= ~EXT4_MOUNT2_DAX_NEVER;
+   } else if (!strcmp(tmp, "never")) {
+   sbi->s_mount_opt2 |= EXT4_MOUNT2_DAX_NEVER;
+   sbi->s_mount_opt &= ~EXT4_MOUNT_DAX_ALWAYS;
+   } else if (!strcmp(tmp, "inode")) {
+   sbi->s_mount_opt &= ~EXT4_MOUNT_DAX_ALWAYS;
+   sbi->s_mount_opt2 &= ~EXT4_MOUNT2_DAX_NEVER;
+   } else {
+   ext4_msg(sb, KERN_WARNING, "DAX invalid option.");
+   kfree(tmp);
+   return -1;
+   }
+
+   kfree(tmp);
 #else
ext4_msg(sb, KERN_INFO, "dax option not supported");
+   sbi->s_mount_opt2 |= EXT4_MOUNT2_DAX_NEVER;
+   sbi->s_mount_opt &= ~EXT4_MOUNT_DAX_ALWAYS;
return -1;
 #endif
} else if (token == Opt_data_err_abort) {
@@ -2306,6 +2328,13 @@ static int _ext4_show_options(struct seq_file *seq, 
struct super_block *sb,
if (DUMMY_ENCRYPTION_ENABLED(sbi))
SEQ_OPTS_PUTS("test_dummy_encryption");
 
+   if (test_opt2(sb, DAX_NEVER))
+

Re: [PATCH 3/4] dt-bindings: i2c-stm32: add SMBus Alert bindings

2020-05-12 Thread Alain Volmat

Hello Rob,

On Wed, May 13, 2020 at 02:19:32AM +, Rob Herring wrote:
> On Tue, May 05, 2020 at 07:51:10AM +0200, Alain Volmat wrote:
> > Add a new binding of the i2c-stm32f7 driver to enable the handling
> > of the SMBUS-Alert
> > 
> > Signed-off-by: Alain Volmat 
> > ---
> >  Documentation/devicetree/bindings/i2c/st,stm32-i2c.yaml | 4 
> >  1 file changed, 4 insertions(+)
> > 
> > diff --git a/Documentation/devicetree/bindings/i2c/st,stm32-i2c.yaml 
> > b/Documentation/devicetree/bindings/i2c/st,stm32-i2c.yaml
> > index b50a2f420b36..04c0882c3661 100644
> > --- a/Documentation/devicetree/bindings/i2c/st,stm32-i2c.yaml
> > +++ b/Documentation/devicetree/bindings/i2c/st,stm32-i2c.yaml
> > @@ -36,6 +36,10 @@ allOf:
> >  minItems: 3
> >  maxItems: 3
> >  
> > +st,smbus-alert:
> > +  description: Enable the SMBus Alert feature
> > +  $ref: /schemas/types.yaml#/definitions/flag
> > +
> 
> We already have smbus_alert interrupt. Can't you just check for this in 
> the slave nodes and enable if found?

My understanding reading the code (smbalert_probe within i2c-smbus.c, 
of_i2c_setup_smbus_alert called when
registering an adapter within i2c-core-smbus.c) is that smbus_alert refers to 
an interrupt on the
adapter side. That is an interrupt that would be triggered when the adapter is 
receiving an smbus_alert
message.
In our case (stm32f7), we do not have specific interrupt for that purpose. The 
interrupt triggered when
an SMBUS Alert is received (by the adapter) is the same interrupt as for other 
reasons and we check
within the irq handler within stm32f7 the reason before calling 
i2c_handle_smbus_alert if the status
register indicated an SMBUS Alert.
So my understanding is that we cannot rely on the mechanism of naming an 
interrupt smbus_alert.
Did I misunderstand something?

> 
> >- if:
> >properties:
> >  compatible:
> > -- 
> > 2.17.1
> >

[PATCH] dt-bindings: watchdog: Convert UniPhier watchdog timer to json-schema

2020-05-12 Thread Kunihiko Hayashi

Convert UniPhier watchdog timer binding to DT schema format.

Cc: Keiji Hayashibara 
Signed-off-by: Kunihiko Hayashi 
---
 .../bindings/watchdog/socionext,uniphier-wdt.yaml  | 36 ++
 .../devicetree/bindings/watchdog/uniphier-wdt.txt  | 20 
 2 files changed, 36 insertions(+), 20 deletions(-)
 create mode 100644 
Documentation/devicetree/bindings/watchdog/socionext,uniphier-wdt.yaml
 delete mode 100644 Documentation/devicetree/bindings/watchdog/uniphier-wdt.txt

diff --git 
a/Documentation/devicetree/bindings/watchdog/socionext,uniphier-wdt.yaml 
b/Documentation/devicetree/bindings/watchdog/socionext,uniphier-wdt.yaml
new file mode 100644
index 000..a059d16
--- /dev/null
+++ b/Documentation/devicetree/bindings/watchdog/socionext,uniphier-wdt.yaml
@@ -0,0 +1,36 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/watchdog/socionext,uniphier-wdt.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Socionext UniPhier watchdog timer
+
+maintainers:
+  - Keiji Hayashibara 
+
+allOf:
+  - $ref: "watchdog.yaml#"
+
+properties:
+  compatible:
+const: socionext,uniphier-wdt
+
+required:
+  - compatible
+
+additionalProperties: false
+
+examples:
+  - |
+// The UniPhier watchdog should be a subnode of a "syscon" compatible node.
+
+sysctrl@6184 {
+compatible = "socionext,uniphier-ld11-sysctrl",
+ "simple-mfd", "syscon";
+reg = <0x6184 0x1>;
+
+watchdog {
+compatible = "socionext,uniphier-wdt";
+};
+};
diff --git a/Documentation/devicetree/bindings/watchdog/uniphier-wdt.txt 
b/Documentation/devicetree/bindings/watchdog/uniphier-wdt.txt
deleted file mode 100644
index bf63375..000
--- a/Documentation/devicetree/bindings/watchdog/uniphier-wdt.txt
+++ /dev/null
@@ -1,20 +0,0 @@
-UniPhier watchdog timer controller
-
-This UniPhier watchdog timer controller must be under sysctrl node.
-
-Required properties:
-- compatible: should be "socionext,uniphier-wdt"
-
-Example:
-
-   sysctrl@6184 {
-   compatible = "socionext,uniphier-ld11-sysctrl",
-"simple-mfd", "syscon";
-   reg = <0x6184 0x4000>;
-
-   watchdog {
-   compatible = "socionext,uniphier-wdt";
-   }
-
-   other nodes ...
-   };
-- 
2.7.4

[PATCH] ASoC: wm8962: Use force clear for WM8962_SYSCLK_ENA after reset

2020-05-12 Thread Shengjiu Wang

When CLOCKING2 is a non-volatile register, we need to force clear
the WM8962_SYSCLK_ENA bit after reset, because the value in the cache
may be 0 while in hardware it is 1. Otherwise there will be an issue
with the statement below in the driver.

/* SYSCLK defaults to on; make sure it is off so we can safely
 * write to registers if the device is declocked.

Fixes: c38b608504aa ("ASoC: wm8962: set CLOCKING2 as non-volatile register")
Signed-off-by: Shengjiu Wang 
---
 sound/soc/codecs/wm8962.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/sound/soc/codecs/wm8962.c b/sound/soc/codecs/wm8962.c
index 0a2cfff1..08d19df8a700 100644
--- a/sound/soc/codecs/wm8962.c
+++ b/sound/soc/codecs/wm8962.c
@@ -3798,8 +3798,8 @@ static int wm8962_runtime_resume(struct device *dev)
/* SYSCLK defaults to on; make sure it is off so we can safely
 * write to registers if the device is declocked.
 */
-   regmap_update_bits(wm8962->regmap, WM8962_CLOCKING2,
-  WM8962_SYSCLK_ENA, 0);
+   regmap_write_bits(wm8962->regmap, WM8962_CLOCKING2,
+ WM8962_SYSCLK_ENA, 0);
 
/* Ensure we have soft control over all registers */
regmap_update_bits(wm8962->regmap, WM8962_CLOCKING2,
-- 
2.21.0

Re: [RFC PATCH v2 1/3] printk: Add function to set console to preferred console's driver

2020-05-12 Thread Sergey Senozhatsky

On (20/04/30 19:14), Alper Nebi Yasak wrote:
[..]
> +int update_console_to_preferred(void)
> +{
> + struct console_cmdline *c = NULL;
> + struct console *con = NULL;
> + struct console *tmp = NULL;
> +
> + if (preferred_console >= 0)
> + c = &console_cmdline[preferred_console];
> +
> + if (!c || !c->name[0])
> + return 0;
> +
> + for_each_console(con) {
> + if (!con->next || !(con->next->flags & CON_ENABLED))
> + continue;
> + if (strcmp(c->name, con->next->name) != 0)
> + continue;

This matches the consoles by exact name. Consoles can have aliases,
but matching by alias is rather complex and it has some side effects.

Let me Cc more people on this. VT has a console takeover logic,
I wonder if we can extend the takeover code somehow.

Daniel, any thoughts?

https://lore.kernel.org/lkml/20200430161438.17640-1-alpernebiya...@gmail.com

-ss

Re: [PATCH net-next 1/4] net: ethernet: validate pause autoneg setting

2020-05-12 Thread Russell King - ARM Linux admin

On Tue, May 12, 2020 at 08:48:22PM -0700, Doug Berger wrote:
> On 5/12/2020 11:55 AM, Russell King - ARM Linux admin wrote:
> > On Tue, May 12, 2020 at 11:31:39AM -0700, Doug Berger wrote:
> >> This was intended as a fix, but I thought it would be better to keep it
> >> as part of this set for context and since net-next is currently open.
> >>
> >> The context is trying to improve the phylib support for offloading
> >> ethtool pause configuration and this is something that could be checked
> >> in a single location rather than by individual drivers.
> >>
> >> I included it here to get feedback about its appropriateness as a common
> >> behavior. I should have been more explicit about that.
> >>
> >> Personally, I'm actually not that fond of this change since it can
> >> easily be a source of confusion with the ethtool interface because the
> >> link autonegotiation and the pause autonegotiation are controlled by
> >> different commands.
> >>
> >> Since the ethtool -A command performs a read/modify/write of pause
> >> parameters, you can get strange results like these:
> >> # ethtool -s eth0 speed 100 duplex full autoneg off
> >> # ethtool -A eth0 tx off
> >> Cannot set device pause parameters: Invalid argument
> >> #
> >> Because, the get read pause autoneg as enabled and only the tx_pause
> >> member of the structure was updated.
> > 
> > This looks like the same argument I've been having with Heiner over
> > the EEE interface, except there's a difference here.
> > 
> > # ethtool -A eth0 autoneg on
> > # ethtool -s eth0 autoneg off speed 100 duplex full
> > 
> > After those two commands, what is the state of pause mode?  The answer
> > is, it's disabled.
> > 
> > # ethtool -A eth0 autoneg off rx on tx on
> > 
> > is perfectly acceptable, as we are forcing pause modes at the local
> > end of the link.
> > 
> > # ethtool -A eth0 autoneg on
> > 
> > Now, the question is whether that should be allowed or not - but this
> > is merely restoring the "pause" settings that were in effect prior
> > to the previous command.  It does not enable pause negotiation,
> > because autoneg as a whole is disabled, but it _allows_ pause
> > negotiation to occur when autoneg is enabled at some point in the
> > future.
> > 
> > Also, allowing "ethtool -A eth0 autoneg on" when "ethtool -s eth0
> > autoneg off" means you can configure the negotiation parameters
> > _before_ triggering a negotiation cycle on the link.  In other words,
> > it would avoid:
> > 
> > # ethtool -s eth0 autoneg on
> > # # Link renegotiates
> > # ethtool -A eth0 autoneg on
> > # # Link renegotiates a second time
> > 
> > and it also means that if stuff has already been scripted to avoid
> > this, nothing breaks.
> > 
> > If we start rejecting ethtool -A because autoneg is disabled, then
> > things get difficult to configure - we would need ethtool documentation
> > to state that autoneg must be enabled before configuration of pause
> > and EEE can be done.  IMHO, that hurts usability, and adds confusion.
> > 
> Thanks for your input and I agree with what you have said here. I will
> remove this commit from the set when I resubmit and I assume that, like
> Michal, you would like to see the comment in ethtool.h revised.
> 
> I think the crux of the matter is that the meaning of the autoneg pause
> parameter is not well specified, and that is fundamentally what I am
> trying to clarify in a common implementation that might help unify a
> consistent behavior across network drivers.
> 
> My interpretation is that the link autonegotiation and the pause
> autonegotiation can be meaningfully set independently from each other
> and that the interplay between the two has easily overlooked subtleties.
> 
> My opinion (which is at least in part drawn from my interpretation of
> your opinion) is as follows with regard to pause behaviors:
> 
> The link autonegotiation parameter concerns itself with whether the
> Pause capabilities are advertised as part of autonegotiation of link
> parameters.
> 
> The pause autonegotiation parameter concerns itself with whether the
> local node is willing to accept the advertised capabilities of its peer
> as input into its pause configuration.
> 
> The Tx_Pause and Rx_Pause parameters indicate in which directions pause
> frames should be supported.

This is where the ethtool interface breaks down - they are unable
to sanely define which should be supported, as what you end up with
could be wildly different from what you thought.  See the
documentation against linkmode_set_pause() where I detail the issues
in this API.

For example, if you specify Tx_Pause = 0, Rx_Pause = 1, you can end
up with the pause negotiating transmit and receive pause.

If you specify Tx_Pause = 1, Rx_Pause = 1, and the far end supports
only AsymPause, then you end up with pause disabled, despite the
link actually being able to support receive pause at the local end.
Whereas if you specified Tx_Pause = 0, Rx_Pause=1 in this scenario,
you would get receive

Re: [PATCH net 2/2 RESEND] ipmr: Add lockdep expression to ipmr_for_each_table macro

2020-05-12 Thread Madhuparna Bhowmik

On Tue, May 12, 2020 at 09:32:31AM -0700, Jakub Kicinski wrote:
> On Tue, 12 May 2020 10:47:05 +0530 Madhuparna Bhowmik wrote:
> > > >  #ifdef CONFIG_IP_MROUTE_MULTIPLE_TABLES
> > > > -#define ipmr_for_each_table(mrt, net) \
> > > > -   list_for_each_entry_rcu(mrt, >ipv4.mr_tables, list, \
> > > > -   lockdep_rtnl_is_held())
> > > > +#define ipmr_for_each_table(mrt, net)  
> > > > \
> > > > +   list_for_each_entry_rcu(mrt, >ipv4.mr_tables, list,
> > > > \
> > > > +   lockdep_rtnl_is_held() ||   
> > > > \
> > > > +   lockdep_is_held(_ops_rwsem))  
> > > 
> > > This is a strange condition, IMHO. How can we be fine with either
> > > lock.. This is supposed to be the writer side lock, one can't have 
> > > two writer side locks..
> > > 
> > > I think what is happening is this:
> > > 
> > > ipmr_net_init() -> ipmr_rules_init() -> ipmr_new_table()
> > > 
> > > ipmr_new_table() returns an existing table if there is one, but
> > > obviously none can exist at init.  So a better fix would be:
> > > 
> > > #define ipmr_for_each_table(mrt, net) 
> > > \
> > >   list_for_each_entry_rcu(mrt, >ipv4.mr_tables, list,\
> > >   lockdep_rtnl_is_held() ||   \
> > >   list_empty(>ipv4.mr_tables))
> > >  
> > (adding Stephen)
> > 
> > Hi Jakub,
> > 
> > Thank you for your suggestion about this patch.
> > Here is a stack trace for ipmr.c:
> > 
> > [...]
> 
> Thanks!
> 
> > > Thoughts?  
> > 
> > Do you think a similar fix (the one you suggested) is also applicable
> > in the ip6mr case.
> 
> Yes, looking at the code it seems ip6mr has the exact same flow for
> netns init.

Alright, thanks a lot.
I will send a patch for ip6mr.c soon.

Thank you,
Madhuparna

Re: [RFC 1/1] Weighted approach to gather and use history in TEO governor

2020-05-12 Thread Pratik Sampat


Thanks for your comment.


On 12/05/20 11:07 pm, Peter Zijlstra wrote:

Just a quick note..

On Mon, May 11, 2020 at 07:40:55PM +0530, Pratik Rajesh Sampat wrote:


+   /*
+* Rearrange the weight distribution of the state, increase the weight
+* by the LEARNING RATE % for the idle state that was supposed to be
+* chosen and reduce by the same amount for rest of the states
+*
+* If the weights are greater than (100 - LEARNING_RATE) % or lesser
+* than LEARNING_RATE %, do not increase or decrease the confidence
+* respectively
+*/
+   for (i = 0; i < drv->state_count; i++) {
+   unsigned int delta;
+
+   if (idx == -1)
+   break;
+   if (i ==  idx) {
+   delta = (LEARNING_RATE * 
cpu_data->state_mat[last_idx][i]) / 100;

100 is a crap number to divide by as a computer. We bio-puddings happened
to have 10 digits, so 100 makes sense to us, but it does not to our
binary friends.



Absolutely! I just wrote the code exactly the way I did the Math on paper,
definitely need to figure out an optimal way of doing things.

~Pratik

Re: [PATCH 1/1] powerpc/rtas: Implement reentrant rtas call

2020-05-12 Thread Leonardo Bras

v2: 
http://patchwork.ozlabs.org/project/linuxppc-dev/patch/20200513044025.105379-2-leobra...@gmail.com/

(Series:
http://patchwork.ozlabs.org/project/linuxppc-dev/list/?series=176534) 


signature.asc
Description: This is a digitally signed message part

Re: [PATCH v2 1/1] powerpc/crash: Use NMI context for printk when starting to crash

2020-05-12 Thread Leonardo Bras

Hello Nick, thanks for your feedback.
Comments inline:

On Wed, 2020-05-13 at 14:36 +1000, Nicholas Piggin wrote:
> Excerpts from Leonardo Bras's message of May 13, 2020 7:45 am:
> > Currently, if printk lock (logbuf_lock) is held by other thread during
> > crash, there is a chance of deadlocking the crash on next printk, and
> > blocking a possibly desired kdump.
> > 
> > At the start of default_machine_crash_shutdown, make printk enter
> > NMI context, as it will use per-cpu buffers to store the message,
> > and avoid locking logbuf_lock.
> 
> printk_nmi_enter is used in one other place outside nmi_enter.
> 
> Is there a different/better way to handle this? What do other 
> architectures do?

To be honest, I was unaware of nmi_enter() and I have yet to study what
other architectures do here.

> Other subsystems get put into an nmi-mode when we call nmi_enter
> (lockdep, ftrace, rcu etc). It seems like those would be useful for 
> similar reasons, so at least explaining why that is not used in a 
> comment would be good.

My reasoning for using printk_nmi_enter() here was only to keep it from
using printk regular buffer (and locking logbuf_lock) at this point of
the crash.

I have yet to see how nmi_enter() extra functions would happen to
interfere with the crash at this point. 

(In a quick look at x86, (native_machine_crash_shutdown) I could not
see it using any printk, so it may not be necessary).

> Aside from that, I welcome any effort to make our crashes more reliable
> so thanks for working on this stuff.
> 
> Thanks,
> Nick

Thank you, it means a lot.

Leonardo Bras

[PATCH V4 2/4] rpmsg: glink: Add support to handle signals command

2020-05-12 Thread Arun Kumar Neelakantam

Remote peripherals send signal notifications over glink with commandID 15.

Add support to send and receive the signal command and convert the signals
from NATIVE to TIOCM while receiving and vice versa while sending.

Signed-off-by: Chris Lew 
Signed-off-by: Arun Kumar Neelakantam 
---
 drivers/rpmsg/qcom_glink_native.c | 126 ++
 1 file changed, 126 insertions(+)

diff --git a/drivers/rpmsg/qcom_glink_native.c 
b/drivers/rpmsg/qcom_glink_native.c
index fc8ef66..68e039a 100644
--- a/drivers/rpmsg/qcom_glink_native.c
+++ b/drivers/rpmsg/qcom_glink_native.c
@@ -1,5 +1,6 @@
 // SPDX-License-Identifier: GPL-2.0
 /*
+ * Copyright (c) 2018, The Linux Foundation.
  * Copyright (c) 2016-2017, Linaro Ltd
  */
 
@@ -17,6 +18,7 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 #include 
 
@@ -150,6 +152,8 @@ enum {
  * @intent_req_lock: Synchronises multiple intent requests
  * @intent_req_result: Result of intent request
  * @intent_req_comp: Completion for intent_req signalling
+ * @lsigs: local side signals
+ * @rsigs: remote side signals
  */
 struct glink_channel {
struct rpmsg_endpoint ept;
@@ -181,6 +185,10 @@ struct glink_channel {
struct mutex intent_req_lock;
bool intent_req_result;
struct completion intent_req_comp;
+
+   unsigned int lsigs;
+   unsigned int rsigs;
+
 };
 
 #define to_glink_channel(_ept) container_of(_ept, struct glink_channel, ept)
@@ -201,9 +209,15 @@ static const struct rpmsg_endpoint_ops glink_endpoint_ops;
 #define RPM_CMD_TX_DATA_CONT   12
 #define RPM_CMD_READ_NOTIF 13
 #define RPM_CMD_RX_DONE_W_REUSE14
+#define RPM_CMD_SIGNALS15
 
 #define GLINK_FEATURE_INTENTLESS   BIT(1)
 
+#define NATIVE_DTR_SIG BIT(31)
+#define NATIVE_CTS_SIG BIT(30)
+#define NATIVE_CD_SIG  BIT(29)
+#define NATIVE_RI_SIG  BIT(28)
+
 static void qcom_glink_rx_done_work(struct work_struct *work);
 
 static struct glink_channel *qcom_glink_alloc_channel(struct qcom_glink *glink,
@@ -975,6 +989,76 @@ static int qcom_glink_rx_open_ack(struct qcom_glink 
*glink, unsigned int lcid)
return 0;
 }
 
+/**
+ * qcom_glink_send_signals() - convert a signal  cmd to wire format and 
transmit
+ * @glink: The transport to transmit on.
+ * @channel:   The glink channel
+ * @sigs:  The signals to encode.
+ *
+ * Return: 0 on success or standard Linux error code.
+ */
+static int qcom_glink_send_signals(struct qcom_glink *glink,
+  struct glink_channel *channel,
+  u32 sigs)
+{
+   struct glink_msg msg;
+
+   /* convert signals from TIOCM to NATIVE */
+   sigs &= 0x0fff;
+   if (sigs & TIOCM_DTR)
+   sigs |= NATIVE_DTR_SIG;
+   if (sigs & TIOCM_RTS)
+   sigs |= NATIVE_CTS_SIG;
+   if (sigs & TIOCM_CD)
+   sigs |= NATIVE_CD_SIG;
+   if (sigs & TIOCM_RI)
+   sigs |= NATIVE_RI_SIG;
+
+   msg.cmd = cpu_to_le16(RPM_CMD_SIGNALS);
+   msg.param1 = cpu_to_le16(channel->lcid);
+   msg.param2 = cpu_to_le32(sigs);
+
+   return qcom_glink_tx(glink, , sizeof(msg), NULL, 0, true);
+}
+
+static int qcom_glink_handle_signals(struct qcom_glink *glink,
+unsigned int rcid, unsigned int signals)
+{
+   struct glink_channel *channel;
+   unsigned long flags;
+   u32 old;
+
+   spin_lock_irqsave(>idr_lock, flags);
+   channel = idr_find(>rcids, rcid);
+   spin_unlock_irqrestore(>idr_lock, flags);
+   if (!channel) {
+   dev_err(glink->dev, "signal for non-existing channel\n");
+   return -EINVAL;
+   }
+
+   old = channel->rsigs;
+
+   /* convert signals from NATIVE to TIOCM */
+   if (signals & NATIVE_DTR_SIG)
+   signals |= TIOCM_DSR;
+   if (signals & NATIVE_CTS_SIG)
+   signals |= TIOCM_CTS;
+   if (signals & NATIVE_CD_SIG)
+   signals |= TIOCM_CD;
+   if (signals & NATIVE_RI_SIG)
+   signals |= TIOCM_RI;
+   signals &= 0x0fff;
+
+   channel->rsigs = signals;
+
+   if (channel->ept.sig_cb) {
+   channel->ept.sig_cb(channel->ept.rpdev, channel->ept.priv,
+   old, channel->rsigs);
+   }
+
+   return 0;
+}
+
 static irqreturn_t qcom_glink_native_intr(int irq, void *data)
 {
struct qcom_glink *glink = data;
@@ -1036,6 +1120,10 @@ static irqreturn_t qcom_glink_native_intr(int irq, void 
*data)
qcom_glink_handle_intent_req_ack(glink, param1, param2);
qcom_glink_rx_advance(glink, ALIGN(sizeof(msg), 8));
break;
+   case RPM_CMD_SIGNALS:
+   qcom_glink_handle_signals(glink, param1, param2);
+   qcom_glink_rx_advance(glink, ALIGN(sizeof(msg), 8));
+   break;

[RESEND PATCH V4 0/4] Add TIOCM Signals support for RPMSG char devices

2020-05-12 Thread Arun Kumar Neelakantam

Glink transport supports signals to exchange state notification between
local and remote side clients. Adding support to send/receive the signal
command and notify the clients through callback and POLL notification.

Changes since v3:
- Correct the TIOCMGET case handling as per new rpmsg_get_signals prototype
- Update the rpmsg_get_signals function header

Changes since v2:
- Modify the rpmsg_get_signals function prototype

Changes since v1:
- Split the patches as per functional areas like core, char, glink
- Add set, clear mask for TIOCMSET
- Merge the char signal callback and POLLPRI patches

Arun Kumar Neelakantam (4):
  rpmsg: core: Add signal API support
  rpmsg: glink: Add support to handle signals command
  rpmsg: char: Add TIOCMGET/TIOCMSET ioctl support
  rpmsg: char: Add signal callback and POLLPRI support

 drivers/rpmsg/qcom_glink_native.c | 126 ++
 drivers/rpmsg/rpmsg_char.c|  75 ++-
 drivers/rpmsg/rpmsg_core.c|  41 +
 drivers/rpmsg/rpmsg_internal.h|   5 ++
 include/linux/rpmsg.h |  26 
 5 files changed, 270 insertions(+), 3 deletions(-)

-- 
2.7.4

[PATCH V4 4/4] rpmsg: char: Add signal callback and POLLPRI support

2020-05-12 Thread Arun Kumar Neelakantam

Register a callback to get the signal notifications from rpmsg and
send POLLPRI mask to indicate the signal change in POLL system call.

Signed-off-by: Arun Kumar Neelakantam 
---
 drivers/rpmsg/rpmsg_char.c | 22 ++
 1 file changed, 22 insertions(+)

diff --git a/drivers/rpmsg/rpmsg_char.c b/drivers/rpmsg/rpmsg_char.c
index e2f92f3..ae15d4f 100644
--- a/drivers/rpmsg/rpmsg_char.c
+++ b/drivers/rpmsg/rpmsg_char.c
@@ -64,6 +64,7 @@ struct rpmsg_ctrldev {
  * @queue_lock:synchronization of @queue operations
  * @queue: incoming message queue
  * @readq: wait object for incoming queue
+ * @sig_pending:state of signal notification
  */
 struct rpmsg_eptdev {
struct device dev;
@@ -78,6 +79,8 @@ struct rpmsg_eptdev {
spinlock_t queue_lock;
struct sk_buff_head queue;
wait_queue_head_t readq;
+
+   bool sig_pending;
 };
 
 static int rpmsg_eptdev_destroy(struct device *dev, void *data)
@@ -122,6 +125,19 @@ static int rpmsg_ept_cb(struct rpmsg_device *rpdev, void 
*buf, int len,
return 0;
 }
 
+static int rpmsg_sigs_cb(struct rpmsg_device *rpdev, void *priv,
+u32 old, u32 new)
+{
+   struct rpmsg_eptdev *eptdev = priv;
+
+   eptdev->sig_pending = true;
+
+   /* wake up any blocking processes, waiting for signal notification */
+   wake_up_interruptible(>readq);
+   return 0;
+}
+
+
 static int rpmsg_eptdev_open(struct inode *inode, struct file *filp)
 {
struct rpmsg_eptdev *eptdev = cdev_to_eptdev(inode->i_cdev);
@@ -138,6 +154,7 @@ static int rpmsg_eptdev_open(struct inode *inode, struct 
file *filp)
return -EINVAL;
}
 
+   ept->sig_cb = rpmsg_sigs_cb;
eptdev->ept = ept;
filp->private_data = eptdev;
 
@@ -156,6 +173,7 @@ static int rpmsg_eptdev_release(struct inode *inode, struct 
file *filp)
eptdev->ept = NULL;
}
mutex_unlock(>ept_lock);
+   eptdev->sig_pending = false;
 
/* Discard all SKBs */
skb_queue_purge(>queue);
@@ -266,6 +284,9 @@ static __poll_t rpmsg_eptdev_poll(struct file *filp, 
poll_table *wait)
if (!skb_queue_empty(>queue))
mask |= EPOLLIN | EPOLLRDNORM;
 
+   if (eptdev->sig_pending)
+   mask |= POLLPRI;
+
mask |= rpmsg_poll(eptdev->ept, filp, wait);
 
return mask;
@@ -309,6 +330,7 @@ static long rpmsg_eptdev_ioctl(struct file *fp, unsigned 
int cmd,
 
switch (cmd) {
case TIOCMGET:
+   eptdev->sig_pending = false;
ret = rpmsg_get_signals(eptdev->ept);
if (ret >= 0)
ret = put_user(ret, (int __user *)arg);
-- 
2.7.4

[PATCH V4 1/4] rpmsg: core: Add signal API support

2020-05-12 Thread Arun Kumar Neelakantam

Some transports like Glink support the state notifications between
clients using signals similar to serial protocol signals.

Signed-off-by: Chris Lew 
Signed-off-by: Arun Kumar Neelakantam 
---
 drivers/rpmsg/rpmsg_core.c | 41 +
 drivers/rpmsg/rpmsg_internal.h |  5 +
 include/linux/rpmsg.h  | 26 ++
 3 files changed, 72 insertions(+)

diff --git a/drivers/rpmsg/rpmsg_core.c b/drivers/rpmsg/rpmsg_core.c
index d6c3275..453790b 100644
--- a/drivers/rpmsg/rpmsg_core.c
+++ b/drivers/rpmsg/rpmsg_core.c
@@ -2,6 +2,7 @@
 /*
  * remote processor messaging bus
  *
+ * Copyright (c) 2018, The Linux Foundation.
  * Copyright (C) 2011 Texas Instruments, Inc.
  * Copyright (C) 2011 Google, Inc.
  *
@@ -283,6 +284,42 @@ int rpmsg_trysend_offchannel(struct rpmsg_endpoint *ept, 
u32 src, u32 dst,
 }
 EXPORT_SYMBOL(rpmsg_trysend_offchannel);
 
+/**
+ * rpmsg_get_signals() - get the signals for this endpoint
+ * @ept:   the rpmsg endpoint
+ *
+ * Returns signal bits on success and an appropriate error value on failure.
+ */
+int rpmsg_get_signals(struct rpmsg_endpoint *ept)
+{
+   if (WARN_ON(!ept))
+   return -EINVAL;
+   if (!ept->ops->get_signals)
+   return -EOPNOTSUPP;
+
+   return ept->ops->get_signals(ept);
+}
+EXPORT_SYMBOL(rpmsg_get_signals);
+
+/**
+ * rpmsg_set_signals() - set the remote signals for this endpoint
+ * @ept:   the rpmsg endpoint
+ * @set:   set mask for signals
+ * @clear: clear mask for signals
+ *
+ * Returns 0 on success and an appropriate error value on failure.
+ */
+int rpmsg_set_signals(struct rpmsg_endpoint *ept, u32 set, u32 clear)
+{
+   if (WARN_ON(!ept))
+   return -EINVAL;
+   if (!ept->ops->set_signals)
+   return -EOPNOTSUPP;
+
+   return ept->ops->set_signals(ept, set, clear);
+}
+EXPORT_SYMBOL(rpmsg_set_signals);
+
 /*
  * match an rpmsg channel with a channel info struct.
  * this is used to make sure we're not creating rpmsg devices for channels
@@ -468,6 +505,10 @@ static int rpmsg_dev_probe(struct device *dev)
 
rpdev->ept = ept;
rpdev->src = ept->addr;
+
+   if (rpdrv->signals)
+   ept->sig_cb = rpdrv->signals;
+
}
 
err = rpdrv->probe(rpdev);
diff --git a/drivers/rpmsg/rpmsg_internal.h b/drivers/rpmsg/rpmsg_internal.h
index 3fc83cd..8958d6c 100644
--- a/drivers/rpmsg/rpmsg_internal.h
+++ b/drivers/rpmsg/rpmsg_internal.h
@@ -2,6 +2,7 @@
 /*
  * remote processor messaging bus internals
  *
+ * Copyright (c) 2018, The Linux Foundation.
  * Copyright (C) 2011 Texas Instruments, Inc.
  * Copyright (C) 2011 Google, Inc.
  *
@@ -47,6 +48,8 @@ struct rpmsg_device_ops {
  * @trysendto: see @rpmsg_trysendto(), optional
  * @trysend_offchannel:see @rpmsg_trysend_offchannel(), optional
  * @poll:  see @rpmsg_poll(), optional
+ * @get_signals:   see @rpmsg_get_signals(), optional
+ * @set_signals:   see @rpmsg_set_signals(), optional
  *
  * Indirection table for the operations that a rpmsg backend should implement.
  * In addition to @destroy_ept, the backend must at least implement @send and
@@ -66,6 +69,8 @@ struct rpmsg_endpoint_ops {
 void *data, int len);
__poll_t (*poll)(struct rpmsg_endpoint *ept, struct file *filp,
 poll_table *wait);
+   int (*get_signals)(struct rpmsg_endpoint *ept);
+   int (*set_signals)(struct rpmsg_endpoint *ept, u32 set, u32 clear);
 };
 
 int rpmsg_register_device(struct rpmsg_device *rpdev);
diff --git a/include/linux/rpmsg.h b/include/linux/rpmsg.h
index 9fe156d..48c8ae3 100644
--- a/include/linux/rpmsg.h
+++ b/include/linux/rpmsg.h
@@ -2,6 +2,7 @@
 /*
  * Remote processor messaging
  *
+ * Copyright (c) 2018 The Linux Foundation.
  * Copyright (C) 2011 Texas Instruments, Inc.
  * Copyright (C) 2011 Google, Inc.
  * All rights reserved.
@@ -60,6 +61,7 @@ struct rpmsg_device {
 };
 
 typedef int (*rpmsg_rx_cb_t)(struct rpmsg_device *, void *, int, void *, u32);
+typedef int (*rpmsg_rx_sig_t)(struct rpmsg_device *, void *, u32, u32);
 
 /**
  * struct rpmsg_endpoint - binds a local rpmsg address to its user
@@ -67,6 +69,7 @@ typedef int (*rpmsg_rx_cb_t)(struct rpmsg_device *, void *, 
int, void *, u32);
  * @refcount: when this drops to zero, the ept is deallocated
  * @cb: rx callback handler
  * @cb_lock: must be taken before accessing/changing @cb
+ * @sig_cb: rx serial signal handler
  * @addr: local rpmsg address
  * @priv: private data for the driver's use
  *
@@ -89,6 +92,7 @@ struct rpmsg_endpoint {
struct kref refcount;
rpmsg_rx_cb_t cb;
struct mutex cb_lock;
+   rpmsg_rx_sig_t sig_cb;
u32 addr;
void *priv;
 
@@ -102,6 +106,7 @@ struct rpmsg_endpoint {
  * @probe: invoked when a matching rpmsg channel (i.e. device) is found
  * @remove: invoked when the

[PATCH V4 3/4] rpmsg: char: Add TIOCMGET/TIOCMSET ioctl support

2020-05-12 Thread Arun Kumar Neelakantam

Add TIOCMGET and TIOCMSET ioctl support for rpmsg char device nodes
to get/set the low level transport signals.

Signed-off-by: Arun Kumar Neelakantam 
---
 drivers/rpmsg/rpmsg_char.c | 53 +++---
 1 file changed, 50 insertions(+), 3 deletions(-)

diff --git a/drivers/rpmsg/rpmsg_char.c b/drivers/rpmsg/rpmsg_char.c
index 4bbbacd..e2f92f3 100644
--- a/drivers/rpmsg/rpmsg_char.c
+++ b/drivers/rpmsg/rpmsg_char.c
@@ -1,5 +1,6 @@
 // SPDX-License-Identifier: GPL-2.0
 /*
+ * Copyright (c) 2018, The Linux Foundation.
  * Copyright (c) 2016, Linaro Ltd.
  * Copyright (c) 2012, Michal Simek 
  * Copyright (c) 2012, PetaLogix
@@ -19,6 +20,7 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 #include 
 
@@ -269,15 +271,60 @@ static __poll_t rpmsg_eptdev_poll(struct file *filp, 
poll_table *wait)
return mask;
 }
 
+static int rpmsg_eptdev_tiocmset(struct file *fp, unsigned int cmd,
+int __user *arg)
+{
+   struct rpmsg_eptdev *eptdev = fp->private_data;
+   u32 set, clear, val;
+   int ret;
+
+   ret = get_user(val, arg);
+   if (ret)
+   return ret;
+   set = clear = 0;
+   switch (cmd) {
+   case TIOCMBIS:
+   set = val;
+   break;
+   case TIOCMBIC:
+   clear = val;
+   break;
+   case TIOCMSET:
+   set = val;
+   clear = ~val;
+   break;
+   }
+
+   set &= TIOCM_DTR | TIOCM_RTS | TIOCM_CD | TIOCM_RI;
+   clear &= TIOCM_DTR | TIOCM_RTS | TIOCM_CD | TIOCM_RI;
+
+   return rpmsg_set_signals(eptdev->ept, set, clear);
+}
+
 static long rpmsg_eptdev_ioctl(struct file *fp, unsigned int cmd,
   unsigned long arg)
 {
struct rpmsg_eptdev *eptdev = fp->private_data;
+   int ret;
 
-   if (cmd != RPMSG_DESTROY_EPT_IOCTL)
-   return -EINVAL;
+   switch (cmd) {
+   case TIOCMGET:
+   ret = rpmsg_get_signals(eptdev->ept);
+   if (ret >= 0)
+   ret = put_user(ret, (int __user *)arg);
+   break;
+   case TIOCMSET:
+   case TIOCMBIS:
+   case TIOCMBIC:
+   ret = rpmsg_eptdev_tiocmset(fp, cmd, (int __user *)arg);
+   break;
+   case RPMSG_DESTROY_EPT_IOCTL:
+   ret = rpmsg_eptdev_destroy(>dev, NULL);
+   default:
+   ret = -EINVAL;
+   }
 
-   return rpmsg_eptdev_destroy(>dev, NULL);
+   return ret;
 }
 
 static const struct file_operations rpmsg_eptdev_fops = {
-- 
2.7.4

[PATCH V5 0/5] Add chrdev and name query support for GLINK

2020-05-12 Thread Arun Kumar Neelakantam

Add support for the GLINK rpmsg transport to register a rpmsg chrdev.
This will create the rpmsg_ctrl nodes for userspace clients to open 
rpmsg epts. The rpmsg chrdev allocation is done by allocating a local
channel which also allocates an ept. We need to add some guards against
edge cases for this chrdev because it will never fully open.

Changes since v4:
- Resending by removing approved patches

Changes since v3:
- Change to device_add_group for rpmsg name attr
- Add patch to unregister the rpmsg device
- Add patch to support compat ioctl for rpmsg char driver

Changes since v2:
- Revert change to make glink attribute table const

Changes since v1:
- Add explanation to dt-bindings commit message
- Add patch complete_all the open_req/ack variables
- Add patch to prevent null pointer dereference in chrdev channel release
- Change chrdev allocation to use glink channel allocation
- Change glink attr struct to const


Arun Kumar Neelakantam (1):
  rpmsg: glink: unregister rpmsg device during endpoint destroy

Chris Lew (4):
  rpmsg: glink: Use complete_all for open states
  rpmsg: Guard against null endpoint ops in destroy
  rpmsg: glink: Add support for rpmsg glink chrdev
  rpmsg: glink: Expose rpmsg name attr for glink

 drivers/rpmsg/qcom_glink_native.c | 79 +--
 drivers/rpmsg/rpmsg_core.c|  2 +-
 2 files changed, 77 insertions(+), 4 deletions(-)

-- 
2.7.4

[PATCH V5 2/5] rpmsg: Guard against null endpoint ops in destroy

2020-05-12 Thread Arun Kumar Neelakantam

From: Chris Lew 

In RPMSG GLINK the chrdev device will allocate an ept as part of the
rpdev creation. This device will not register endpoint ops even though
it has an allocated ept. Protect against the case where the device is
being destroyed.

Signed-off-by: Chris Lew 
Signed-off-by: Arun Kumar Neelakantam 
---
 drivers/rpmsg/rpmsg_core.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/rpmsg/rpmsg_core.c b/drivers/rpmsg/rpmsg_core.c
index e330ec4..d6c3275 100644
--- a/drivers/rpmsg/rpmsg_core.c
+++ b/drivers/rpmsg/rpmsg_core.c
@@ -81,7 +81,7 @@ EXPORT_SYMBOL(rpmsg_create_ept);
  */
 void rpmsg_destroy_ept(struct rpmsg_endpoint *ept)
 {
-   if (ept)
+   if (ept && ept->ops)
ept->ops->destroy_ept(ept);
 }
 EXPORT_SYMBOL(rpmsg_destroy_ept);
-- 
2.7.4

[PATCH V5 3/5] rpmsg: glink: Add support for rpmsg glink chrdev

2020-05-12 Thread Arun Kumar Neelakantam

From: Chris Lew 

RPMSG provides a char device interface to userspace. Probe the rpmsg
chrdev channel to enable the rpmsg_ctrl device creation on glink
transports.

Signed-off-by: Chris Lew 
Signed-off-by: Arun Kumar Neelakantam 
---
 drivers/rpmsg/qcom_glink_native.c | 40 ++-
 1 file changed, 39 insertions(+), 1 deletion(-)

diff --git a/drivers/rpmsg/qcom_glink_native.c 
b/drivers/rpmsg/qcom_glink_native.c
index 604f11f..3a7f87c 100644
--- a/drivers/rpmsg/qcom_glink_native.c
+++ b/drivers/rpmsg/qcom_glink_native.c
@@ -1178,7 +1178,7 @@ static int qcom_glink_announce_create(struct rpmsg_device 
*rpdev)
__be32 *val = defaults;
int size;
 
-   if (glink->intentless)
+   if (glink->intentless || !completion_done(>open_ack))
return 0;
 
prop = of_find_property(np, "qcom,intents", NULL);
@@ -1574,6 +1574,40 @@ static void qcom_glink_cancel_rx_work(struct qcom_glink 
*glink)
kfree(dcmd);
 }
 
+static void qcom_glink_device_release(struct device *dev)
+{
+   struct rpmsg_device *rpdev = to_rpmsg_device(dev);
+   struct glink_channel *channel = to_glink_channel(rpdev->ept);
+
+   /* Release qcom_glink_alloc_channel() reference */
+   kref_put(>refcount, qcom_glink_channel_release);
+   kfree(rpdev);
+}
+
+static int qcom_glink_create_chrdev(struct qcom_glink *glink)
+{
+   struct rpmsg_device *rpdev;
+   struct glink_channel *channel;
+
+   rpdev = kzalloc(sizeof(*rpdev), GFP_KERNEL);
+   if (!rpdev)
+   return -ENOMEM;
+
+   channel = qcom_glink_alloc_channel(glink, "rpmsg_chrdev");
+   if (IS_ERR(channel)) {
+   kfree(rpdev);
+   return PTR_ERR(channel);
+   }
+   channel->rpdev = rpdev;
+
+   rpdev->ept = >ept;
+   rpdev->ops = _device_ops;
+   rpdev->dev.parent = glink->dev;
+   rpdev->dev.release = qcom_glink_device_release;
+
+   return rpmsg_chrdev_register_device(rpdev);
+}
+
 struct qcom_glink *qcom_glink_native_probe(struct device *dev,
   unsigned long features,
   struct qcom_glink_pipe *rx,
@@ -1633,6 +1667,10 @@ struct qcom_glink *qcom_glink_native_probe(struct device 
*dev,
if (ret)
return ERR_PTR(ret);
 
+   ret = qcom_glink_create_chrdev(glink);
+   if (ret)
+   dev_err(glink->dev, "failed to register chrdev\n");
+
return glink;
 }
 EXPORT_SYMBOL_GPL(qcom_glink_native_probe);
-- 
2.7.4

[PATCH V5 1/5] rpmsg: glink: Use complete_all for open states

2020-05-12 Thread Arun Kumar Neelakantam

From: Chris Lew 

The open_req and open_ack completion variables are the state variables
to represent a remote channel as open. Use complete_all so there are no
races with waiters and using completion_done.

Signed-off-by: Chris Lew 
Signed-off-by: Arun Kumar Neelakantam 
---
 drivers/rpmsg/qcom_glink_native.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/rpmsg/qcom_glink_native.c 
b/drivers/rpmsg/qcom_glink_native.c
index 1995f5b..604f11f 100644
--- a/drivers/rpmsg/qcom_glink_native.c
+++ b/drivers/rpmsg/qcom_glink_native.c
@@ -970,7 +970,7 @@ static int qcom_glink_rx_open_ack(struct qcom_glink *glink, 
unsigned int lcid)
return -EINVAL;
}
 
-   complete(>open_ack);
+   complete_all(>open_ack);
 
return 0;
 }
@@ -1413,7 +1413,7 @@ static int qcom_glink_rx_open(struct qcom_glink *glink, 
unsigned int rcid,
channel->rcid = ret;
spin_unlock_irqrestore(>idr_lock, flags);
 
-   complete(>open_req);
+   complete_all(>open_req);
 
if (create_device) {
rpdev = kzalloc(sizeof(*rpdev), GFP_KERNEL);
-- 
2.7.4

[PATCH V5 4/5] rpmsg: glink: Expose rpmsg name attr for glink

2020-05-12 Thread Arun Kumar Neelakantam

From: Chris Lew 

Expose the name field as an attr so clients listening to uevents for
rpmsg can identify the edge the events correspond to.

Signed-off-by: Chris Lew 
Signed-off-by: Arun Kumar Neelakantam 
---
 drivers/rpmsg/qcom_glink_native.c | 26 ++
 1 file changed, 26 insertions(+)

diff --git a/drivers/rpmsg/qcom_glink_native.c 
b/drivers/rpmsg/qcom_glink_native.c
index 3a7f87c..0e8a28c0 100644
--- a/drivers/rpmsg/qcom_glink_native.c
+++ b/drivers/rpmsg/qcom_glink_native.c
@@ -1574,6 +1574,26 @@ static void qcom_glink_cancel_rx_work(struct qcom_glink 
*glink)
kfree(dcmd);
 }
 
+static ssize_t rpmsg_name_show(struct device *dev,
+  struct device_attribute *attr, char *buf)
+{
+   int ret = 0;
+   const char *name;
+
+   ret = of_property_read_string(dev->of_node, "label", );
+   if (ret < 0)
+   name = dev->of_node->name;
+
+   return snprintf(buf, RPMSG_NAME_SIZE, "%s\n", name);
+}
+static DEVICE_ATTR_RO(rpmsg_name);
+
+static struct attribute *qcom_glink_attrs[] = {
+   _attr_rpmsg_name.attr,
+   NULL
+};
+ATTRIBUTE_GROUPS(qcom_glink);
+
 static void qcom_glink_device_release(struct device *dev)
 {
struct rpmsg_device *rpdev = to_rpmsg_device(dev);
@@ -1638,6 +1658,12 @@ struct qcom_glink *qcom_glink_native_probe(struct device 
*dev,
idr_init(>lcids);
idr_init(>rcids);
 
+   glink->dev->groups = qcom_glink_groups;
+
+   ret = device_add_groups(dev, qcom_glink_groups);
+   if (ret)
+   dev_err(dev, "failed to add groups\n");
+
ret = of_property_read_string(dev->of_node, "label", >name);
if (ret < 0)
glink->name = dev->of_node->name;
-- 
2.7.4

[PATCH V5 5/5] rpmsg: glink: unregister rpmsg device during endpoint destroy

2020-05-12 Thread Arun Kumar Neelakantam

The rpmsg device is not unregistered when a channel close is triggered
from the local side, which causes later re-registration of the device to fail.

Unregister rpmsg device for local close in endpoint destroy path.

Signed-off-by: Arun Kumar Neelakantam 
---
 drivers/rpmsg/qcom_glink_native.c | 9 +
 1 file changed, 9 insertions(+)

diff --git a/drivers/rpmsg/qcom_glink_native.c 
b/drivers/rpmsg/qcom_glink_native.c
index 0e8a28c0..fc8ef66 100644
--- a/drivers/rpmsg/qcom_glink_native.c
+++ b/drivers/rpmsg/qcom_glink_native.c
@@ -1207,6 +1207,7 @@ static void qcom_glink_destroy_ept(struct rpmsg_endpoint 
*ept)
 {
struct glink_channel *channel = to_glink_channel(ept);
struct qcom_glink *glink = channel->glink;
+   struct rpmsg_channel_info chinfo;
unsigned long flags;
 
spin_lock_irqsave(>recv_lock, flags);
@@ -1214,6 +1215,13 @@ static void qcom_glink_destroy_ept(struct rpmsg_endpoint 
*ept)
spin_unlock_irqrestore(>recv_lock, flags);
 
/* Decouple the potential rpdev from the channel */
+   if (channel->rpdev) {
+   strncpy(chinfo.name, channel->name, sizeof(chinfo.name));
+   chinfo.src = RPMSG_ADDR_ANY;
+   chinfo.dst = RPMSG_ADDR_ANY;
+
+   rpmsg_unregister_device(glink->dev, );
+   }
channel->rpdev = NULL;
 
qcom_glink_send_close_req(glink, channel);
@@ -1477,6 +1485,7 @@ static void qcom_glink_rx_close(struct qcom_glink *glink, 
unsigned int rcid)
 
rpmsg_unregister_device(glink->dev, );
}
+   channel->rpdev = NULL;
 
qcom_glink_send_close_ack(glink, channel->rcid);
 
-- 
2.7.4

Re: [PATCH v7 07/18] printk: Introduce kmsg_dump_reason_str()

2020-05-12 Thread Sergey Senozhatsky

On (20/05/10 13:24), Kees Cook wrote:
> The pstore subsystem already had a private version of this function.
> With the coming addition of the pstore/zone driver, this needs to be
> shared. As it really should live with printk, move it there instead.

Acked-by: Sergey Senozhatsky 

-ss

Re: [PATCH v11 33/56] Input: atmel_mxt_ts - delay enabling IRQ when not using regulators

2020-05-12 Thread Wang, Jiada


Hello Dmitry

On 2020/05/12 8:13, Dmitry Osipenko wrote:

11.05.2020 05:05, Wang, Jiada пишет:

Hello Dmitry

Thanks for your comment and test,

can you let me know which platform (board) you are using for test,
and DTS changes if you have added any.


That's this device-tree [1] without any extra changes.


I am using Samsung Chromebook Pro for testing,
but obviously some of the use cases it can't cover.

I also would like to test on same device you are using,
would you please let me know how to boot Acer Iconia Tab A500
with custom images. Are you booting Linux or Android on it?


[1]
https://patchwork.ozlabs.org/project/linux-tegra/patch/20200505022517.30523-2-dig...@gmail.com/

The v10 was working fine. I'll take a more detailed look at the problem
later this week.


Thanks, it is very helpful

Thanks,
Jiada

Re: [PATCH RFC tip/core/rcu] Add shrinker to shift to fast/inefficient GP mode

2020-05-12 Thread Dave Chinner

On Tue, May 12, 2020 at 08:18:26PM -0700, Paul E. McKenney wrote:
> On Wed, May 13, 2020 at 11:32:38AM +1000, Dave Chinner wrote:
> > On Sat, May 09, 2020 at 09:09:00AM -0700, Paul E. McKenney wrote:
> > > On Sat, May 09, 2020 at 11:54:40AM +0300, Konstantin Khlebnikov wrote:
> > > > On 08/05/2020 17.46, Paul E. McKenney wrote:
> > > > > Easy for me to provide "start fast and inefficient mode" and "stop 
> > > > > fast
> > > > > and inefficient mode" APIs for MM to call!
> > > > > 
> > > > > How about rcu_mempressure_start() and rcu_mempressure_end()?  I would
> > > > > expect them not to nest (as in if you need them to nest, please let
> > > > > me know).  I would not expect these to be invoked all that often (as 
> > > > > in
> > > > > if you do need them to be fast and scalable, please let me know). >
> > > > > RCU would then be in fast/inefficient mode if either MM told it to be
> > > > > or if RCU had detected callback overload on at least one CPU.
> > > > > 
> > > > > Seem reasonable?
> > > > 
> > > > Not exactly nested calls, but kswapd threads are per numa node.
> > > > So, at some level nodes under pressure must be counted.
> > > 
> > > Easy enough, especially given that RCU already "counts" CPUs having
> > > excessive numbers of callbacks.  But assuming that the transitions to/from
> > > OOM are rare, I would start by just counting them with a global counter.
> > > If the counter is non-zero, RCU is in fast and inefficient mode.
> > > 
> > > > Also forcing rcu calls only for cpus in one numa node might be useful.
> > > 
> > > Interesting.  RCU currently evaluates a given CPU by comparing the
> > > number of callbacks against a fixed cutoff that can be set at boot using
> > > rcutree.qhimark, which defaults to 10,000.  When this cutoff is exceeded,
> > > RCU becomes more aggressive about invoking callbacks on that CPU, for
> > > example, by sacrificing some degree of real-time response.  I believe
> > > that this heuristic would also serve the OOM use case well.
> > 
> > So one of the things that I'm not sure people have connected here is
> > that memory reclaim done by shrinkers is one of the things that
> > drives huge numbers of call_rcu() callbacks to free memory via rcu.
> > If we are reclaiming dentries and inodes, then we can be pushing
> > thousands to hundreds of thousands of objects into kfree_rcu()
> > and/or direct call_rcu() calls to free these objects in a single
> > reclaim pass. 
> 
> Good point!
> 
> > Hence the trigger for RCU going into "excessive callback" mode
> > might, in fact, be kswapd running a pass over the shrinkers. i.e.
> > memory reclaim itself can be responsible for pushing RCU into this "OOM
> > pressure" situation.
> > 
> > So perhaps we've missed a trick here by not having the memory
> > reclaim routines trigger RCU callbacks at the end of a priority
> > scan. The shrinkers have queued the objects for freeing, but they
> > haven't actually been freed yet and so things like slab pages
> > haven't actually been returned to the free pool even though the
> > shrinkers have said "freed this many objects"...
> > 
> > i.e. perhaps the right solution here is a "rcu_run_callbacks()"
> > function that memory reclaim calls before backing off and/or winding
> > up reclaim priority.
> 
> It would not be hard to make something that put RCU into fast/inefficient
> mode for a couple of grace periods.  I will also look into the possibility
> of speeding up callback invocation.
> 
> It might also make sense to put RCU grace periods into fast mode while
> running the shrinkers that are freeing dentries and inodes.  However,
> kbuild test robot reports ugly regressions when putting RCU into
> fast/inefficient mode too quickly and too often.  As in 78.5% degradation
> on one of the benchmarks.

I don't think it should be dependent on what specific shrinkers
free. There are other objects that may be RCU freed by shrinkers,
so it really shouldn't be applied just to specific shrinker
instances.

> > > > I wonder if direct-reclaim should at some stage simply wait for RCU QS.
> > > > I.e. call rcu_barrier() or similar somewhere before invoking OOM.
> > > 
> > > The rcu_oom_count() function in the patch starting this thread returns the
> > > total number of outstanding callbacks queued on all CPUs.  So one approach
> > > would be to invoke this function, and if the return value was truly
> > > huge (taking size of memory and who knows what all else into account),
> > > do the rcu_barrier() to wait for RCU to clear its current backlog.
> > 
> > The shrinker scan control structure has a node mask in it to
> > indicate what node (and hence CPUs) it should be reclaiming from.
> > This information comes from the main reclaim scan routine, so it
> > would be trivial to feed straight into the RCU code to have it
> > act on just the CPUs/node that we are reclaiming memory from...
> 
> For the callbacks, RCU can operate on CPUs, in theory anyway.  The
> grace period itself, however, is inherently

Re: net/sonic: Software evolution around the application of coding standards

2020-05-12 Thread Markus Elfring

> When the people who write and review the coding standards are the same
> people who write and review the code, the standards devolve (given the
> prevailing incentives).

A coding style is applied also for Linux software. This coding style
supports some alternatives for implementation details.
Deviations from the recommended style are occasionally tolerated.
But some developers care to improve the compliance with the current standard
at various source code places, don't they?

Regards,
Markus

Re: [PATCH v2 1/4] soc: mediatek: Enable mmsys driver by default if Mediatek arch is selected

2020-05-12 Thread Hsin-Yi Wang

On Thu, Apr 2, 2020 at 4:17 AM Enric Balletbo i Serra
 wrote:
>
> The mmsys driver supports only MT8173 device for now, but like other system
> controllers is an important piece for other Mediatek devices. Actually
> it depends on the mt8173 clock specific driver but that dependency is
> not real as it can build without the clock driver. Instead of depends on
> a specific model, make the driver depends on the generic ARCH_MEDIATEK and
> enable by default so other Mediatek devices can start using it without
> flood the Kconfig.
>
> Signed-off-by: Enric Balletbo i Serra 
Tested-by: Hsin-Yi Wang

[PATCH v2 1/2] powerpc/rtas: Move type/struct definitions from rtas.h into rtas-types.h

2020-05-12 Thread Leonardo Bras

In order to get any rtas* struct into other headers, including rtas.h
may cause a lot of errors, regarding include dependency needed for
inline functions.

Create rtas-types.h and move there all type/struct definitions
from rtas.h, then include rtas-types.h into rtas.h.

Also, as suggested by checkpatch.pl, replace uint8_t for u8, and keep
the same type pattern for the whole file, as they are the same
according to powerpc/boot/types.h.

Signed-off-by: Leonardo Bras 
---
 arch/powerpc/include/asm/rtas-types.h | 124 ++
 arch/powerpc/include/asm/rtas.h   | 118 +---
 2 files changed, 125 insertions(+), 117 deletions(-)
 create mode 100644 arch/powerpc/include/asm/rtas-types.h

diff --git a/arch/powerpc/include/asm/rtas-types.h 
b/arch/powerpc/include/asm/rtas-types.h
new file mode 100644
index ..59b0b4b25b7a
--- /dev/null
+++ b/arch/powerpc/include/asm/rtas-types.h
@@ -0,0 +1,124 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+#ifndef _POWERPC_RTAS_TYPES_H
+#define _POWERPC_RTAS_TYPES_H
+#ifdef __KERNEL__
+
+typedef __be32 rtas_arg_t;
+
+struct rtas_args {
+   __be32 token;
+   __be32 nargs;
+   __be32 nret;
+   rtas_arg_t args[16];
+   rtas_arg_t *rets; /* Pointer to return values in args[]. */
+};
+
+struct rtas_t {
+   unsigned long entry;/* physical address pointer */
+   unsigned long base; /* physical address pointer */
+   unsigned long size;
+   arch_spinlock_t lock;
+   struct rtas_args args;
+   struct device_node *dev;/* virtual address pointer */
+};
+
+struct rtas_suspend_me_data {
+   atomic_t working; /* number of cpus accessing this struct */
+   atomic_t done;
+   int token; /* ibm,suspend-me */
+   atomic_t error;
+   struct completion *complete; /* wait on this until working == 0 */
+};
+
+struct rtas_error_log {
+   /* Byte 0 */
+   u8  byte0;  /* Architectural version */
+
+   /* Byte 1 */
+   u8  byte1;
+   /* 
+* XXX  3: Severity level of error
+*XX2: Degree of recovery
+*  X   1: Extended log present?
+*   XX 2: Reserved
+*/
+
+   /* Byte 2 */
+   u8  byte2;
+   /* 
+*  4: Initiator of event
+*  4: Target of failed operation
+*/
+   u8  byte3;  /* General event or error*/
+   __be32  extended_log_length;/* length in bytes */
+   unsigned char   buffer[1];  /* Start of extended log */
+   /* Variable length.  */
+};
+
+/* RTAS general extended event log, Version 6. The extended log starts
+ * from "buffer" field of struct rtas_error_log defined above.
+ */
+struct rtas_ext_event_log_v6 {
+   /* Byte 0 */
+   u8 byte0;
+   /* 
+* X1: Log valid
+*  X   1: Unrecoverable error
+*   X  1: Recoverable (correctable or successfully retried)
+*X 1: Bypassed unrecoverable error (degraded operation)
+* X1: Predictive error
+*  X   1: "New" log (always 1 for data returned from RTAS)
+*   X  1: Big Endian
+*X 1: Reserved
+*/
+
+   /* Byte 1 */
+   u8 byte1;   /* reserved */
+
+   /* Byte 2 */
+   u8 byte2;
+   /* 
+* X1: Set to 1 (indicating log is in PowerPC format)
+*  XXX 3: Reserved
+*  4: Log format used for bytes 12-2047
+*/
+
+   /* Byte 3 */
+   u8 byte3;   /* reserved */
+   /* Byte 4-11 */
+   u8 reserved[8]; /* reserved */
+   /* Byte 12-15 */
+   __be32  company_id; /* Company ID of the company*/
+   /* that defines the format for  */
+   /* the vendor specific log type */
+   /* Byte 16-end of log */
+   u8 vendor_log[1];   /* Start of vendor specific log */
+   /* Variable length. */
+};
+
+/* Vendor specific Platform Event Log Format, Version 6, section header */
+struct pseries_errorlog {
+   __be16 id;  /* 0x00 2-byte ASCII section ID */
+   __be16 length;  /* 0x02 Section length in bytes */
+   u8 version; /* 0x04 Section version */
+   u8 subtype; /* 0x05 Section subtype */
+   __be16 creator_component;   /* 0x06 Creator component ID*/
+   u8 data[];  /* 0x08 Start of section data   */
+};
+
+/* RTAS pseries hotplug errorlog section */
+struct

[PATCH v2 2/2] powerpc/rtas: Implement reentrant rtas call

2020-05-12 Thread Leonardo Bras

Implement rtas_call_reentrant() for reentrant rtas-calls:
"ibm,int-on", "ibm,int-off", "ibm,get-xive" and "ibm,set-xive".

On LoPAPR Version 1.1 (March 24, 2016), from 7.3.10.1 to 7.3.10.4,
items 2 and 3 say:

2 - For the PowerPC External Interrupt option: The * call must be
reentrant to the number of processors on the platform.
3 - For the PowerPC External Interrupt option: The * argument call
buffer for each simultaneous call must be physically unique.

So, these rtas-calls can be called in a lockless way, if using
a different buffer for each call.

This can be useful to avoid deadlocks in crashing, where rtas-calls are
needed, but some other thread crashed holding the rtas.lock.

This is an example backtrace of deadlock noticed:

  #0 arch_spin_lock
  #1  lock_rtas () 
  #2  rtas_call (token=8204, nargs=1, nret=1, outputs=0x0)
  #3  ics_rtas_mask_real_irq (hw_irq=4100) 
  #4  machine_kexec_mask_interrupts
  #5  default_machine_crash_shutdown
  #6  machine_crash_shutdown 
  #7  __crash_kexec
  #8  crash_kexec
  #9  oops_end


Signed-off-by: Leonardo Bras 

---
Changes since v1:
- Moved buffer from stack to PACA (as suggested by Paul Mackerras)
- Added missing output bits
- Improve documentation following kernel-doc format (as suggested by
  Nathan Lynch)
---
 arch/powerpc/include/asm/paca.h |  2 ++
 arch/powerpc/include/asm/rtas.h |  1 +
 arch/powerpc/kernel/rtas.c  | 42 +
 arch/powerpc/sysdev/xics/ics-rtas.c | 22 +++
 4 files changed, 56 insertions(+), 11 deletions(-)

diff --git a/arch/powerpc/include/asm/paca.h b/arch/powerpc/include/asm/paca.h
index e3cc9eb9204d..5a76ba50b40f 100644
--- a/arch/powerpc/include/asm/paca.h
+++ b/arch/powerpc/include/asm/paca.h
@@ -29,6 +29,7 @@
 #include 
 #include 
 #include 
+#include 
 
 #include 
 
@@ -270,6 +271,7 @@ struct paca_struct {
 #ifdef CONFIG_MMIOWB
struct mmiowb_state mmiowb_state;
 #endif
+   struct rtas_args reentrant_args;
 } cacheline_aligned;
 
 extern void copy_mm_to_paca(struct mm_struct *mm);
diff --git a/arch/powerpc/include/asm/rtas.h b/arch/powerpc/include/asm/rtas.h
index c35c5350b7e4..fa7509c85881 100644
--- a/arch/powerpc/include/asm/rtas.h
+++ b/arch/powerpc/include/asm/rtas.h
@@ -236,6 +236,7 @@ extern struct rtas_t rtas;
 extern int rtas_token(const char *service);
 extern int rtas_service_present(const char *service);
 extern int rtas_call(int token, int, int, int *, ...);
+int rtas_call_reentrant(int token, int nargs, int nret, int *outputs, ...);
 void rtas_call_unlocked(struct rtas_args *args, int token, int nargs,
int nret, ...);
 extern void __noreturn rtas_restart(char *cmd);
diff --git a/arch/powerpc/kernel/rtas.c b/arch/powerpc/kernel/rtas.c
index c5fa251b8950..d426b5c4856c 100644
--- a/arch/powerpc/kernel/rtas.c
+++ b/arch/powerpc/kernel/rtas.c
@@ -41,6 +41,7 @@
 #include 
 #include 
 #include 
+#include 
 
 /* This is here deliberately so it's only used in this file */
 void enter_rtas(unsigned long);
@@ -483,6 +484,47 @@ int rtas_call(int token, int nargs, int nret, int 
*outputs, ...)
 }
 EXPORT_SYMBOL(rtas_call);
 
+/**
+ * rtas_call_reentrant() - Used for reentrant rtas calls
+ * @token: Token for desired reentrant RTAS call
+ * @nargs: Number of Input Parameters
+ * @nret:  Number of Output Parameters
+ * @outputs:   Array of outputs
+ * @...:   Inputs for desired RTAS call
+ *
+ * According to LoPAR documentation, only "ibm,int-on", "ibm,int-off",
+ * "ibm,get-xive" and "ibm,set-xive" are currently reentrant.
+ * Reentrant calls need their own rtas_args buffer, so not using rtas.args, but
+ * PACA one instead.
+ *
+ * Return: -1 on error,
+ * First output value of RTAS call if (nret > 0),
+ * 0 otherwise,
+ */
+
+int rtas_call_reentrant(int token, int nargs, int nret, int *outputs, ...)
+{
+   va_list list;
+   struct rtas_args *args;
+   int i;
+
+   if (!rtas.entry || token == RTAS_UNKNOWN_SERVICE)
+   return -1;
+
+   /* We use the per-cpu (PACA) rtas args buffer */
+   args = _paca->reentrant_args;
+
+   va_start(list, outputs);
+   va_rtas_call_unlocked(args, token, nargs, nret, list);
+   va_end(list);
+
+   if (nret > 1 && outputs)
+   for (i = 0; i < nret - 1; ++i)
+   outputs[i] = be32_to_cpu(args->rets[i + 1]);
+
+   return (nret > 0) ? be32_to_cpu(args->rets[0]) : 0;
+}
+
 /* For RTAS_BUSY (-2), delay for 1 millisecond.  For an extended busy status
  * code of 990n, perform the hinted delay of 10^n (last digit) milliseconds.
  */
diff --git a/arch/powerpc/sysdev/xics/ics-rtas.c 
b/arch/powerpc/sysdev/xics/ics-rtas.c
index 6aabc74688a6..4cf18000f07c 100644
--- a/arch/powerpc/sysdev/xics/ics-rtas.c
+++ b/arch/powerpc/sysdev/xics/ics-rtas.c
@@ -50,8 +50,8 @@ static void ics_rtas_unmask_irq(struct irq_data *d)
 
server = xics_get_irq_server(d->irq, irq_data_get_affinity_mask(d), 0);

Re: [PATCH v2 1/1] powerpc/crash: Use NMI context for printk when starting to crash

2020-05-12 Thread Nicholas Piggin

Excerpts from Leonardo Bras's message of May 13, 2020 7:45 am:
> Currently, if printk lock (logbuf_lock) is held by other thread during
> crash, there is a chance of deadlocking the crash on next printk, and
> blocking a possibly desired kdump.
> 
> At the start of default_machine_crash_shutdown, make printk enter
> NMI context, as it will use per-cpu buffers to store the message,
> and avoid locking logbuf_lock.

printk_nmi_enter is used in one other place outside nmi_enter.

Is there a different/better way to handle this? What do other 
architectures do?

Other subsystems get put into an nmi-mode when we call nmi_enter
(lockdep, ftrace, rcu etc). It seems like those would be useful for 
similar reasons, so at least explaining why that is not used in a 
comment would be good.

Aside from that, I welcome any effort to make our crashes more reliable
so thanks for working on this stuff.

Thanks,
Nick

> 
> Suggested-by: Michael Ellerman 
> Signed-off-by: Leonardo Bras 
> 
> ---
> Changes since v1:
> - Added in-code comment explaining the need of context change
> - Function moved to the start of default_machine_crash_shutdown,
>   to avoid locking any printk on crashing routine.
> - Title was 'Use NMI context for printk after crashing other CPUs'
> 
> ---
>  arch/powerpc/kexec/crash.c | 3 +++
>  1 file changed, 3 insertions(+)
> 
> diff --git a/arch/powerpc/kexec/crash.c b/arch/powerpc/kexec/crash.c
> index d488311efab1..c9a889880214 100644
> --- a/arch/powerpc/kexec/crash.c
> +++ b/arch/powerpc/kexec/crash.c
> @@ -311,6 +311,9 @@ void default_machine_crash_shutdown(struct pt_regs *regs)
>   unsigned int i;
>   int (*old_handler)(struct pt_regs *regs);
>  
> + /* Avoid hardlocking with irresponsive CPU holding logbuf_lock */
> + printk_nmi_enter();
> +
>   /*
>* This function is only called after the system
>* has panicked or is otherwise in a critical state.
> -- 
> 2.25.4
> 
>

Re: [PATCH RFC tip/core/rcu] Add shrinker to shift to fast/inefficient GP mode

2020-05-12 Thread Konstantin Khlebnikov


On 13/05/2020 06.18, Paul E. McKenney wrote:

On Wed, May 13, 2020 at 11:32:38AM +1000, Dave Chinner wrote:

On Sat, May 09, 2020 at 09:09:00AM -0700, Paul E. McKenney wrote:

On Sat, May 09, 2020 at 11:54:40AM +0300, Konstantin Khlebnikov wrote:

On 08/05/2020 17.46, Paul E. McKenney wrote:

Easy for me to provide "start fast and inefficient mode" and "stop fast
and inefficient mode" APIs for MM to call!

How about rcu_mempressure_start() and rcu_mempressure_end()?  I would
expect them not to nest (as in if you need them to nest, please let
me know).  I would not expect these to be invoked all that often (as in
if you do need them to be fast and scalable, please let me know).
RCU would then be in fast/inefficient mode if either MM told it to be
or if RCU had detected callback overload on at least one CPU.

Seem reasonable?


Not exactly nested calls, but kswapd threads are per numa node.
So, at some level nodes under pressure must be counted.


Easy enough, especially given that RCU already "counts" CPUs having
excessive numbers of callbacks.  But assuming that the transitions to/from
OOM are rare, I would start by just counting them with a global counter.
If the counter is non-zero, RCU is in fast and inefficient mode.


Also forcing rcu calls only for cpus in one numa node might be useful.


Interesting.  RCU currently evaluates a given CPU by comparing the
number of callbacks against a fixed cutoff that can be set at boot using
rcutree.qhimark, which defaults to 10,000.  When this cutoff is exceeded,
RCU becomes more aggressive about invoking callbacks on that CPU, for
example, by sacrificing some degree of real-time response.  I believe
that this heuristic would also serve the OOM use case well.


So one of the things that I'm not sure people have connected here is
that memory reclaim done by shrinkers is one of the things that
drives huge numbers of call_rcu() callbacks to free memory via rcu.
If we are reclaiming dentries and inodes, then we can be pushing
thousands to hundreds of thousands of objects into kfree_rcu()
and/or direct call_rcu() calls to free these objects in a single
reclaim pass.


Good point!


Indeed




Hence the trigger for RCU going into "excessive callback" mode
might, in fact, be kswapd running a pass over the shrinkers. i.e.
memory reclaim itself can be responsible for pushing RCU into this "OOM
pressure" situation.

So perhaps we've missed a trick here by not having the memory
reclaim routines trigger RCU callbacks at the end of a priority
scan. The shrinkers have queued the objects for freeing, but they
haven't actually been freed yet and so things like slab pages
haven't actually been returned to the free pool even though the
shrinkers have said "freed this many objects"...

i.e. perhaps the right solution here is a "rcu_run_callbacks()"
function that memory reclaim calls before backing off and/or winding
up reclaim priority.


It would not be hard to make something that put RCU into fast/inefficient
mode for a couple of grace periods.  I will also look into the possibility
of speeding up callback invocation.

It might also make sense to put RCU grace periods into fast mode while
running the shrinkers that are freeing dentries and inodes.  However,
kbuild test robot reports ugly regressions when putting RCU into
fast/inefficient mode too quickly and too often.  As in 78.5% degradation
on one of the benchmarks.


I think fast/inefficient mode here is just an optimization for freeing
memory faster. It doesn't solve the problem itself.

At first we have to close the loop in reclaimer and actually wait or run
rcu callbacks which might free memory before increasing priority and
invoking OOM killer.




I wonder if direct-reclaim should at some stage simply wait for RCU QS.
I.e. call rcu_barrier() or similar somewhere before invoking OOM.


The rcu_oom_count() function in the patch starting this thread returns the
total number of outstanding callbacks queued on all CPUs.  So one approach
would be to invoke this function, and if the return value was truly
huge (taking size of memory and who knows what all else into account),
do the rcu_barrier() to wait for RCU to clear its current backlog.


The shrinker scan control structure has a node mask in it to
indicate what node (and hence CPUs) it should be reclaiming from.
This information comes from the main reclaim scan routine, so it
would be trivial to feed straight into the RCU code to have it
act on just the CPUs/node that we are reclaiming memory from...


For the callbacks, RCU can operate on CPUs, in theory anyway.  The
grace period itself, however, is inherently global.


On the NUMA point, it would be dead easy for me to supply a function
that returned the number of callbacks on a given CPU, which would allow
you to similarly evaluate a NUMA node, a cgroup, or whatever.


I'd think it runs the other way around - we optimistically call the
RCU layer to do cleanup, and the RCU layer decides if there's enough
queued callbacks

Re: [PATCH] gpiolib: add GPIO_SET_DEBOUNCE_IOCTL

2020-05-12 Thread Kent Gibson

On Tue, May 12, 2020 at 07:55:42PM +0200, Linus Walleij wrote:
> On Mon, May 4, 2020 at 12:32 PM Bartosz Golaszewski  wrote:
> 

I hope Bart doesn't mind if I jump in here, but I've started working on
this so hopefully I can address most of your points...

> > Ideally we'd have to introduce new versions of gpioevent_request,
> > gpioline_request, gpioline_info and gpioevent_data structs - this time
> > with enough additional padding and no alignment issues. Then we could
> > add the debounce properly.
> 
> Hm that sounds massive. Is it really that bad?
> 

Agreed - it is massive - we end up replacing the majority of the
existing structs and ioctls.

If we want to be able to set debounce in the request(s), not just in
SET_CONFIG, then we need new requests as there is no room in the
existing.  If we want to be able to report that config in the info then
we need new infos for the same reason.  The info_changed contains an
info so that has to change as well. And the event_data has a 32/64bit
alignment issue so it was already up for replacement.

So it could be worse, but not much.

> > This would of course add a lot of cruft to the uAPI code. I'd start by
> > moving it out of drivers/gpio/gpiolib.c into a new file:
> > drivers/gpio/gpiolib-cdev.c. This way we'd have everything related to
> > the character device in one place. It would make it easier to: a) add
> > a config option for disabling it entirely and b) add a config option
> > to disable the v1 of the ioctl()s.
> 
> Its good to break out for code maintenance no matter what we do
> with it :)
>

It definitely is, and I'll submit a patch soon, that hopefully can be
applied immediately before the next dev window opens, to do just that.

> I would however not make it in any way totally optional, because the
> big win with the character device over the legacy sysfs is to always
> be available.
> 

And if you build it into your kernel, which will be the default, it
still will be.

But maybe there are specific applications that don't need cdev and
would be interested in reducing kernel bloat?

> > Linus: about the software-debounce you mentioned: do you think it
> > somehow plugs the hole we identified here?
> 
> Hm, I don't quite understand what the hole is I guess...
> 

I'll leave this one for Bart - the more I re-read the thread the less
certain I am as well.

I will note that Bart correctly mentioned that the uapi doesn't return
an error if the user requests bias that is not supported by the driver
- gpio_set_bias absorbs the error.

That isn't by intent - it is the way gpiod_direction_input
behaved before I added bias to cdev. It was left that way as I was
unsure on the broader implications of changing it, and wasn't keen on
implementing a cdev specific gpiod_direction_input either.
I'm open to suggestions if you would like to change that.

Cheers,
Kent.

Re: [PATCH 1/1] powerpc/rtas: Implement reentrant rtas call

2020-05-12 Thread Leonardo Bras

Hello Nathan, thanks for the feedback!

On Fri, 2020-04-10 at 14:28 -0500, Nathan Lynch wrote:
> Leonardo Bras  writes:
> > Implement rtas_call_reentrant() for reentrant rtas-calls:
> > "ibm,int-on", "ibm,int-off", "ibm,get-xive" and "ibm,set-xive".
> > 
> > On LoPAPR Version 1.1 (March 24, 2016), from 7.3.10.1 to 7.3.10.4,
> > items 2 and 3 say:
> > 
> > 2 - For the PowerPC External Interrupt option: The * call must be
> > reentrant to the number of processors on the platform.
> > 3 - For the PowerPC External Interrupt option: The * argument call
> > buffer for each simultaneous call must be physically unique.
> > 
> > So, these rtas-calls can be called in a lockless way, if using
> > a different buffer for each call.
> > 

> From the language in the spec it's clear that these calls are intended
> to be reentrant with respect to themselves, but it's less clear to me
> that they are safe to call simultaneously with respect to each other or
> arbitrary other RTAS methods.

In my viewpoint, being reentrant to themselves, without being reentrant
to others would be very difficult to do, considering the way the
rtas_call is crafted to work.

I mean, I have no experience in rtas code, it's my viewpoint. In my
thoughts there is something like this:

common_path -> selects function by token -> reentrant function
|-> non-reentrant function

If there is one function that is reentrant, it means the common_path
and function selection by token would need to be reentrant too.

> > This can be useful to avoid deadlocks in crashing, where rtas-calls are
> > needed, but some other thread crashed holding the rtas.lock.
> 
> Are these calls commonly used in the crash-handling path? Is this
> addressing a real issue you've seen?
> 

Yes, I noticed deadlocks during crashes, like this one:
#0 arch_spin_lock
#1  lock_rtas () 
#2  rtas_call (token=8204, nargs=1, nret=1, outputs=0x0)
#3  ics_rtas_mask_real_irq (hw_irq=4100) 
#4  machine_kexec_mask_interrupts
#5  default_machine_crash_shutdown
#6  machine_crash_shutdown 
#7  __crash_kexec
#8  crash_kexec
#9  oops_end

On ics_rtas_mask_real_irq() we have both ibm_int_off and ibm_set_xive,
so it makes sense to also add ibm_int_on and ibm_get_xive as reentrant
too.

Full discussion available on this thread:
http://patchwork.ozlabs.org/project/linuxppc-dev/patch/2020040120.590447-1-leona...@linux.ibm.com/

> 
> > +/*
> > + * Used for reentrant rtas calls.
> > + * According to LoPAR documentation, only "ibm,int-on", "ibm,int-off",
> > + * "ibm,get-xive" and "ibm,set-xive" are currently reentrant.
> > + * Reentrant calls need their own rtas_args buffer, so not using rtas.args.
> > + */
> 
> Please use kernel-doc format in new code.

Sure, v2 is going to be fixed.

> 
> 
> > +int rtas_call_reentrant(int token, int nargs, int nret, int *outputs, ...)
> > +{
> > +   va_list list;
> > +   struct rtas_args rtas_args;
> > +
> > +   if (!rtas.entry || token == RTAS_UNKNOWN_SERVICE)
> > +   return -1;
> > +
> > +   va_start(list, outputs);
> > +   va_rtas_call_unlocked(_args, token, nargs, nret, list);
> > +   va_end(list);
> 
> No, I don't think you can place the RTAS argument buffer on the stack:
> 
>   7.2.7, Software Implementation Note:
>   | The OS must be aware that the effective address range for RTAS is 4
>   | GB when instantiated in 32-bit mode and the OS should not pass RTAS
>   | addresses or blocks of data which might fall outside of this range.

Agree, moved to PACA.

I will send a v2 soon, it will be a 2-patch patchset.

Best regards,
Leonardo Bras


signature.asc
Description: This is a digitally signed message part

[RFC PATCH] kunit: Support skipped tests

2020-05-12 Thread David Gow

This is a proof-of-concept to support "skipping" tests.

The kunit_mark_skipped() macro marks the current test as "skipped", with
the provided reason. The kunit_skip() macro will mark the test as
skipped, and abort the test.

The TAP specification supports this "SKIP directive" as a comment after
the "ok" / "not ok" for a test. See the "Directives" section of the TAP
spec for details:
https://testanything.org/tap-specification.html#directives

kunit_tool will parse this SKIP directive, render skipped tests in
yellow and count them. Skipped tests do not affect the result for a
suite.

Signed-off-by: David Gow 
---

Following on from discussions about the KCSAN test[1], which requires a
multi-core/processor system to make sense, it would be useful for tests
to be able to mark themselves as "skipped", where tests have runtime
dependencies which aren't met.

As a proof-of-concept, this patch doesn't implement some things which
we'd ideally like to have (e.g., non-static "reasons" for skipping the
test, maybe some SKIP macros akin to the EXPECT and ASSERT ones), and
the implementation is still pretty hacky, but I thought I'd put this out
there to see if there are any thoughts on the concept in general.

Cheers,
-- David

[1]: https://lkml.org/lkml/2020/5/5/31

 include/kunit/test.h| 12 
 lib/kunit/kunit-example-test.c  |  7 +++
 lib/kunit/test.c| 23 ---
 tools/testing/kunit/kunit_parser.py | 21 +
 4 files changed, 52 insertions(+), 11 deletions(-)

diff --git a/include/kunit/test.h b/include/kunit/test.h
index 9b0c46a6ca1f..7817c5580b2c 100644
--- a/include/kunit/test.h
+++ b/include/kunit/test.h
@@ -178,6 +178,7 @@ struct kunit_suite {
/* private - internal use only */
struct dentry *debugfs;
char *log;
+   const char *skip_directive;
 };
 
 /**
@@ -213,6 +214,8 @@ struct kunit {
 * protect it with some type of lock.
 */
struct list_head resources; /* Protected by lock. */
+
+   const char *skip_directive;
 };
 
 void kunit_init_test(struct kunit *test, const char *name, char *log);
@@ -391,6 +394,15 @@ void kunit_cleanup(struct kunit *test);
 
 void kunit_log_append(char *log, const char *fmt, ...);
 
+#define kunit_mark_skipped(test_or_suite, reason)  \
+   (test_or_suite)->skip_directive = "SKIP " reason
+
+#define kunit_skip(test_or_suite, reason)  \
+   do {\
+   kunit_mark_skipped(test_or_suite, reason);  \
+   kunit_try_catch_throw(&((test_or_suite)->try_catch));   \
+   } while (0)
+
 /*
  * printk and log to per-test or per-suite log buffer.  Logging only done
  * if CONFIG_KUNIT_DEBUGFS is 'y'; if it is 'n', no log is allocated/used.
diff --git a/lib/kunit/kunit-example-test.c b/lib/kunit/kunit-example-test.c
index be1164ecc476..998401a61458 100644
--- a/lib/kunit/kunit-example-test.c
+++ b/lib/kunit/kunit-example-test.c
@@ -29,6 +29,12 @@ static void example_simple_test(struct kunit *test)
KUNIT_EXPECT_EQ(test, 1 + 1, 2);
 }
 
+static void example_skip_test(struct kunit *test)
+{
+   kunit_skip(test, "this test should be skipped");
+   KUNIT_EXPECT_EQ(test, 1 + 1, 2);
+}
+
 /*
  * This is run once before each test case, see the comment on
  * example_test_suite for more information.
@@ -52,6 +58,7 @@ static struct kunit_case example_test_cases[] = {
 * test suite.
 */
KUNIT_CASE(example_simple_test),
+   KUNIT_CASE(example_skip_test),
{}
 };
 
diff --git a/lib/kunit/test.c b/lib/kunit/test.c
index ccb2ffad8dcf..84b9be3a8da7 100644
--- a/lib/kunit/test.c
+++ b/lib/kunit/test.c
@@ -79,10 +79,12 @@ static void kunit_print_ok_not_ok(void *test_or_suite,
  bool is_test,
  bool is_ok,
  size_t test_number,
- const char *description)
+ const char *description,
+ const char *directive)
 {
struct kunit_suite *suite = is_test ? NULL : test_or_suite;
struct kunit *test = is_test ? test_or_suite : NULL;
+   const char *directive_header = directive ? " # " : "";
 
/*
 * We do not log the test suite results as doing so would
@@ -93,13 +95,16 @@ static void kunit_print_ok_not_ok(void *test_or_suite,
 * representation.
 */
if (suite)
-   pr_info("%s %zd - %s\n",
+   pr_info("%s %zd - %s%s%s\n",
kunit_status_to_string(is_ok),
-   test_number, description);
+   test_number, description,
+   directive_header, directive ? directive : "");
else
-   kunit_log(KERN_INFO, test,

Virtual LAPIC page corruption

2020-05-12 Thread Suresh Gumpula

Hi,

We are seeing a problem with windows guests(2016/2012R2) where guest crashes 
with 
Virtual APIC page corruption similar to the following redhat ticket.
https://bugzilla.redhat.com/show_bug.cgi?id=1751017

> Arg4: 0017, Type of corrupted region, can be
16  : Critical floating point control register modification
17  : Local APIC modification

Here, we are seeing corruption of the LAPIC page and the guest is BSOD'ing.
Looking at the guest windows dump, we see the full page is zeroed. And it seems 
the 
Guest windows kernel patchguard is detecting this case and resetting the VM.

Is it possible that KVM, somehow corrupted the virtual LAPIC page?  While the 
guest is running
the KVM is not supposed to touch that vcpu lapic page?

Could you please give us some pointers on what could be wrong here? Is it a known 
issue in the kvm?
We are using the host kernel 4.19 and qemu 2.12 and windows guests(2016/2012)


Thanks,
Suresh

Re: [PATCH v4 2/2] mailbox: sprd: Add Spreadtrum mailbox driver

2020-05-12 Thread Baolin Wang

Hi Jassi,

On Thu, May 7, 2020 at 11:23 AM Baolin Wang  wrote:
>
> Hi Jassi,
>
> On Thu, May 7, 2020 at 7:25 AM Jassi Brar  wrote:
> >
> > On Wed, May 6, 2020 at 8:29 AM Baolin Wang  wrote:
> > >
> > > Hi Jassi,
> > >
> > > On Tue, Apr 28, 2020 at 11:10 AM Baolin Wang  
> > > wrote:
> > > >
> > > > From: Baolin Wang 
> > > >
> > > > The Spreadtrum mailbox controller supports 8 channels to communicate
> > > > with MCUs, and it contains 2 different parts: inbox and outbox, which
> > > > are used to send and receive messages by IRQ mode.
> > > >
> > > > Signed-off-by: Baolin Wang 
> > > > Signed-off-by: Baolin Wang 
> > > > ---
> > > > Changes from v3:
> > > >  - Save the id in mbox_chan.con_priv and remove the 'sprd_mbox_chan'
> > > >
> > > > Changes from v2:
> > > >  - None.
> > > >
> > > > Changes from v1:
> > > >  - None
> > >
> > > Gentle ping, do you have any other comments? Thanks.
> > >
> > Yea, I am still not sure about the error returned in send_data().  It
> > will either never hit or there will be no easy recovery from it. The
> > api expects the driver to tell it the last-tx was done only when it
> > can send the next message. (There may be case like sending depend on
> > remote, which can't be ensured before hand).
>
> Actually this is an unusual case, suppose the remote target did not
> fetch the message as soon as possible, which will cause the FIFO
> to overflow, so in this case we cannot send messages to the remote
> target any more, otherwise messages will be lost. Thus we can return
> errors to users to indicate that something wrong with the remote
> target need to be checked.
>
> So this validation in send_data() is mostly for debugging for this
> abnormal case and we will not trigger this issue if the remote target
> works well. So I think it is useful to keep this validation in
> send_data(). Thanks.

Any comments? Thanks.

-- 
Baolin Wang

Re: [PATCH V2] ifcvf: move IRQ request/free to status change handlers

2020-05-12 Thread Jason Wang




On 2020/5/12 下午4:00, Zhu Lingshan wrote:

This commit moves IRQ request and free operations from probe()
to VIRTIO status change handler to comply with VIRTIO spec.

VIRTIO spec 1.1, section 2.1.2 Device Requirements: Device Status Field
The device MUST NOT consume buffers or send any used buffer
notifications to the driver before DRIVER_OK.



This comment needs to be checked as I said previously. It's only needed 
if we're sure ifcvf can generate interrupt before DRIVER_OK.





Signed-off-by: Zhu Lingshan 
---
changes from V1:
remove ifcvf_stop_datapath() in status == 0 handler, we don't need to do this
twice; handle status == 0 after DRIVER_OK -> !DRIVER_OK handler (Jason Wang)



Patch looks good to me, but with this patch ping cannot work on my 
machine. (It works without this patch).


Thanks




  drivers/vdpa/ifcvf/ifcvf_main.c | 120 
  1 file changed, 73 insertions(+), 47 deletions(-)

diff --git a/drivers/vdpa/ifcvf/ifcvf_main.c b/drivers/vdpa/ifcvf/ifcvf_main.c
index abf6a061..d529ed6 100644
--- a/drivers/vdpa/ifcvf/ifcvf_main.c
+++ b/drivers/vdpa/ifcvf/ifcvf_main.c
@@ -28,6 +28,60 @@ static irqreturn_t ifcvf_intr_handler(int irq, void *arg)
return IRQ_HANDLED;
  }
  
+static void ifcvf_free_irq_vectors(void *data)

+{
+   pci_free_irq_vectors(data);
+}
+
+static void ifcvf_free_irq(struct ifcvf_adapter *adapter, int queues)
+{
+   struct pci_dev *pdev = adapter->pdev;
+   struct ifcvf_hw *vf = >vf;
+   int i;
+
+
+   for (i = 0; i < queues; i++)
+   devm_free_irq(>dev, vf->vring[i].irq, >vring[i]);
+
+   ifcvf_free_irq_vectors(pdev);
+}
+
+static int ifcvf_request_irq(struct ifcvf_adapter *adapter)
+{
+   struct pci_dev *pdev = adapter->pdev;
+   struct ifcvf_hw *vf = >vf;
+   int vector, i, ret, irq;
+
+   ret = pci_alloc_irq_vectors(pdev, IFCVF_MAX_INTR,
+   IFCVF_MAX_INTR, PCI_IRQ_MSIX);
+   if (ret < 0) {
+   IFCVF_ERR(pdev, "Failed to alloc IRQ vectors\n");
+   return ret;
+   }
+
+   for (i = 0; i < IFCVF_MAX_QUEUE_PAIRS * 2; i++) {
+   snprintf(vf->vring[i].msix_name, 256, "ifcvf[%s]-%d\n",
+pci_name(pdev), i);
+   vector = i + IFCVF_MSI_QUEUE_OFF;
+   irq = pci_irq_vector(pdev, vector);
+   ret = devm_request_irq(>dev, irq,
+  ifcvf_intr_handler, 0,
+  vf->vring[i].msix_name,
+  >vring[i]);
+   if (ret) {
+   IFCVF_ERR(pdev,
+ "Failed to request irq for vq %d\n", i);
+   ifcvf_free_irq(adapter, i);
+
+   return ret;
+   }
+
+   vf->vring[i].irq = irq;
+   }
+
+   return 0;
+}
+
  static int ifcvf_start_datapath(void *private)
  {
struct ifcvf_hw *vf = ifcvf_private_to_vf(private);
@@ -118,17 +172,34 @@ static void ifcvf_vdpa_set_status(struct vdpa_device 
*vdpa_dev, u8 status)
  {
struct ifcvf_adapter *adapter;
struct ifcvf_hw *vf;
+   u8 status_old;
+   int ret;
  
  	vf  = vdpa_to_vf(vdpa_dev);

adapter = dev_get_drvdata(vdpa_dev->dev.parent);
+   status_old = ifcvf_get_status(vf);
  
-	if (status == 0) {

+   if ((status_old & VIRTIO_CONFIG_S_DRIVER_OK) &&
+   !(status & VIRTIO_CONFIG_S_DRIVER_OK)) {
ifcvf_stop_datapath(adapter);
+   ifcvf_free_irq(adapter, IFCVF_MAX_QUEUE_PAIRS * 2);
+   }
+
+   if (status == 0) {
ifcvf_reset_vring(adapter);
return;
}
  
-	if (status & VIRTIO_CONFIG_S_DRIVER_OK) {

+   if ((status & VIRTIO_CONFIG_S_DRIVER_OK) &&
+   !(status_old & VIRTIO_CONFIG_S_DRIVER_OK)) {
+   ret = ifcvf_request_irq(adapter);
+   if (ret) {
+   status = ifcvf_get_status(vf);
+   status |= VIRTIO_CONFIG_S_FAILED;
+   ifcvf_set_status(vf, status);
+   return;
+   }
+
if (ifcvf_start_datapath(adapter) < 0)
IFCVF_ERR(adapter->pdev,
  "Failed to set ifcvf vdpa  status %u\n",
@@ -284,38 +355,6 @@ static void ifcvf_vdpa_set_config_cb(struct vdpa_device 
*vdpa_dev,
.set_config_cb  = ifcvf_vdpa_set_config_cb,
  };
  
-static int ifcvf_request_irq(struct ifcvf_adapter *adapter)

-{
-   struct pci_dev *pdev = adapter->pdev;
-   struct ifcvf_hw *vf = >vf;
-   int vector, i, ret, irq;
-
-
-   for (i = 0; i < IFCVF_MAX_QUEUE_PAIRS * 2; i++) {
-   snprintf(vf->vring[i].msix_name, 256, "ifcvf[%s]-%d\n",
-pci_name(pdev), i);
-   vector = i + IFCVF_MSI_QUEUE_OFF;
-   irq = pci_irq_vector(pdev, vector);
-

Re: [PATCH 2/2] ASoC: max98390: Added Amplifier Driver

2020-05-12 Thread Steve Lee

On Tue, May 12, 2020 at 7:33 PM Mark Brown  wrote:
>
> On Tue, May 12, 2020 at 06:13:05PM +0900, Steve Lee wrote:
> > On Mon, May 11, 2020 at 8:03 PM Mark Brown  wrote:
>
> > > > +static const char * const max98390_current_limit_text[] = {
> > > > + "0.00A", "0.50A", "1.00A", "1.05A", "1.10A", "1.15A", "1.20A", 
> > > > "1.25A",
> > > > + "1.30A", "1.35A", "1.40A", "1.45A", "1.50A", "1.55A", "1.60A", 
> > > > "1.65A",
>
> > > This looks like it should be in DT too.
>
> > Since this control  is needed while running system according to system
> > battery situation.
> > I'd keep this mixer for further use.
>
> That's...  interesting for a current limit, and sounds like it would
> have issues for the common case use of current limits to protect the
> hardware.
>
> > > > +static int max98390_dsm_calib_get(struct snd_kcontrol *kcontrol,
> > > > + struct snd_ctl_elem_value *ucontrol)
> > > > +{
> > > > + struct snd_soc_component *component =
> > > > + snd_soc_kcontrol_component(kcontrol);
> > > > +
> > > > + dev_warn(component->dev, "Get dsm_calib_get not supported\n");
> > > > +
> > > > + return 0;
> > > > +}
>
> > > Just don't implement the operation if you can't implement it.
>
> > If this not exist as dummy operation and all mixer was not working and
> > could not implement better idea.
>
> Could you be more specific about what you mean by "not working" or how
> simply not initializing the value returned fixes things please?
I mean that xhandler_get pointing to NULL makes the mixer list behave unexpectedly.
I will return a fixed value without the warning message.
>
> > Could you consider it as with warn message ?
>
> No, if there's a problem here we should fix it properly.
Thanks for the feedback. I will return a fixed value without the warning message.

Re: [PATCH] powerpc/kvm: silence kmemleak false positives

2020-05-12 Thread Michael Ellerman

Qian Cai  writes:
> kvmppc_pmd_alloc() and kvmppc_pte_alloc() allocate some memory but then
> pud_populate() and pmd_populate() will use __pa() to reference the newly
> allocated memory. The same is in xive_native_provision_pages().

Can you please split this into two patches, one for the KVM cases and
one for xive.

That way the KVM patch can go via the kvm-ppc tree, and I'll take the
xive one via powerpc.

> Since kmemleak is unable to track the physical memory resulting in false
> positives, silence those by using kmemleak_ignore().
>
> unreferenced object 0xc000201c382a1000 (size 4096):
>   comm "qemu-kvm", pid 124828, jiffies 4295733767 (age 341.250s)
>   hex dump (first 32 bytes):
> c0 00 20 09 f4 60 03 87 c0 00 20 10 72 a0 03 87  .. ..` .r...
> c0 00 20 0e 13 a0 03 87 c0 00 20 1b dc c0 03 87  .. ... .
>   backtrace:
> [<4cc2790f>] kvmppc_create_pte+0x838/0xd20 [kvm_hv]
> kvmppc_pmd_alloc at arch/powerpc/kvm/book3s_64_mmu_radix.c:366
> (inlined by) kvmppc_create_pte at 
> arch/powerpc/kvm/book3s_64_mmu_radix.c:590
> [] kvmppc_book3s_instantiate_page+0x2e0/0x8c0 [kvm_hv]
> [] kvmppc_book3s_radix_page_fault+0x1b4/0x2b0 [kvm_hv]
> [<86dddc0e>] kvmppc_book3s_hv_page_fault+0x214/0x12a0 [kvm_hv]
> [<5ae9ccc2>] kvmppc_vcpu_run_hv+0xc5c/0x15f0 [kvm_hv]
> [] kvmppc_vcpu_run+0x34/0x48 [kvm]
> [] kvm_arch_vcpu_ioctl_run+0x314/0x420 [kvm]
> [<2543dd54>] kvm_vcpu_ioctl+0x33c/0x950 [kvm]
> [<48155cd6>] ksys_ioctl+0xd8/0x130
> [<41ffeaa7>] sys_ioctl+0x28/0x40
> [<4afc4310>] system_call_exception+0x114/0x1e0
> [] system_call_common+0xf0/0x278
> unreferenced object 0xc0002001f0c03900 (size 256):
>   comm "qemu-kvm", pid 124830, jiffies 4295735235 (age 326.570s)
>   hex dump (first 32 bytes):
> c0 00 20 10 fa a0 03 87 c0 00 20 10 fa a1 03 87  .. ... .
> c0 00 20 10 fa a2 03 87 c0 00 20 10 fa a3 03 87  .. ... .
>   backtrace:
> [<23f675b8>] kvmppc_create_pte+0x854/0xd20 [kvm_hv]
> kvmppc_pte_alloc at arch/powerpc/kvm/book3s_64_mmu_radix.c:356
> (inlined by) kvmppc_create_pte at 
> arch/powerpc/kvm/book3s_64_mmu_radix.c:593
> [] kvmppc_book3s_instantiate_page+0x2e0/0x8c0 [kvm_hv]
> [] kvmppc_book3s_radix_page_fault+0x1b4/0x2b0 [kvm_hv]
> [<86dddc0e>] kvmppc_book3s_hv_page_fault+0x214/0x12a0 [kvm_hv]
> [<5ae9ccc2>] kvmppc_vcpu_run_hv+0xc5c/0x15f0 [kvm_hv]
> [] kvmppc_vcpu_run+0x34/0x48 [kvm]
> [] kvm_arch_vcpu_ioctl_run+0x314/0x420 [kvm]
> [<2543dd54>] kvm_vcpu_ioctl+0x33c/0x950 [kvm]
> [<48155cd6>] ksys_ioctl+0xd8/0x130
> [<41ffeaa7>] sys_ioctl+0x28/0x40
> [<4afc4310>] system_call_exception+0x114/0x1e0
> [] system_call_common+0xf0/0x278
> unreferenced object 0xc000201b53e9 (size 65536):
>   comm "qemu-kvm", pid 124557, jiffies 4295650285 (age 364.370s)
>   hex dump (first 32 bytes):
> 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00  
> 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00  
>   backtrace:
> [] xive_native_alloc_vp_block+0x168/0x210
> xive_native_provision_pages at arch/powerpc/sysdev/xive/native.c:645
> (inlined by) xive_native_alloc_vp_block at 
> arch/powerpc/sysdev/xive/native.c:674
> [<4d5c7964>] kvmppc_xive_compute_vp_id+0x20c/0x3b0 [kvm]
> [<55317cd2>] kvmppc_xive_connect_vcpu+0xa4/0x4a0 [kvm]
> [<93dfc014>] kvm_arch_vcpu_ioctl+0x388/0x508 [kvm]
> [] kvm_vcpu_ioctl+0x15c/0x950 [kvm]
> [<48155cd6>] ksys_ioctl+0xd8/0x130
> [<41ffeaa7>] sys_ioctl+0x28/0x40
> [<4afc4310>] system_call_exception+0x114/0x1e0
> [] system_call_common+0xf0/0x278
>
> Signed-off-by: Qian Cai 
> ---
>  arch/powerpc/kvm/book3s_64_mmu_radix.c | 16 ++--
>  arch/powerpc/sysdev/xive/native.c  |  4 
>  2 files changed, 18 insertions(+), 2 deletions(-)
>
> diff --git a/arch/powerpc/kvm/book3s_64_mmu_radix.c 
> b/arch/powerpc/kvm/book3s_64_mmu_radix.c
> index aa12cd4078b3..bc6c1aa3d0e9 100644
> --- a/arch/powerpc/kvm/book3s_64_mmu_radix.c
> +++ b/arch/powerpc/kvm/book3s_64_mmu_radix.c
> @@ -353,7 +353,13 @@ static struct kmem_cache *kvm_pmd_cache;

This should probably also have an include of <linux/kmemleak.h> ?

>  static pte_t *kvmppc_pte_alloc(void)
>  {
> - return kmem_cache_alloc(kvm_pte_cache, GFP_KERNEL);
> + pte_t *pte;
> +
> + pte = kmem_cache_alloc(kvm_pte_cache, GFP_KERNEL);
> + /* pmd_populate() will only reference _pa(pte). */
> + kmemleak_ignore(pte);
> +
> + return pte;
>  }
>  
>  static void kvmppc_pte_free(pte_t *ptep)


cheers

Re: linux-next: manual merge of the vfs tree with the parisc-hd tree

2020-05-12 Thread Xiaoming Ni


On 2020/5/13 6:03, Luis Chamberlain wrote:

On Tue, May 12, 2020 at 12:40:55PM -0500, Eric W. Biederman wrote:

Luis Chamberlain  writes:


On Tue, May 12, 2020 at 06:52:35AM -0500, Eric W. Biederman wrote:

Luis Chamberlain  writes:


+static struct ctl_table fs_base_table[] = {
+   {
+   .procname   = "fs",
+   .mode   = 0555,
+   .child  = fs_table,
+   },
+   { }
+};

    You don't need this at all.

+static int __init fs_procsys_init(void)

+{
+   struct ctl_table_header *hdr;
+
+   hdr = register_sysctl_table(fs_base_table);

   ^ Please use register_sysctl instead.
AKA
 hdr = register_sysctl("fs", fs_table);


Ah, much cleaner thanks!


It is my hope that we can get rid of register_sysctl_table one of these
days.  It was the original interface but today it is just a
compatibility wrapper.

I unfortunately ran out of steam last time before I finished converting
everything over.


Let's give it one more go. I'll start with the fs stuff.

   Luis

.



What if we register each feature's sysctl interface in that feature's own 
code file, using register()? That would avoid merge conflicts 
when different features modify sysctl.c at the same time; 
that is, we would try to avoid mixing code for multiple features in the same 
code file.


For example, the multiple file interfaces defined in sysctl.c by the 
hung_task feature can  be moved to hung_task.c.


Perhaps later, without centralized sysctl.c ?
Is this better?

Thanks
Xiaoming Ni

---
 include/linux/sched/sysctl.h |  8 +
 kernel/hung_task.c   | 78 
+++-

 kernel/sysctl.c  | 50 
 3 files changed, 78 insertions(+), 58 deletions(-)

diff --git a/include/linux/sched/sysctl.h b/include/linux/sched/sysctl.h
index d4f6215..bb4e0d3 100644
--- a/include/linux/sched/sysctl.h
+++ b/include/linux/sched/sysctl.h
@@ -7,14 +7,8 @@
 struct ctl_table;

 #ifdef CONFIG_DETECT_HUNG_TASK
-extern int  sysctl_hung_task_check_count;
-extern unsigned int  sysctl_hung_task_panic;
+/* used for block/ */
 extern unsigned long sysctl_hung_task_timeout_secs;
-extern unsigned long sysctl_hung_task_check_interval_secs;
-extern int sysctl_hung_task_warnings;
-extern int proc_dohung_task_timeout_secs(struct ctl_table *table, int 
write,

-void __user *buffer,
-size_t *lenp, loff_t *ppos);
 #else
 /* Avoid need for ifdefs elsewhere in the code */
 enum { sysctl_hung_task_timeout_secs = 0 };
diff --git a/kernel/hung_task.c b/kernel/hung_task.c
index 14a625c..53589f2 100644
--- a/kernel/hung_task.c
+++ b/kernel/hung_task.c
@@ -20,10 +20,10 @@
 #include 
 #include 
 #include 
+#include 
 #include 

 #include 
-
 /*
  * The number of tasks checked:
  */
@@ -296,8 +296,84 @@ static int watchdog(void *dummy)
return 0;
 }

+/*
+ * This is needed for proc_doulongvec_minmax of 
sysctl_hung_task_timeout_secs

+ * and hung_task_check_interval_secs
+ */
+static unsigned long hung_task_timeout_max = (LONG_MAX / HZ);
+static int __maybe_unused neg_one = -1;
+static struct ctl_table hung_task_sysctls[] = {
+   {
+   .procname   = "hung_task_panic",
+   .data   = _hung_task_panic,
+   .maxlen = sizeof(int),
+   .mode   = 0644,
+   .proc_handler   = proc_dointvec_minmax,
+   .extra1 = SYSCTL_ZERO,
+   .extra2 = SYSCTL_ONE,
+   },
+   {
+   .procname   = "hung_task_check_count",
+   .data   = _hung_task_check_count,
+   .maxlen = sizeof(int),
+   .mode   = 0644,
+   .proc_handler   = proc_dointvec_minmax,
+   .extra1 = SYSCTL_ZERO,
+   },
+   {
+   .procname   = "hung_task_timeout_secs",
+   .data   = _hung_task_timeout_secs,
+   .maxlen = sizeof(unsigned long),
+   .mode   = 0644,
+   .proc_handler   = proc_dohung_task_timeout_secs,
+   .extra2 = _task_timeout_max,
+   },
+   {
+   .procname   = "hung_task_check_interval_secs",
+   .data   = _hung_task_check_interval_secs,
+   .maxlen = sizeof(unsigned long),
+   .mode   = 0644,
+   .proc_handler   = proc_dohung_task_timeout_secs,
+   .extra2 = _task_timeout_max,
+   },
+   {
+   .procname   = "hung_task_warnings",
+   .data   = _hung_task_warnings,
+   .maxlen = sizeof(int),
+   .mode   = 0644,
+   .proc_handler   = proc_dointvec_minmax,
+   .extra1

Re: [PATCH] powerpc/kvm: silence kmemleak false positives

2020-05-12 Thread Michael Ellerman

Catalin Marinas  writes:
> On Mon, May 11, 2020 at 09:15:55PM +1000, Michael Ellerman wrote:
>> Qian Cai  writes:
>> > kvmppc_pmd_alloc() and kvmppc_pte_alloc() allocate some memory but then
>> > pud_populate() and pmd_populate() will use __pa() to reference the newly
>> > allocated memory. The same is in xive_native_provision_pages().
>> >
>> > Since kmemleak is unable to track the physical memory resulting in false
>> > positives, silence those by using kmemleak_ignore().
>> 
>> There is kmemleak_alloc_phys(), which according to the docs can be used
>> for tracking a phys address.
>
> This won't help. While kmemleak_alloc_phys() allows passing a physical
> address, it doesn't track physical address references to this object. It
> still expects VA pointing to it, otherwise the object would be reported
> as a leak.

OK, thanks for clarifying that.

> We currently only call this from the memblock code with a min_count of
> 0, meaning it will not be reported as a leak if no references are found.
>
> We don't have this issue with page tables on other architectures since
> most of them use whole page allocations which aren't tracked by
> kmemleak. These powerpc functions use kmem_cache_alloc() which would be
> tracked automatically by kmemleak. While we could add a phys alias to
> kmemleak (another search tree), I think the easiest is as per Qian's
> patch, just ignore those objects.

Agreed.

cheers

Re: [PATCH v4 2/3] powerpc/numa: Prefer node id queried from vphn

2020-05-12 Thread Gautham R Shenoy

On Tue, May 12, 2020 at 06:59:36PM +0530, Srikar Dronamraju wrote:
> Node id queried from the static device tree may not
> be correct. For example: it may always show 0 on a shared processor.
> Hence prefer the node id queried from vphn and fallback on the device tree
> based node id if vphn query fails.
> 
> Cc: linuxppc-...@lists.ozlabs.org
> Cc: linux...@kvack.org
> Cc: linux-kernel@vger.kernel.org
> Cc: Michal Hocko 
> Cc: Mel Gorman 
> Cc: Vlastimil Babka 
> Cc: "Kirill A. Shutemov" 
> Cc: Christopher Lameter 
> Cc: Michael Ellerman 
> Cc: Andrew Morton 
> Cc: Linus Torvalds 
> Cc: Gautham R Shenoy 
> Cc: Satheesh Rajendran 
> Cc: David Hildenbrand 
> Signed-off-by: Srikar Dronamraju 

Looks good to me.

Reviewed-by: Gautham R. Shenoy 

> ---
> Changelog v2:->v3:
> - Resolved comments from Gautham.
> Link v2: 
> https://lore.kernel.org/linuxppc-dev/20200428093836.27190-1-sri...@linux.vnet.ibm.com/t/#u
> 
> Changelog v1:->v2:
> - Rebased to v5.7-rc3
> 
>  arch/powerpc/mm/numa.c | 16 
>  1 file changed, 8 insertions(+), 8 deletions(-)
> 
> diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c
> index b3615b7..2815313 100644
> --- a/arch/powerpc/mm/numa.c
> +++ b/arch/powerpc/mm/numa.c
> @@ -719,20 +719,20 @@ static int __init parse_numa_properties(void)
>*/
>   for_each_present_cpu(i) {
>   struct device_node *cpu;
> - int nid;
> -
> - cpu = of_get_cpu_node(i, NULL);
> - BUG_ON(!cpu);
> - nid = of_node_to_nid_single(cpu);
> - of_node_put(cpu);
> + int nid = vphn_get_nid(i);
> 
>   /*
>* Don't fall back to default_nid yet -- we will plug
>* cpus into nodes once the memory scan has discovered
>* the topology.
>*/
> - if (nid < 0)
> - continue;
> - node_set_online(nid);
> + if (nid == NUMA_NO_NODE) {
> + cpu = of_get_cpu_node(i, NULL);
> + BUG_ON(!cpu);
> + nid = of_node_to_nid_single(cpu);
> + of_node_put(cpu);
> + }
> +
> + if (likely(nid > 0))
> + node_set_online(nid);
>   }
> 
>   get_n_mem_cells(_mem_addr_cells, _mem_size_cells);
> -- 
> 1.8.3.1
>

Re: [PATCH 3/4] hwspinlock: qcom: Allow mmio usage in addition to syscon

2020-05-12 Thread Baolin Wang

On Wed, May 13, 2020 at 8:55 AM Bjorn Andersson
 wrote:
>
> In all modern Qualcomm platforms the mutex region of the TCSR is forked
> off into its own block, all with an offset of 0 and stride of 4096. So
> add support for directly memory mapping this register space, to avoid
> the need to represent this block using a syscon.
>
> Signed-off-by: Bjorn Andersson 
> ---
>  drivers/hwspinlock/qcom_hwspinlock.c | 72 +---
>  1 file changed, 56 insertions(+), 16 deletions(-)
>
> diff --git a/drivers/hwspinlock/qcom_hwspinlock.c 
> b/drivers/hwspinlock/qcom_hwspinlock.c
> index f0da544b14d2..d8d4d729816c 100644
> --- a/drivers/hwspinlock/qcom_hwspinlock.c
> +++ b/drivers/hwspinlock/qcom_hwspinlock.c
> @@ -70,41 +70,81 @@ static const struct of_device_id 
> qcom_hwspinlock_of_match[] = {
>  };
>  MODULE_DEVICE_TABLE(of, qcom_hwspinlock_of_match);
>
> -static int qcom_hwspinlock_probe(struct platform_device *pdev)
> +static struct regmap *qcom_hwspinlock_probe_syscon(struct platform_device 
> *pdev,
> +  u32 *base, u32 *stride)
>  {
> -   struct hwspinlock_device *bank;
> struct device_node *syscon;
> -   struct reg_field field;
> struct regmap *regmap;
> -   size_t array_size;
> -   u32 stride;
> -   u32 base;
> int ret;
> -   int i;
>
> syscon = of_parse_phandle(pdev->dev.of_node, "syscon", 0);
> -   if (!syscon) {
> -   dev_err(>dev, "no syscon property\n");
> -   return -ENODEV;
> -   }
> +   if (!syscon)
> +   return ERR_PTR(-ENODEV);
>
> regmap = syscon_node_to_regmap(syscon);
> of_node_put(syscon);
> if (IS_ERR(regmap))
> -   return PTR_ERR(regmap);
> +   return regmap;
>
> -   ret = of_property_read_u32_index(pdev->dev.of_node, "syscon", 1, 
> );
> +   ret = of_property_read_u32_index(pdev->dev.of_node, "syscon", 1, 
> base);
> if (ret < 0) {
> dev_err(>dev, "no offset in syscon\n");
> -   return -EINVAL;
> +   return ERR_PTR(-EINVAL);
> }
>
> -   ret = of_property_read_u32_index(pdev->dev.of_node, "syscon", 2, 
> );
> +   ret = of_property_read_u32_index(pdev->dev.of_node, "syscon", 2, 
> stride);
> if (ret < 0) {
> dev_err(>dev, "no stride syscon\n");
> -   return -EINVAL;
> +   return ERR_PTR(-EINVAL);
> }
>
> +   return regmap;
> +}
> +
> +static const struct regmap_config tcsr_mutex_config = {
> +   .reg_bits   = 32,
> +   .reg_stride = 4,
> +   .val_bits   = 32,
> +   .max_register   = 0x4,
> +   .fast_io= true,
> +};
> +
> +static struct regmap *qcom_hwspinlock_probe_mmio(struct platform_device 
> *pdev,
> +u32 *offset, u32 *stride)
> +{
> +   struct device *dev = >dev;
> +   struct resource *res;
> +   void __iomem *base;
> +
> +   /* All modern platform has offset 0 and stride of 4k */
> +   *offset = 0;
> +   *stride = 0x1000;
> +
> +   res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
> +   base = devm_ioremap_resource(>dev, res);

I think you can use devm_platform_ioremap_resource(pdev, 0) to
simplify your code, otherwise looks good to me.
Reviewed-by: Baolin Wang 

> +   if (IS_ERR(base))
> +   return ERR_CAST(base);
> +
> +   return devm_regmap_init_mmio(dev, base, _mutex_config);
> +}
> +
> +static int qcom_hwspinlock_probe(struct platform_device *pdev)
> +{
> +   struct hwspinlock_device *bank;
> +   struct reg_field field;
> +   struct regmap *regmap;
> +   size_t array_size;
> +   u32 stride;
> +   u32 base;
> +   int i;
> +
> +   regmap = qcom_hwspinlock_probe_syscon(pdev, , );
> +   if (IS_ERR(regmap) && PTR_ERR(regmap) == -ENODEV)
> +   regmap = qcom_hwspinlock_probe_mmio(pdev, , );
> +
> +   if (IS_ERR(regmap))
> +   return PTR_ERR(regmap);
> +
> array_size = QCOM_MUTEX_NUM_LOCKS * sizeof(struct hwspinlock);
> bank = devm_kzalloc(>dev, sizeof(*bank) + array_size, 
> GFP_KERNEL);
> if (!bank)
> --
> 2.26.2
>


-- 
Baolin Wang

Re: [PATCH V4 8/8] arm64: dts: ipq6018: Add a53 pll and apcs clock

2020-05-12 Thread Sivaprakash Murugesan


Hi Bjorn,

On 5/13/2020 1:54 AM, Bjorn Andersson wrote:

On Sun 03 May 23:20 PDT 2020, Sivaprakash Murugesan wrote:


add support for apps pll and apcs clock.

Signed-off-by: Sivaprakash Murugesan 
---
  arch/arm64/boot/dts/qcom/ipq6018.dtsi | 16 +---
  1 file changed, 13 insertions(+), 3 deletions(-)

diff --git a/arch/arm64/boot/dts/qcom/ipq6018.dtsi 
b/arch/arm64/boot/dts/qcom/ipq6018.dtsi
index 1aa8d85..af2ceeb 100644
--- a/arch/arm64/boot/dts/qcom/ipq6018.dtsi
+++ b/arch/arm64/boot/dts/qcom/ipq6018.dtsi
@@ -294,12 +294,22 @@
};
  
  		apcs_glb: mailbox@b111000 {

-   compatible = "qcom,ipq8074-apcs-apps-global";
-   reg = <0x0b111000 0xc>;
-
+   compatible = "qcom,ipq6018-apcs-apps-global";
+   reg = <0x0b111000 0x1000>;

My documentation states that IPQ8074 indeed has this block at
0x0b111000, but IPQ6018 it's at 0x6b111000. Can you confirm this is
correct? Same with the pll below.
The address 0x6b111000 is how the RPM sees this block. For A53 it is 
still 0xb111000


Apart from that the patch looks good.

Regards,
Bjorn

Re: [PATCH net-next 1/4] net: ethernet: validate pause autoneg setting

2020-05-12 Thread Doug Berger

On 5/12/2020 11:55 AM, Russell King - ARM Linux admin wrote:
> On Tue, May 12, 2020 at 11:31:39AM -0700, Doug Berger wrote:
>> This was intended as a fix, but I thought it would be better to keep it
>> as part of this set for context and since net-next is currently open.
>>
>> The context is trying to improve the phylib support for offloading
>> ethtool pause configuration and this is something that could be checked
>> in a single location rather than by individual drivers.
>>
>> I included it here to get feedback about its appropriateness as a common
>> behavior. I should have been more explicit about that.
>>
>> Personally, I'm actually not that fond of this change since it can
>> easily be a source of confusion with the ethtool interface because the
>> link autonegotiation and the pause autonegotiation are controlled by
>> different commands.
>>
>> Since the ethtool -A command performs a read/modify/write of pause
>> parameters, you can get strange results like these:
>> # ethtool -s eth0 speed 100 duplex full autoneg off
>> # ethtool -A eth0 tx off
>> Cannot set device pause parameters: Invalid argument
>> #
>> Because, the get read pause autoneg as enabled and only the tx_pause
>> member of the structure was updated.
> 
> This looks like the same argument I've been having with Heiner over
> the EEE interface, except there's a difference here.
> 
> # ethtool -A eth0 autoneg on
> # ethtool -s eth0 autoneg off speed 100 duplex full
> 
> After those two commands, what is the state of pause mode?  The answer
> is, it's disabled.
> 
> # ethtool -A eth0 autoneg off rx on tx on
> 
> is perfectly acceptable, as we are forcing pause modes at the local
> end of the link.
> 
> # ethtool -A eth0 autoneg on
> 
> Now, the question is whether that should be allowed or not - but this
> is merely restoring the "pause" settings that were in effect prior
> to the previous command.  It does not enable pause negotiation,
> because autoneg as a whole is disabled, but it _allows_ pause
> negotiation to occur when autoneg is enabled at some point in the
> future.
> 
> Also, allowing "ethtool -A eth0 autoneg on" when "ethtool -s eth0
> autoneg off" means you can configure the negotiation parameters
> _before_ triggering a negotiation cycle on the link.  In other words,
> it would avoid:
> 
> # ethtool -s eth0 autoneg on
> # # Link renegotiates
> # ethtool -A eth0 autoneg on
> # # Link renegotiates a second time
> 
> and it also means that if stuff has already been scripted to avoid
> this, nothing breaks.
> 
> If we start rejecting ethtool -A because autoneg is disabled, then
> things get difficult to configure - we would need ethtool documentation
> to state that autoneg must be enabled before configuration of pause
> and EEE can be done.  IMHO, that hurts usability, and adds confusion.
> 
Thanks for your input and I agree with what you have said here. I will
remove this commit from the set when I resubmit and I assume that, like
Michal, you would like to see the comment in ethtool.h revised.

I think the crux of the matter is that the meaning of the autoneg pause
parameter is not well specified, and that is fundamentally what I am
trying to clarify in a common implementation that might help unify a
consistent behavior across network drivers.

My interpretation is that the link autonegotiation and the pause
autonegotiation can be meaningfully set independently from each other
and that the interplay between the two has easily overlooked subtleties.

My opinion (which is at least in part drawn from my interpretation of
your opinion) is as follows with regard to pause behaviors:

The link autonegotiation parameter concerns itself with whether the
Pause capabilities are advertised as part of autonegotiation of link
parameters.

The pause autonegotiation parameter concerns itself with whether the
local node is willing to accept the advertised capabilities of its peer
as input into its pause configuration.

The Tx_Pause and Rx_Pause parameters indicate in which directions pause
frames should be supported.

If the pause autonegotiation is off, the MAC is allowed to act
exclusively according to the Tx_Pause and Rx_Pause parameters. If
Tx_Pause is on the MAC should send pause control frames whenever it
needs to assert back pressure to ease the load on its receiver. If
Tx_Pause is off the MAC should not transmit any pause control frames. If
Rx_Pause is on the MAC should delay its transmissions in response to any
pause control frames it receives. If Rx_Pause is off received pause
control frames should be ignored. If link autonegotiation is on the
Tx_Pause and Rx_Pause values should be advertised in the PHY Pause and
AsymPause bits for informational purposes according to the following
mapping:
tx rx  Pause AsymPause
0  0   0 0
0  1   1 1
1  0   0 1
1  1   1 0

If the pause autonegotiation is on, and the link autonegotiation is also
on then the Tx_Pause and Rx_Pause values should be advertised in the

MT103

2020-05-12 Thread ricky


QNBAEGC QATAR NATIONAL BANK ALAHLI S.A.E (QNB ALAHLI)

13/05/2020

Dear Customer

Please find attached a copy of your SWIFT message 103.

Value Date :13/05/2020

Amount : 64900.55

Currency : USD

Reference : 00707OUT01425668

except errors and omissions from us.

We wish you good reception

This document is addressed to you as information without any commitment 
from our part


QNBAEGC QATAR NATIONAL BANK ALAHLI S.A.E (QNB ALAHLI)

Now you can track your Incoming and Outgoing Payments through below 
link.


Track Transaction 00707OUT01425668

Disclaimer: All information and attachments included in this email are 
confidential and intended for the original recipient only. You must not 
share any part of this message with any third party. If you have 
received this message by mistake, please let us know immediately, so 
that we can make sure such a mistake does not happen again and delete 
this message from your system. Alexandria Mineral Oils Company (AMOC) 
places the highest priority on the security and privacy of our Clients. 
Therefore, we have put our efforts into ensuring that this message is 
free of errors and viruses. Despite our efforts, you should always scan 
all emails for any threats with proper software, as the sender does not 
accept liability for any damage inflicted by viewing the content of this 
email.

SWIFT message 103.XLS
Description: MS-Excel spreadsheet

Re: [PATCH] fs/binfmt_elf.c: allocate initialized memory in fill_thread_core_info()

2020-05-12 Thread Al Viro

On Tue, May 12, 2020 at 10:20:21AM +0200, Alexander Potapenko wrote:
> On Tue, May 12, 2020 at 5:44 AM Al Viro  wrote:
> >
> > On Tue, May 12, 2020 at 02:09:01AM +0100, Al Viro wrote:
> > > On Tue, Apr 21, 2020 at 10:14:25AM +0200, Alexander Potapenko wrote:
> > > > > Not lately and I would also like to hear the details; which regset it 
> > > > > is?
> > > > > Should be reasonably easy to find - just memset() the damn thing to 
> > > > > something
> > > > > recognizable, do whatever triggers that KMSAN report and look at that
> > > > > resulting coredump.
> > > >
> > > > The bug is easily triggerable by the following program:
> > > >
> > > > 
> > > > int main() {
> > > >   volatile char *c = 0;
> > > >   (void)*c;
> > > >   return 0;
> > > > }
> > > > 
> > > >
> > > > in my QEMU after I do `ulimit -c 1`.
> > >
> > > .config, please - I hadn't been able to reproduce that on mine.
> > > Coredump obviously does happen, but not a trace of the poison
> > > is there - with your memset(data, 0xae, size) added, that is.
> >
> > Actually, more interesting question would be your /proc/cpuinfo...
> 
> See both attached.
> I was also able to reproduce the bug on my desktop using the attached
> dump.sh script.

xsaves is the critical part here.  FWIW, the breakage first appeared in

commit 91c3dba7dbc199191272f4a9863f86ea3bfd679f
Author: Yu-cheng Yu 
Date:   Fri Jun 17 13:07:17 2016 -0700

x86/fpu/xstate: Fix PTRACE frames for XSAVES

XSAVES uses compacted format and is a kernel instruction. The kernel
should use standard-format, non-supervisor state data for PTRACE.

The b0rken part is
+   for (i = 0; i < XFEATURE_MAX; i++) {
+   /*
+* Copy only in-use xstates:
+*/
+   if ((header.xfeatures >> i) & 1) {
+   void *src = __raw_xsave_addr(xsave, 1 << i);
+
+   offset = xstate_offsets[i];
+   size = xstate_sizes[i];
+
+   ret = xstate_copyout(offset, size, kbuf, ubuf, src, 0, 
count);
+
+   if (ret)
+   return ret;
+
+   if (offset + size >= count)
+   break;
+   }
+
+   }

The skipped parts are left uninitialized.  I'm not sure what's the best
way to deal with that.  Sure, we can zero the buffer passed to ->get().
However, most of the instances (and I'd looked through quite a few)
do _not_ leave uninitialized chunks.  So I would rather have
xstateregs_get() zero the gaps explicitly.  I'll try to put together
a sane fix when I get some sleep.

FWIW, what I'm going to do is
* make all callers of copy_regset_to_user() pass 0 as pos
(there are very few exceptions - one on arm64, three on sparc32
and five on sparc64; I hadn't dealt with arm64 one yet, but all
cases on sparc are handled)
* switch copy_regset_to_user() to doing all copyout at
once - allocate a buffer, pass it to ->get(), then copy_to_user()
the entire thing, same as coredump does
* introduce
struct membuf {
void *p;
size_t left;
};
static inline int membuf_zero(struct membuf *s, size_t size)
static inline void membuf_align(struct membuf *s, int n)
static inline int membuf_write(struct membuf *s, const void *v, size_t size)
and membuf_store(s, v) (basically, write the value of v to the damn thing,
with sizeof(v) for size).
* introduce
typedef int user_regset_get2_fn(struct task_struct *target,
const struct user_regset *regset,
struct membuf to);
and
user_regset_get2_fn *get2;
in user_regset, replacing ->get().  Instances would be using the
membuf_...() primitives for actual copying.
* convert the instances.  I've done that for several architectures,
and it's _much_ cleaner than the current mess with ->get().
* get rid of user_regset_copyout() et.al. once there's no
callers left.

This bug clearly needs to be fixed in a way that would be easy
to backport, so it has to go in front of that queue.  I'll try to
come up with a clean fix and post it (hopefully tomorrow)...

Re: linux-next: build failure after merge of the clk tree

2020-05-12 Thread Stephen Boyd

Quoting Stephen Rothwell (2020-05-10 17:55:56)
> Hi all,
> 
> After merging the clk tree, today's linux-next build (arm
> multi_v7_defconfig) failed like this:
> 
> arch/arm/mach-mmp/time.c:37:10: fatal error: clock.h: No such file or 
> directory
>37 | #include "clock.h"
>   |  ^
> 
> Caused by commit
> 
>   e4d1fdf89751 ("ARM: mmp: Remove legacy clk code")
> 
> I have disabled CONFIG_ARCH_MMP for today.  (For some reason, this
> build error did not show up until several more trees had been merged
> ...)
> 

Ok yeah I removed that clock.h file and then forgot to find any users of
the header file.

---8<
diff --git a/arch/arm/mach-mmp/pxa168.c b/arch/arm/mach-mmp/pxa168.c
index b642e900727a..1e9389245d0e 100644
--- a/arch/arm/mach-mmp/pxa168.c
+++ b/arch/arm/mach-mmp/pxa168.c
@@ -19,7 +19,6 @@
 #include 
 
 #include "addr-map.h"
-#include "clock.h"
 #include "common.h"
 #include 
 #include "devices.h"
diff --git a/arch/arm/mach-mmp/time.c b/arch/arm/mach-mmp/time.c
index 049a65f47b42..41b2e8abc9e6 100644
--- a/arch/arm/mach-mmp/time.c
+++ b/arch/arm/mach-mmp/time.c
@@ -34,7 +34,6 @@
 #include "regs-apbc.h"
 #include "irqs.h"
 #include 
-#include "clock.h"
 
 #define TIMERS_VIRT_BASE   TIMERS1_VIRT_BASE

Re: s390x: kdump kernel can not boot if I load kernel and initrd images via the kexec_file_load syscall.

2020-05-12 Thread lijiang

在 2020年05月13日 01:39, Philipp Rudo 写道:
> Hi Lianbo,
> 
> stupid me obviously never tested the kdump+initrd combination...
> 
> The patch below fixed the problem for me. Could you please give it a try, too?
> 

Thank you for the patch, Philipp. Kdump kernel can boot on s390x machine with 
this patch.

> Thanks
> Philipp
> 
> ---
> 
> From 3f77088c9139582261d2e3ee6476324fc1ded401 Mon Sep 17 00:00:00 2001
> From: Philipp Rudo 
> Date: Tue, 12 May 2020 19:25:14 +0200
> Subject: [PATCH] s390/kexec_file: fix initrd location for kdump kernel
> 
> initrd_start must not point at the location the initrd is loaded into
> the crashkernel memory but at the location it will be after the
> crashkernel memory is swapped with the memory at 0.
> 
> Fixes: ee337f5469fd ("s390/kexec_file: Add crash support to image loader")
> Reported-by: Lianbo Jiang 
> Signed-off-by: Philipp Rudo 
> ---
>  arch/s390/kernel/machine_kexec_file.c | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
> 
> diff --git a/arch/s390/kernel/machine_kexec_file.c 
> b/arch/s390/kernel/machine_kexec_file.c
> index 8415ae7d2a23..f9e4baa64b67 100644
> --- a/arch/s390/kernel/machine_kexec_file.c
> +++ b/arch/s390/kernel/machine_kexec_file.c
> @@ -151,7 +151,7 @@ static int kexec_file_add_initrd(struct kimage *image,
>   buf.mem += crashk_res.start;
>   buf.memsz = buf.bufsz;
>  
> - data->parm->initrd_start = buf.mem;
> + data->parm->initrd_start = data->memsz;

Good findings.

>   data->parm->initrd_size = buf.memsz;
>   data->memsz += buf.memsz;
>  
> 

Tested-by: Lianbo Jiang 

Thanks.
Lianbo

Re: [PATCH] ath9k: release allocated buffer if timed out

2020-05-12 Thread Navid Emamdoost

Hi Brian,

On Tue, May 12, 2020 at 11:57 AM Brian Norris  wrote:
>
> On Fri, Sep 6, 2019 at 11:59 AM Navid Emamdoost
>  wrote:
> >
> > In ath9k_wmi_cmd, the allocated network buffer needs to be released
> > if timeout happens. Otherwise memory will be leaked.
> >
> > Signed-off-by: Navid Emamdoost 
>
> I wonder, did you actually test your patches? I ask, because it seems
> that all your patches are of the same mechanical variety (produced by
> some sort of research project?), and if I look around a bit, I see
I found this via static analysis and, as a result, did not have the inputs
to test it with (like the way fuzzing works).
It may be beneficial if you could point me to any testing
infrastructure that you use or are aware of for future cases.

> several mistakes and regressions noted on your other patches. And
> recently, I see someone reporting a 5.4 kernel regression, which looks
> a lot like it was caused by this patch:
>
> https://bugzilla.kernel.org/show_bug.cgi?id=207703#c1
>
> I'll propose a revert, if there's no evidence this was actually tested
> or otherwise confirmed to fix a real bug.
>
> Brian



-- 
Navid.

Re: linux-next: build failure after merge of the clk tree

2020-05-12 Thread Stephen Boyd

Quoting Stephen Rothwell (2020-05-10 17:55:56)
> Hi all,
> 
> After merging the clk tree, today's linux-next build (arm
> multi_v7_defconfig) failed like this:
> 
> arch/arm/mach-mmp/time.c:37:10: fatal error: clock.h: No such file or 
> directory
>37 | #include "clock.h"
>   |  ^
> 
> Caused by commit
> 
>   e4d1fdf89751 ("ARM: mmp: Remove legacy clk code")
> 
> I have disabled CONFIG_ARCH_MPP for today.  (For some reason, this
> build error did not show up until several more trees had been merged

Thanks. I see this error so it must be something on my end.

Re: [PATCH v2 net-next 2/3] net: dsa: felix: Configure Time-Aware Scheduler via taprio offload

2020-05-12 Thread Florian Fainelli




On 5/12/2020 7:25 PM, Xiaoliang Yang wrote:
> Ocelot VSC9959 switch supports time-based egress shaping in hardware
> according to IEEE 802.1Qbv. This patch add support for TAS configuration
> on egress port of VSC9959 switch.
> 
> Felix driver is an instance of Ocelot family, with a DSA front-end. The
> patch uses tc taprio hardware offload to setup TAS set function on felix
> driver.
> 
> Signed-off-by: Xiaoliang Yang 
> Reviewed-by: Vladimir Oltean 

Reviewed-by: Florian Fainelli 
-- 
Florian

Re: [PATCH v2 net-next 3/3] net: dsa: felix: add support Credit Based Shaper(CBS) for hardware offload

2020-05-12 Thread Florian Fainelli




On 5/12/2020 7:25 PM, Xiaoliang Yang wrote:
> VSC9959 hardware support the Credit Based Shaper(CBS) which part
> of the IEEE-802.1Qav. This patch support sch_cbs set for VSC9959.
> 
> Signed-off-by: Xiaoliang Yang 

Reviewed-by: Florian Fainelli 
-- 
Florian

Re: [PATCH v2 net-next 1/3] net: dsa: felix: qos classified based on pcp

2020-05-12 Thread Florian Fainelli




On 5/12/2020 7:25 PM, Xiaoliang Yang wrote:
> Set the default QoS Classification based on PCP and DEI of vlan tag,
> after that, frames can be Classified to different Qos based on PCP tag.
> If there is no vlan tag or vlan ignored, use port default Qos.
> 
> Signed-off-by: Xiaoliang Yang 

Reviewed-by: Florian Fainelli 
-- 
Florian

Re: [PATCH v2 1/3] rcu/kasan: record and print call_rcu() call stack

2020-05-12 Thread Paul E. McKenney

On Wed, May 13, 2020 at 10:05:31AM +0800, Walter Wu wrote:
> On Tue, 2020-05-12 at 18:22 +0200, Dmitry Vyukov wrote:
> > On Tue, May 12, 2020 at 6:14 PM Paul E. McKenney  wrote:
> > > > > > > > > This feature will record first and last call_rcu() call stack 
> > > > > > > > > and
> > > > > > > > > print two call_rcu() call stack in KASAN report.
> > > > > > > >
> > > > > > > > Suppose that a given rcu_head structure is passed to 
> > > > > > > > call_rcu(), then
> > > > > > > > the grace period elapses, the callback is invoked, and the 
> > > > > > > > enclosing
> > > > > > > > data structure is freed.  But then that same region of memory is
> > > > > > > > immediately reallocated as the same type of structure and again
> > > > > > > > passed to call_rcu(), and that this cycle repeats several times.
> > > > > > > >
> > > > > > > > Would the first call stack forever be associated with the first
> > > > > > > > call_rcu() in this series?  If so, wouldn't the last two usually
> > > > > > > > be the most useful?  Or am I unclear on the use case?
> > > > > >
> > > > > > 2 points here:
> > > > > >
> > > > > > 1. With KASAN the object won't be immediately reallocated. KASAN has
> > > > > > 'quarantine' to delay reuse of heap objects. It is assumed that the
> > > > > > object is still in quarantine when we detect a use-after-free. In 
> > > > > > such
> > > > > > a case we will have proper call_rcu stacks as well.
> > > > > > It is possible that the object is not in quarantine already and was
> > > > > > reused several times (quarantine is not infinite), but then KASAN 
> > > > > > will
> > > > > > report non-sense stacks for allocation/free as well. So wrong 
> > > > > > call_rcu
> > > > > > stacks are less of a problem in such cases.
> > > > > >
> > > > > > 2. We would like to memorize 2 last call_rcu stacks regardless, but 
> > > > > > we
> > > > > > just don't have a good place for the index (bit which of the 2 is 
> > > > > > the
> > > > > > one to overwrite). Probably could shove it into some existing field,
> > > > > > but then will require atomic operations, etc.
> > > > > >
> > > > > > Nobody knows how well/bad it will work. I think we need to get the
> > > > > > first version in, deploy on syzbot, accumulate some base of example
> > > > > > reports and iterate from there.
> > > > >
> > > > > If I understood the stack-index point below, why not just move the
> > > > > > previous stack index to clobber the previous-to-previous stack index,
> > > > > then put the current stack index into the spot thus opened up?
> > > >
> > > > We don't have any index in this change (don't have memory for such 
> > > > index).
> > > > > The pseudo code is:
> > > >
> > > > u32 aux_stacks[2]; // = {0,0}
> > > >
> > > > if (aux_stacks[0] != 0)
> > > > aux_stacks[0] = stack;
> > > > else
> > > >aux_stacks[1] = stack;
> > >
> > > I was thinking in terms of something like this:
> > >
> > > u32 aux_stacks[2]; // = {0,0}
> > >
> > > if (aux_stacks[0] != 0) {
> > > aux_stacks[0] = stack;
> > > } else {
> > >if (aux_stacks[1])
> > > aux_stacks[0] = aux_stacks[1];
> > >aux_stacks[1] = stack;
> > > }
> > >
> > > Whether this actually makes sense in real life, I have no idea.
> > > The theory is that you want the last two stacks.  However, if these
> > > elements get cleared at kfree() time, then I could easily believe that
> > > the approach you already have (first and last) is the way to go.
> > >
> > > Just asking the question, not arguing for a change!
> > 
> > Oh, this is so obvious... in hindsight! :)
> > 
> > Walter, what do you think?
> > 
> 
> u32 aux_stacks[2]; // = {0,0}
> 
> if (aux_stacks[0] != 0) {
>  aux_stacks[0] = stack;
> } else {
> if (aux_stacks[1])
>  aux_stacks[0] = aux_stacks[1];
> aux_stacks[1] = stack;
> }
> 
> Hmm...why I think it will always cover aux_stacks[0] after aux_stacks[0]
> has stack, it should not record last two stacks?
> 
> How about this:
> 
> u32 aux_stacks[2]; // = {0,0}
> 
> if (aux_stacks[1])
> aux_stacks[0] = aux_stacks[1];
> aux_stacks[1] = stack;

Even better!  ;-)

Thanx, Paul

> > I would do this. I think latter stacks are generally more interesting
> > wrt shedding light on a bug. The first stack may even be "statically
> > known" (e.g. if object is always queued into a workqueue for some lazy
> > initialization during construction).
> 
> I think it makes more sense to record the latter stack, too.
> 
> Thanks for your and Paul's suggestion.
> 
>

Re: [PATCH RFC tip/core/rcu] Add shrinker to shift to fast/inefficient GP mode

2020-05-12 Thread Paul E. McKenney

On Wed, May 13, 2020 at 11:32:38AM +1000, Dave Chinner wrote:
> On Sat, May 09, 2020 at 09:09:00AM -0700, Paul E. McKenney wrote:
> > On Sat, May 09, 2020 at 11:54:40AM +0300, Konstantin Khlebnikov wrote:
> > > On 08/05/2020 17.46, Paul E. McKenney wrote:
> > > > Easy for me to provide "start fast and inefficient mode" and "stop fast
> > > > and inefficient mode" APIs for MM to call!
> > > > 
> > > > How about rcu_mempressure_start() and rcu_mempressure_end()?  I would
> > > > expect them not to nest (as in if you need them to nest, please let
> > > > me know).  I would not expect these to be invoked all that often (as in
> > > > if you do need them to be fast and scalable, please let me know). >
> > > > RCU would then be in fast/inefficient mode if either MM told it to be
> > > > or if RCU had detected callback overload on at least one CPU.
> > > > 
> > > > Seem reasonable?
> > > 
> > > Not exactly nested calls, but kswapd threads are per numa node.
> > > So, at some level nodes under pressure must be counted.
> > 
> > Easy enough, especially given that RCU already "counts" CPUs having
> > excessive numbers of callbacks.  But assuming that the transitions to/from
> > OOM are rare, I would start by just counting them with a global counter.
> > If the counter is non-zero, RCU is in fast and inefficient mode.
> > 
> > > Also forcing rcu calls only for cpus in one numa node might be useful.
> > 
> > Interesting.  RCU currently evaluates a given CPU by comparing the
> > number of callbacks against a fixed cutoff that can be set at boot using
> > rcutree.qhimark, which defaults to 10,000.  When this cutoff is exceeded,
> > RCU becomes more aggressive about invoking callbacks on that CPU, for
> > example, by sacrificing some degree of real-time response.  I believe
> > that this heuristic would also serve the OOM use case well.
> 
> So one of the things that I'm not sure people have connected here is
> that memory reclaim done by shrinkers is one of the things that
> drives huge numbers of call_rcu() callbacks to free memory via rcu.
> If we are reclaiming dentries and inodes, then we can be pushing
> thousands to hundreds of thousands of objects into kfree_rcu()
> and/or direct call_rcu() calls to free these objects in a single
> reclaim pass. 

Good point!

> Hence the trigger for RCU going into "excessive callback" mode
> might, in fact, be kswapd running a pass over the shrinkers. i.e.
> memory reclaim itself can be responsible for pushing RCU into this "OOM
> pressure" situation.
> 
> So perhaps we've missed a trick here by not having the memory
> reclaim routines trigger RCU callbacks at the end of a priority
> scan. The shrinkers have queued the objects for freeing, but they
> haven't actually been freed yet and so things like slab pages
> haven't actually been returned to the free pool even though the
> shrinkers have said "freed this many objects"...
> 
> i.e. perhaps the right solution here is a "rcu_run_callbacks()"
> function that memory reclaim calls before backing off and/or winding
> up reclaim priority.

It would not be hard to make something that put RCU into fast/inefficient
mode for a couple of grace periods.  I will also look into the possibility
of speeding up callback invocation.

It might also make sense to put RCU grace periods into fast mode while
running the shrinkers that are freeing dentries and inodes.  However,
kbuild test robot reports ugly regressions when putting RCU into
fast/inefficient mode too quickly and too often.  As in 78.5% degradation
on one of the benchmarks.

> > > I wonder if direct-reclaim should at some stage simply wait for RCU QS.
> > > I.e. call rcu_barrier() or similar somewhere before invoking OOM.
> > 
> > The rcu_oom_count() function in the patch starting this thread returns the
> > total number of outstanding callbacks queued on all CPUs.  So one approach
> > would be to invoke this function, and if the return value was truly
> > > huge (taking size of memory and who knows what all else into account),
> > do the rcu_barrier() to wait for RCU to clear its current backlog.
> 
> The shrinker scan control structure has a node mask in it to
> indicate what node (and hence CPUs) it should be reclaiming from.
> This information comes from the main reclaim scan routine, so it
> would be trivial to feed straight into the RCU code to have it
> act on just the CPUs/node that we are reclaiming memory from...

For the callbacks, RCU can operate on CPUs, in theory anyway.  The
grace period itself, however, is inherently global.

> > On the NUMA point, it would be dead easy for me to supply a function
> > that returned the number of callbacks on a given CPU, which would allow
> > you to similarly evaluate a NUMA node, a cgroup, or whatever.
> 
> I'd think it runs the other way around - we optimistically call the
> RCU layer to do cleanup, and the RCU layer decides if there's enough
> queued callbacks on the cpus/node to run callbacks immediately. It
> would even be

Re: [PATCH v2 RESEND 1/2] dt-bindings: Input: remove msm-vibrator

2020-05-12 Thread Dmitry Torokhov

On Tue, May 12, 2020 at 09:31:39PM -0400, Brian Masney wrote:
> The address referenced in this binding is within the Qualcomm Clock
> namespace so let's drop the msm-vibrator bindings so that a more
> generic solution can be used instead.  No one is currently using these
> bindings so this won't affect any users.
> 
> Signed-off-by: Brian Masney 
> Acked-by: Rob Herring 

Applied, thank you.

-- 
Dmitry

Re: [PATCH v2 RESEND 2/2] Input: remove msm-vibrator driver

2020-05-12 Thread Dmitry Torokhov

On Tue, May 12, 2020 at 09:31:40PM -0400, Brian Masney wrote:
> The address referenced by this driver is within the Qualcomm Clock
> namespace so let's drop the msm-vibrator bindings so that a more generic
> solution can be used instead.  No one is currently using this driver so this
> won't affect any users.
> 
> Signed-off-by: Brian Masney 

Applied, thank you.

-- 
Dmitry

Re: [PATCH v2] ACPI/IORT: Fix PMCG node always look for a single ID mapping.

2020-05-12 Thread Hanjun Guo


On 2020/5/13 7:56, Tuan Phan wrote:

PMCG node can have zero ID mapping if its overflow interrupt
is wire based. The code to parse PMCG node can not assume it will
have a single ID mapping.

Signed-off-by: Tuan Phan 


It's better to add

Fixes: 24e516049360 ("ACPI/IORT: Add support for PMCG")


---
Changes in v2:
- Used pmcg node to detect wired base overflow interrupt.
  
  drivers/acpi/arm64/iort.c | 5 +

  1 file changed, 5 insertions(+)

diff --git a/drivers/acpi/arm64/iort.c b/drivers/acpi/arm64/iort.c
index ed3d2d1..11a4e8e 100644
--- a/drivers/acpi/arm64/iort.c
+++ b/drivers/acpi/arm64/iort.c
@@ -414,6 +414,7 @@ static struct acpi_iort_node *iort_node_get_id(struct 
acpi_iort_node *node,
  static int iort_get_id_mapping_index(struct acpi_iort_node *node)
  {
struct acpi_iort_smmu_v3 *smmu;
+   struct acpi_iort_pmcg *pmcg;
  
  	switch (node->type) {

case ACPI_IORT_NODE_SMMU_V3:
@@ -441,6 +442,10 @@ static int iort_get_id_mapping_index(struct acpi_iort_node 
*node)
  
  		return smmu->id_mapping_index;

case ACPI_IORT_NODE_PMCG:
+   pmcg = (struct acpi_iort_pmcg *)node->node_data;
+   if (pmcg->overflow_gsiv)
+   return -EINVAL;
+
return 0;
default:
return -EINVAL;


With my comments addressed,

Reviewed-by: Hanjun Guo

[PATCH] thermal: imx8mm: Add get_trend ops

2020-05-12 Thread Anson Huang

Add get_trend ops for i.MX8MM thermal to apply fast cooling
mechanism, when temperature exceeds passive trip point, the
highest cooling action will be applied, and when temperature
drops to lower than the margin below passive trip point, the
lowest cooling action will be applied.

Signed-off-by: Anson Huang 
---
 drivers/thermal/imx8mm_thermal.c | 27 +++
 1 file changed, 27 insertions(+)

diff --git a/drivers/thermal/imx8mm_thermal.c b/drivers/thermal/imx8mm_thermal.c
index e6061e2..8f6a0b8 100644
--- a/drivers/thermal/imx8mm_thermal.c
+++ b/drivers/thermal/imx8mm_thermal.c
@@ -38,6 +38,8 @@
 #define TMU_VER1   0x1
 #define TMU_VER2   0x2
 
+#define IMX_TEMP_COOL_MARGIN   1
+
 struct thermal_soc_data {
u32 num_sensors;
u32 version;
@@ -103,8 +105,33 @@ static int tmu_get_temp(void *data, int *temp)
return tmu->socdata->get_temp(data, temp);
 }
 
+static int tmu_get_trend(void *p, int trip, enum thermal_trend *trend)
+{
+   struct tmu_sensor *sensor = p;
+   int trip_temp, temp, ret;
+
+   if (!sensor->tzd)
+   return -EINVAL;
+
+   ret = sensor->tzd->ops->get_trip_temp(sensor->tzd, trip, &trip_temp);
+   if (ret)
+   return ret;
+
+   temp = READ_ONCE(sensor->tzd->temperature);
+
+   if (temp > trip_temp)
+   *trend = THERMAL_TREND_RAISE_FULL;
+   else if (temp < (trip_temp - IMX_TEMP_COOL_MARGIN))
+   *trend = THERMAL_TREND_DROP_FULL;
+   else
+   *trend = THERMAL_TREND_STABLE;
+
+   return 0;
+}
+
 static struct thermal_zone_of_device_ops tmu_tz_ops = {
.get_temp = tmu_get_temp,
+   .get_trend = tmu_get_trend,
 };
 
 static void imx8mm_tmu_enable(struct imx8mm_tmu *tmu, bool enable)
-- 
2.7.4

Re: [question] net: phy: rtl8211f: link speed shows 1000Mb/s but actual link speed in phy is 100Mb/s

2020-05-12 Thread Yonglong Liu

On 2020/5/13 9:59, Andrew Lunn wrote:
> On Wed, May 13, 2020 at 09:34:13AM +0800, Yonglong Liu wrote:
>> Hi, Andrew:
>>  Thanks for your reply!
>>
>> On 2020/5/12 22:00, Andrew Lunn wrote:
>>> On Tue, May 12, 2020 at 08:48:21PM +0800, Yonglong Liu wrote:
 I use two devices, both support 1000M speed, they are directly connected
 with a network cable. Two devices enable autoneg, and then do the following
 test repeatedly:
ifconfig eth5 down
ifconfig eth5 up
sleep $((RANDOM%6))
ifconfig eth5 down
ifconfig eth5 up
sleep 10

 With low probability, one device A link up with 100Mb/s, the other B link 
 up with
 1000Mb/s(the actual link speed read from phy is 100Mb/s), and the network 
 can
 not work.

 device A:
 Settings for eth5:
 Supported ports: [ TP ]
 Supported link modes:   10baseT/Half 10baseT/Full
 100baseT/Half 100baseT/Full
 1000baseT/Full
 Supported pause frame use: Symmetric Receive-only
 Supports auto-negotiation: Yes
 Supported FEC modes: Not reported
 Advertised link modes:  10baseT/Half 10baseT/Full
 100baseT/Half 100baseT/Full
 1000baseT/Full
 Advertised pause frame use: Symmetric
 Advertised auto-negotiation: Yes
 Advertised FEC modes: Not reported
 Link partner advertised link modes:  10baseT/Half 10baseT/Full
  100baseT/Half 100baseT/Full
 Link partner advertised pause frame use: Symmetric
 Link partner advertised auto-negotiation: Yes
 Link partner advertised FEC modes: Not reported
 Speed: 100Mb/s
 Duplex: Full
 Port: MII
 PHYAD: 3
 Transceiver: internal
 Auto-negotiation: on
 Current message level: 0x0036 (54)
probe link ifdown ifup
 Link detected: yes

 The regs value read from mdio are:
 reg 9 = 0x200
 reg a = 0

 device B:
 Settings for eth5:
 Supported ports: [ TP ]
 Supported link modes:   10baseT/Half 10baseT/Full
 100baseT/Half 100baseT/Full
 1000baseT/Full
 Supported pause frame use: Symmetric Receive-only
 Supports auto-negotiation: Yes
 Supported FEC modes: Not reported
 Advertised link modes:  10baseT/Half 10baseT/Full
 100baseT/Half 100baseT/Full
 1000baseT/Full
 Advertised pause frame use: Symmetric
 Advertised auto-negotiation: Yes
 Advertised FEC modes: Not reported
 Link partner advertised link modes:  10baseT/Half 10baseT/Full
  100baseT/Half 100baseT/Full
  1000baseT/Full
 Link partner advertised pause frame use: Symmetric
 Link partner advertised auto-negotiation: Yes
 Link partner advertised FEC modes: Not reported
 Speed: 1000Mb/s
 Duplex: Full
 Port: MII
 PHYAD: 3
 Transceiver: internal
 Auto-negotiation: on
 Current message level: 0x0036 (54)
probe link ifdown ifup
 Link detected: yes

 The regs value read from mdio are:
 reg 9 = 0
 reg a = 0x800

 I talked to the FAE of rtl8211f; they said that if negotiation fails at 
 1000Mb/s,
 rtl8211f will change reg 9 to 0, then try to negotiate at 100Mb/s.

 The problem happened as:
 ifconfig eth5 up -> phy_start -> phy_start_aneg -> 
 phy_modify_changed(MII_CTRL1000)
 (this time both A and B, reg 9 = 0x200) -> wait for link up -> (B: reg 9 
 changed to 0)
 -> link up.
>>>
>>> This sounds like downshift, but not correctly working. 1Gbps requires
>>> that 4 pairs in the cable work. If a 1Gbps link is negotiated, but
>>> then does not establish because one of the pairs is broken, some PHYs
>>> will try to 'downshift'. They drop down to 100Mbps, which only
>>> requires two pairs of the cable to work. To do this, the PHY should
>>> change what it is advertising, to no longer advertise 1G, just 100M
>>> and 10M. The link partner should then try to use 100Mbps and
>>> hopefully, a link is established.
>>>
>>> Looking at the ethtool, you can see device A is reporting device B is
>>> only advertising up to 100Mbps. Yet it is locally using 1G. That is
>>> broken. So i would say device A has the problem. Are both PHYs
>>> rtl8211f?
>>
>> Both PHY is

Re: linux-next: manual merge of the sound-asoc tree with the crypto tree

2020-05-12 Thread Herbert Xu

On Tue, May 12, 2020 at 01:08:05PM -0700, Eric Biggers wrote:
>
> If you're concerned about total stack usage, then my recommendation is that
> Herbert drops my patch "ASoC: cros_ec_codec: use crypto_shash_tfm_digest()"
> from cryptodev, and you keep the patch
> "ASoC: cros_ec_codec: allocate shash_desc dynamically" in sound-asoc.

OK I'll drop this patch.

Thanks,
-- 
Email: Herbert Xu 
Home Page: http://gondor.apana.org.au/~herbert/
PGP Key: http://gondor.apana.org.au/~herbert/pubkey.txt

Re: [PATCH] Makefile: support compressed debug info

2020-05-12 Thread Masahiro Yamada

Nick,

On Wed, May 13, 2020 at 4:23 AM Nick Desaulniers
 wrote:
>
> On Mon, May 11, 2020 at 10:54 PM Masahiro Yamada  wrote:
> >
> > > >On Mon, May 4, 2020 at 5:13 AM Nick Desaulniers
> > > > wrote:
> > > >>
> > > >> As debug information gets larger and larger, it helps significantly 
> > > >> save
> > > >> the size of vmlinux images to compress the information in the debug
> > > >> information sections. Note: this debug info is typically split off from
> > > >> the final compressed kernel image, which is why vmlinux is what's used
> > > >> in conjunction with GDB. Minimizing the debug info size should have no
> > > >> impact on boot times, or final compressed kernel image size.
> > > >>
> > Nick,
> >
> > I am OK with this patch.
> >
> > Fangrui provided the minimal requirement for
> > --compress-debug-sections=zlib
> >
> >
> > Is it worth recording in the help text?
> > Do you want to send v2?
>
> Yes I'd like to record that information.  I can also record Sedat's
> Tested-by tag.  Thank you for testing Sedat.
>
> I don't know what "linux-image-dbg file" are, or why they would be
> bigger.  The size of the debug info is the primary concern with this
> config.  It sounds like however that file is created might be
> problematic.



As Sedat explained, deb package data
is compressed by xz, which is default.

You can use another compression method,
or disable compression if you desire.



"man dpkg-deb" says as follows:

 -Zcompress-type
  Specify which compression type to use when building a package.
  Allowed  values  are  gzip,  xz  (since  dpkg  1.15.6), and none
  (default is xz).



Kbuild supports KDEB_COMPRESS variable
to change the compression method.
See line 46 of scripts/package/builddeb.



If you are interested,
try "make bindeb-pkg" with/without CONFIG_DEBUG_INFO_COMPRESSED,
and compare the size of the generated debug package.




As Sedat stated,

(plain data) -> compress by gzip  ->  compress by xz

   is often less efficient than

(plain data) -> compress by xz



I hope this is clearer.








> Fangrui, I wasn't able to easily find what version of binutils first
> added support.  Can you please teach me how to fish?
>
> Another question I had for Fangrui is, if the linker can compress
> these sections, shouldn't we just have the linker do it, not the
> compiler and assembler?  IIUC the debug info can contain relocations,
> so the linker would have to decompress these, perform relocations,
> then recompress these?  I guess having the compiler and assembler
> compress the debug info as well would minimize the size of the .o
> files on disk.
>
> Otherwise I should add this flag to the assembler invocation, too, in
> v2.  Thoughts?
>
> I have a patch series that enables dwarf5 support in the kernel that
> I'm working up to.  I wanted to send this first.  Both roughly reduce
> the debug info size by 20% each, though I haven't measured them
> together, yet.  Requires ToT binutils because there have been many
> fixes from reports of mine recently.
> --
> Thanks,
> ~Nick Desaulniers



-- 
Best Regards
Masahiro Yamada

Re: [PATCH v7 2/3] phy: zynqmp: Add PHY driver for the Xilinx ZynqMP Gigabit Transceiver

2020-05-12 Thread Kishon Vijay Abraham I

Hi Laurent,

On 5/8/2020 6:23 AM, Laurent Pinchart wrote:
> Hi Kishon,
> 
> On Thu, May 07, 2020 at 10:14:45AM +0530, Kishon Vijay Abraham I wrote:
>> On 4/2/2020 3:40 AM, Laurent Pinchart wrote:
>>> From: Anurag Kumar Vulisha 
>>>
>>> Xilinx ZynqMP SoCs have a Gigabit Transceiver with four lanes. All the
>>> high speed peripherals such as USB, SATA, PCIE, Display Port and
>>> Ethernet SGMII can rely on any of the four GT lanes for PHY layer. This
>>> patch adds driver for that ZynqMP GT core.
>>>
>>> Signed-off-by: Anurag Kumar Vulisha 
>>> Signed-off-by: Laurent Pinchart 
>>> ---
>>> Changes since v5:
>>>
>>> - Cleanup headers
>>> - Organize the code in sections
>>> - Constify data tables and structures
>>> - Allocate all PHY instances in one go
>>> - Add I/O accessors
>>> - Move DP-specific init to a separate function
>>> - Use devm_platform_ioremap_resource_byname()
>>> - Simplify acquisition of reset controllers
>>> - Implement .configure() PHY operation for DP
>>> - Implement .power_on() and .power_off() operations
>>> - Wait for PLL lock for DP PHY too
>>> - Remove USB core reset operations
>>> - Fix SGMII bus width settings
>>> - Update copyright notice and authors list
>>> - Disable error messages on probe deferral
>>> - Update reset names to new DT bindings
>>> - Update to removal of subnodes in new DT bindings
>>> - Handle reference clocks through CCF
>>> - Add MAINTAINERS entry
>>> - Drop reset handling
>>> - Split TX term fix to separate function
>>> - Remove unused registers
>>> ---
>>>  MAINTAINERS  |   9 +
>>>  drivers/phy/Kconfig  |   8 +
>>>  drivers/phy/Makefile |   1 +
>>>  drivers/phy/phy-zynqmp.c | 995 +++
>>
>> Better to add a xilinx directory for this driver.
> 
> OK.
> 
>>>  4 files changed, 1013 insertions(+)
>>>  create mode 100644 drivers/phy/phy-zynqmp.c
>>>
>>> diff --git a/MAINTAINERS b/MAINTAINERS
>>> index 07293073c4f6..19e630fcaf62 100644
>>> --- a/MAINTAINERS
>>> +++ b/MAINTAINERS
>>> @@ -18406,6 +18406,15 @@ F: 
>>> Documentation/devicetree/bindings/dma/xilinx/xlnx,zynqmp-dpdma.yaml
>>>  F: drivers/dma/xilinx/xilinx_dpdma.c
>>>  F: include/dt-bindings/dma/xlnx-zynqmp-dpdma.h
>>>  
>>> +XILINX ZYNQMP GSPTR PHY DRIVER
>>
>> Looks like a typo here,  rest of the place seems to use PSGTR
> 
> Good catch. Will fix it.
> 
>>> +M: Anurag Kumar Vulisha 
>>> +M: Laurent Pinchart 
>>> +L: linux-kernel@vger.kernel.org
>>> +T: git https://github.com/Xilinx/linux-xlnx.git
>>> +S: Supported
>>> +F: Documentation/devicetree/bindings/phy/xlnx,zynqmp-psgtr.yaml
>>> +F: drivers/phy/phy-zynqmp.c
>>> +
>>>  XILLYBUS DRIVER
>>>  M: Eli Billauer 
>>>  L: linux-kernel@vger.kernel.org
>>> diff --git a/drivers/phy/Kconfig b/drivers/phy/Kconfig
>>> index b3ed94b98d9b..b8251b9f3d87 100644
>>> --- a/drivers/phy/Kconfig
>>> +++ b/drivers/phy/Kconfig
>>> @@ -49,6 +49,14 @@ config PHY_XGENE
>>> help
>>>   This option enables support for APM X-Gene SoC multi-purpose PHY.
>>>  
>>> +config PHY_XILINX_ZYNQMP
>>> +   tristate "Xilinx ZynqMP PHY driver"
>>> +   depends on ARCH_ZYNQMP
>>> +   select GENERIC_PHY
>>> +   help
>>> + Enable this to support ZynqMP High Speed Gigabit Transceiver
>>> + that is part of ZynqMP SoC.
>>> +
>>>  source "drivers/phy/allwinner/Kconfig"
>>>  source "drivers/phy/amlogic/Kconfig"
>>>  source "drivers/phy/broadcom/Kconfig"
>>> diff --git a/drivers/phy/Makefile b/drivers/phy/Makefile
>>> index 310c149a9df5..5dc7469f078b 100644
>>> --- a/drivers/phy/Makefile
>>> +++ b/drivers/phy/Makefile
>>> @@ -8,6 +8,7 @@ obj-$(CONFIG_GENERIC_PHY_MIPI_DPHY) += phy-core-mipi-dphy.o
>>>  obj-$(CONFIG_PHY_LPC18XX_USB_OTG)  += phy-lpc18xx-usb-otg.o
>>>  obj-$(CONFIG_PHY_XGENE)+= phy-xgene.o
>>>  obj-$(CONFIG_PHY_PISTACHIO_USB)+= phy-pistachio-usb.o
>>> +obj-$(CONFIG_PHY_XILINX_ZYNQMP)+= phy-zynqmp.o
>>>  obj-$(CONFIG_ARCH_SUNXI)   += allwinner/
>>>  obj-$(CONFIG_ARCH_MESON)   += amlogic/
>>>  obj-$(CONFIG_ARCH_MEDIATEK)+= mediatek/
>>> diff --git a/drivers/phy/phy-zynqmp.c b/drivers/phy/phy-zynqmp.c
>>> new file mode 100644
>>> index ..8ab99d6b9220
>>> --- /dev/null
>>> +++ b/drivers/phy/phy-zynqmp.c
>>> @@ -0,0 +1,995 @@
>>> +// SPDX-License-Identifier: GPL-2.0
>>> +/*
>>> + * phy-zynqmp.c - PHY driver for Xilinx ZynqMP GT.
>>> + *
>>> + * Copyright (C) 2018-20 Xilinx Inc.
>>> + *
>>> + * Author: Anurag Kumar Vulisha 
>>> + * Author: Subbaraya Sundeep 
>>> + * Author: Laurent Pinchart 
>>> + *
>>> + * This driver is tested for USB, SATA and Display Port currently.
>>> + * Other controllers PCIe and SGMII should also work but that is
>>> + * experimental as of now.
>>> + */
>>> +
>>> +#include 
>>> +#include 
>>> +#include 
>>> +#include 
>>> +#include 
>>> +#include 
>>> +#include 
>>> +#include 
>>> +#include 
>>> +
>>> +#include 
>>> +
>>> +/*
>>> + * Lane Registers
>>> + */
>>> +
>>> +/* TX De-emphasis parameters */
>>> +#define

Re: [PATCH v2 5/5] ramoops: add max_reason optional field to ramoops DT node

2020-05-12 Thread Rob Herring

On Tue, May 05, 2020 at 11:45:10AM -0400, Pavel Tatashin wrote:
> Currently, it is possible to dump kmsges for panic, or oops.
> With max_reason it is possible to dump messages for other
> kmesg_dump events, for example reboot, halt, shutdown, kexec.
> 
> Signed-off-by: Pavel Tatashin 
> ---
>  .../devicetree/bindings/reserved-memory/ramoops.txt| 10 --
>  1 file changed, 8 insertions(+), 2 deletions(-)
> 
> diff --git a/Documentation/devicetree/bindings/reserved-memory/ramoops.txt 
> b/Documentation/devicetree/bindings/reserved-memory/ramoops.txt
> index 0eba562fe5c6..886cff15d822 100644
> --- a/Documentation/devicetree/bindings/reserved-memory/ramoops.txt
> +++ b/Documentation/devicetree/bindings/reserved-memory/ramoops.txt
> @@ -30,7 +30,7 @@ Optional properties:
>  - ecc-size: enables ECC support and specifies ECC buffer size in bytes
>(defaults to 0: no ECC)
>  
> -- record-size: maximum size in bytes of each dump done on oops/panic
> +- record-size: maximum size in bytes of each kmsg dump.
>(defaults to 0: disabled)
>  
>  - console-size: size in bytes of log buffer reserved for kernel messages
> @@ -45,7 +45,13 @@ Optional properties:
>  - unbuffered: if present, use unbuffered mappings to map the reserved region
>(defaults to buffered mappings)
>  
> -- no-dump-oops: if present, only dump panics (defaults to panics and oops)
> +- max_reason: maximum reason for kmsg dump. Defaults to 2 (dump oops and

max-reason

> +  panics). Can be set to INT_MAX to dump for all reasons. See
> +  include/linux/kmsg_dump.h KMSG_DUMP_* for other kmsg dump values.
> +
> +- no-dump-oops: deprecated, use max_reason instead.
> +  if present, and max_reason is not specified is equivalent to
> +  max_reason = 1 (KMSG_DUMP_PANIC).
>  
>  - flags: if present, pass ramoops behavioral flags (defaults to 0,
>see include/linux/pstore_ram.h RAMOOPS_FLAG_* for flag values).
> -- 
> 2.25.1
>

[loop]efcfec579: BUG:blk_update_request: I/O error, dev loop6, sector 49674 op 0x9:(WRITE_ZEROES)

2020-05-12 Thread Xu, Yanfei


Hi,

After operating on a /dev/loop device set up by losetup with an image placed in tmpfs,

I got the following ERROR messages:

[cut here]-

[  183.110770] blk_update_request: I/O error, dev loop6, sector 524160 
op 0x9:(WRITE_ZEROES) flags 0x1000800 phys_seg 0 prio class 0
[  183.123949] blk_update_request: I/O error, dev loop6, sector 522 op 
0x9:(WRITE_ZEROES) flags 0x1000800 phys_seg 0 prio class 0
[  183.137123] blk_update_request: I/O error, dev loop6, sector 16906 op 
0x9:(WRITE_ZEROES) flags 0x1000800 phys_seg 0 prio class 0
[  183.150314] blk_update_request: I/O error, dev loop6, sector 32774 op 
0x9:(WRITE_ZEROES) flags 0x1000800 phys_seg 0 prio class 0
[  183.163551] blk_update_request: I/O error, dev loop6, sector 49674 op 
0x9:(WRITE_ZEROES) flags 0x1000800 phys_seg 0 prio class 0
[  183.176824] blk_update_request: I/O error, dev loop6, sector 65542 op 
0x9:(WRITE_ZEROES) flags 0x1000800 phys_seg 0 prio class 0
[  183.190029] blk_update_request: I/O error, dev loop6, sector 82442 op 
0x9:(WRITE_ZEROES) flags 0x1000800 phys_seg 0 prio class 0
[  183.203281] blk_update_request: I/O error, dev loop6, sector 98310 op 
0x9:(WRITE_ZEROES) flags 0x1000800 phys_seg 0 prio class 0
[  183.216531] blk_update_request: I/O error, dev loop6, sector 115210 
op 0x9:(WRITE_ZEROES) flags 0x1000800 phys_seg 0 prio class 0
[  183.229914] blk_update_request: I/O error, dev loop6, sector 131078 
op 0x9:(WRITE_ZEROES) flags 0x1000800 phys_seg 0 prio class 0



I have found the commit which introduced this issue by git bisect:

commit :efcfec57[loop: fix no-unmap write-zeroes request behavior]


Kernel version: Linux version 5.6.0

Frequency: every time

steps to reproduce:

  1.git clone mainline kernel

  2.compile kernel with ARCH=x86_64, and then boot the system with it

(seems other arch also can reproduce it )

  3.make an image by "dd of=/tmp/image if=/dev/zero bs=1M count=256"

  4.place the image in tmpfs directory

  5.losetup /dev/loop6 /PATH/image

  6.mkfs.ext2 /dev/loop6


Any comments will be appreciated.


Thanks,

Yanfei

Re: [PATCH 02/11] dt-bindings: new: add yaml bindings for MediaTek Ethernet MAC

2020-05-12 Thread Rob Herring

On Tue, May 05, 2020 at 04:02:22PM +0200, Bartosz Golaszewski wrote:
> From: Bartosz Golaszewski 
> 
> This adds yaml DT bindings for the MediaTek Ethernet MAC present on the
> mt8* family of SoCs.
> 
> Signed-off-by: Bartosz Golaszewski 
> ---
>  .../bindings/net/mediatek,eth-mac.yaml| 80 +++
>  1 file changed, 80 insertions(+)
>  create mode 100644 
> Documentation/devicetree/bindings/net/mediatek,eth-mac.yaml
> 
> diff --git a/Documentation/devicetree/bindings/net/mediatek,eth-mac.yaml 
> b/Documentation/devicetree/bindings/net/mediatek,eth-mac.yaml
> new file mode 100644
> index ..7682fe9d8109
> --- /dev/null
> +++ b/Documentation/devicetree/bindings/net/mediatek,eth-mac.yaml
> @@ -0,0 +1,80 @@
> +# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
> +%YAML 1.2
> +---
> +$id: http://devicetree.org/schemas/net/mediatek,eth-mac.yaml#
> +$schema: http://devicetree.org/meta-schemas/core.yaml#
> +
> +title: MediaTek Ethernet MAC Controller
> +
> +maintainers:
> +  - Bartosz Golaszewski 
> +
> +description:
> +  This Ethernet MAC is used on the MT8* family of SoCs from MediaTek.
> +  It's compliant with 802.3 standards and supports half- and full-duplex
> +  modes with flow-control as well as CRC offloading and VLAN tags.
> +
> +properties:
> +  compatible:
> +enum:
> +  - mediatek,mt8516-eth
> +  - mediatek,mt8518-eth
> +  - mediatek,mt8175-eth
> +
> +  reg:
> +maxItems: 1
> +
> +  interrupts:
> +maxItems: 1
> +
> +  clocks:
> +minItems: 3
> +maxItems: 3
> +
> +  clock-names:
> +additionalItems: false
> +items:
> +  - const: core
> +  - const: reg
> +  - const: trans
> +
> +  mediatek,pericfg:
> +$ref: /schemas/types.yaml#definitions/phandle
> +description:
> +  Phandle to the device containing the PERICFG register range.

Perhaps say what it is used for?

> +
> +required:
> +  - compatible
> +  - reg
> +  - interrupts
> +  - clocks
> +  - clock-names
> +  - mediatek,pericfg
> +  - phy-handle
> +
> +examples:
> +  - |
> +#include 
> +#include 
> +
> +ethernet: ethernet@1118 {
> +compatible = "mediatek,mt8516-eth";
> +reg = <0 0x1118 0 0x1000>;

Default addr and size is 1 cell.

> +mediatek,pericfg = <>;
> +interrupts = ;
> +clocks = < CLK_TOP_RG_ETH>,
> + < CLK_TOP_66M_ETH>,
> + < CLK_TOP_133M_ETH>;
> +clock-names = "core", "reg", "trans";
> +phy-handle = <&eth_phy>;
> +phy-mode = "rmii";
> +
> +mdio {

Not documented.

> +#address-cells = <1>;
> +#size-cells = <0>;
> +
> +eth_phy: ethernet-phy@0 {
> +reg = <0>;
> +};
> +};
> +};
> -- 
> 2.25.0
>

Re: [PATCH 01/11] dt-bindings: add a binding document for MediaTek PERICFG controller

2020-05-12 Thread Rob Herring

On Tue, May 05, 2020 at 04:02:21PM +0200, Bartosz Golaszewski wrote:
> From: Bartosz Golaszewski 
> 
> This adds a binding document for the PERICFG controller present on
> MediaTek SoCs. For now the only variant supported is 'mt8516-pericfg'.
> 
> Signed-off-by: Bartosz Golaszewski 
> ---
>  .../arm/mediatek/mediatek,pericfg.yaml| 34 +++
>  1 file changed, 34 insertions(+)
>  create mode 100644 
> Documentation/devicetree/bindings/arm/mediatek/mediatek,pericfg.yaml
> 
> diff --git 
> a/Documentation/devicetree/bindings/arm/mediatek/mediatek,pericfg.yaml 
> b/Documentation/devicetree/bindings/arm/mediatek/mediatek,pericfg.yaml
> new file mode 100644
> index ..74b2a6173ffb
> --- /dev/null
> +++ b/Documentation/devicetree/bindings/arm/mediatek/mediatek,pericfg.yaml
> @@ -0,0 +1,34 @@
> +# SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause)
> +%YAML 1.2
> +---
> +$id: "http://devicetree.org/schemas/arm/mediatek/mediatek,pericfg.yaml#"
> +$schema: "http://devicetree.org/meta-schemas/core.yaml#"
> +
> +title: MediaTek Peripheral Configuration Controller
> +
> +maintainers:
> +  - Bartosz Golaszewski 
> +
> +properties:
> +  compatible:
> +oneOf:

Don't need oneOf.

> +  - items:
> +- enum:
> +  - mediatek,pericfg

PERICFG is exactly the same register set and functions on all Mediatek 
SoCs? Needs to be more specific.

> +- const: syscon
> +
> +  reg:
> +maxItems: 1
> +
> +required:
> +  - compatible
> +  - reg
> +
> +additionalProperties: false
> +
> +examples:
> +  - |
> +pericfg: pericfg@10003050 {
> +compatible = "mediatek,mt8516-pericfg", "syscon";
> +reg = <0 0x10003050 0 0x1000>;

Default for examples is 1 cell for addr and size.

> +};
> -- 
> 2.25.0
>

Re: [PATCH v1 2/2] phy: phy-cadence-torrent: Use PHY kernel APIs to set PHY attributes

2020-05-12 Thread Kishon Vijay Abraham I

Hi,

On 5/8/2020 1:20 PM, Tomi Valkeinen wrote:
> On 07/05/2020 20:17, Maxime Ripard wrote:
> 
>>> Actually, for this particular case, consumer driver will be the Cadence MHDP
>>> bridge driver for DisplayPort which is also under review process for
>>> upstreaming [1]. So this DRM bridge driver will make use of the PHY APIs
>>> phy_get_bus_width() and phy_get_max_link_rate() during execution of probe
>>> function to get the number of lanes and maximum link rate supported by 
>>> Cadence
>>> Torrent PHY. This information is required to set the host capabilities in 
>>> the
>>> DRM bridge driver, based on which initial values for DisplayPort link 
>>> training
>>> will be determined.
>>>
>>> The changes in this PHY patch series are based on suggestions in the review
>>> comments in [1] which asks to use kernel PHY APIs to read these properties
>>> instead of directly accessing PHY device node. The complete driver and 
>>> actual
>>> use of these APIs can be found in [2]. This is how we are planning to use
>>> these APIs.
>>
>> I haven't really looked into the displayport spec, but I'd assume that 
>> there's a
>> lot more parameters that would need to be negociated between the phy and the 
>> DP
>> block? If so, then it would make more sense to follow the path we did for
>> MIPI-DSI where the parameters can be negociated through the phy_configure /
>> phy_validate interface.
> 
> I don't think this is negotiation, but just exposing the (max) capabilities of
> PHY, inside which the configure can work. Maybe all the capabilities could
> handled with a struct (struct phy_attrs), instead of adding separate functions
> for each, though.

yeah, that makes sense. Just that users should take care not to over-write all
the phy attributes with partial information.

Thanks
Kishon

Re: [PATCH v1] dt-bindings: net: nxp,tja11xx: rework validation support

2020-05-12 Thread Rob Herring

On Tue,  5 May 2020 12:42:15 +0200, Oleksij Rempel wrote:
> To properly identify this node, we need to use ethernet-phy-id0180.dc80.
> And add missing required properties.
> 
> Signed-off-by: Oleksij Rempel 
> ---
>  .../devicetree/bindings/net/nxp,tja11xx.yaml  | 55 ---
>  1 file changed, 35 insertions(+), 20 deletions(-)
> 

Reviewed-by: Rob Herring

Re: [Patch v2] efi: cper: Add support for printing Firmware Error Record Reference

2020-05-12 Thread Punit Agrawal

Ard Biesheuvel  writes:

> On Tue, 12 May 2020 at 06:55, Punit Agrawal
>  wrote:
>>
>> While debugging a boot failure, the following unknown error record was
>> seen in the boot logs.
>>
>> <...>
>> BERT: Error records from previous boot:
>> [Hardware Error]: event severity: fatal
>> [Hardware Error]:  Error 0, type: fatal
>> [Hardware Error]:   section type: unknown, 
>> 81212a96-09ed-4996-9471-8d729c8e69ed
>> [Hardware Error]:   section length: 0x290
>> [Hardware Error]:   : 0001   00020002  
>> 
>> [Hardware Error]:   0010: 00020002 001f 0320   
>>  ...
>> [Hardware Error]:   0020:      
>> 
>> [Hardware Error]:   0030:      
>> 
>> <...>
>>
>> On further investigation, it was found that the error record with
>> UUID (81212a96-09ed-4996-9471-8d729c8e69ed) has been defined in the
>> UEFI Specification at least since v2.4 and has recently had additional
>> fields defined in v2.7 Section N.2.10 Firmware Error Record Reference.
>>
>> Add support for parsing and printing the defined fields to give users
>> a chance to figure out what went wrong.
>>
>> Signed-off-by: Punit Agrawal 
>> Cc: Ard Biesheuvel 
>> Cc: "Rafael J. Wysocki" 
>> Cc: Borislav Petkov 
>> Cc: James Morse 
>> Cc: linux-a...@vger.kernel.org
>> Cc: linux-...@vger.kernel.org
>> ---
>> Hi Ard,
>>
>> I've updated the patch based on your feedback.
>>
>> As you noted, some aspects of the spec make it a bit tricky to support
>> all revisions in a nice way (e.g., size check) but this version should
>> fix existing issues.
>>
>> Thanks,
>> Punit
>>
>> v1[0] -> v2:
>> * Simplified error record structure definition
>> * Fixed size check
>> * Added comment to clarify offset calculation for dumped data
>> * Style fixes for multiline if blocks
>
> Thanks. I will queue this as a fix.

Thanks!

Just for my understanding - are you planning to send this for v5.7 or
v5.8? There's no rush, so I am fine either way.

[...]

Re: [PATCH v1] dt-bindings: net: nxp,tja11xx: rework validation support

2020-05-12 Thread Rob Herring

On Tue, May 05, 2020 at 04:01:27PM +0200, Andrew Lunn wrote:
> On Tue, May 05, 2020 at 12:42:15PM +0200, Oleksij Rempel wrote:
> > To properly identify this node, we need to use ethernet-phy-id0180.dc80.
> > And add missing required properties.
> > 
> > Signed-off-by: Oleksij Rempel 
> > ---
> >  .../devicetree/bindings/net/nxp,tja11xx.yaml  | 55 ---
> >  1 file changed, 35 insertions(+), 20 deletions(-)
> > 
> > diff --git a/Documentation/devicetree/bindings/net/nxp,tja11xx.yaml 
> > b/Documentation/devicetree/bindings/net/nxp,tja11xx.yaml
> > index 42be0255512b3..cc322107a24a2 100644
> > --- a/Documentation/devicetree/bindings/net/nxp,tja11xx.yaml
> > +++ b/Documentation/devicetree/bindings/net/nxp,tja11xx.yaml
> > @@ -1,4 +1,4 @@
> > -# SPDX-License-Identifier: GPL-2.0+
> > +# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
> >  %YAML 1.2
> >  ---
> >  $id: http://devicetree.org/schemas/net/nxp,tja11xx.yaml#
> > @@ -12,44 +12,59 @@ maintainers:
> >- Heiner Kallweit 
> >  
> >  description:
> > -  Bindings for NXP TJA11xx automotive PHYs
> > +  Bindings for the NXP TJA1102 automotive PHY. This is a dual PHY package 
> > where
> > +  only the first PHY has global configuration register and HW health
> > +  monitoring.
> >  
> > -allOf:
> > -  - $ref: ethernet-phy.yaml#
> > +properties:
> > +  compatible:
> > +const: ethernet-phy-id0180.dc80
> > +description: ethernet-phy-id0180.dc80 used for TJA1102 PHY
> > +
> > +  reg:
> > +minimum: 0
> > +maximum: 14
> > +description:
> > +  The PHY address of the parent PHY.
> 
> Hi Oleksij
> 
> reg is normally 0 to 31, since that is the address range for MDIO. 
> Did you use 14 here because of what strapping allows?
> 
> > +required:
> > +  - compatible
> > +  - reg
> > +  - '#address-cells'
> > +  - '#size-cells'
> 
> So we have two different meanings of 'required' here.
> 
> One meaning is the code requires it. compatible is not required, the
> driver will correctly be bind to the device based on its ID registers.
> Is reg also required by the code?
> 
> The second meaning is about keeping the yaml verifier happy. It seems
> like compatible is needed for the verifier. Is reg also required? We
> do recommend having reg, but the generic code does not require it.

Well, you have to be able to match a discoverable device to a DT node. 
Unless you only have one thing on the bus (how would you know though, 
they're discoverable?), that's with reg. And if you need to say turn on 
a regulator for the device to be discovered, then you need compatible to 
know how to do that.

So either don't describe the device in DT because you can discover 
everything or you describe it in DT with both 'compatible' and 'reg'. 
MDIO is not special.

Rob

Re: [PATCH net-next 1/4] net: ethernet: validate pause autoneg setting

2020-05-12 Thread Doug Berger

On 5/12/2020 12:08 PM, Michal Kubecek wrote:
> On Tue, May 12, 2020 at 11:31:39AM -0700, Doug Berger wrote:
>> On 5/11/2020 5:47 PM, Andrew Lunn wrote:
>>> On Mon, May 11, 2020 at 05:24:07PM -0700, Doug Berger wrote:
>>>> A comment in uapi/linux/ethtool.h states "Drivers should reject a
>>>> non-zero setting of @autoneg when autoneogotiation is disabled (or
>>>> not supported) for the link".
>>>>
>>>> That check should be added to phy_validate_pause() to consolidate
>>>> the code where possible.
>>>>
>>>> Fixes: 22b7d29926b5 ("net: ethernet: Add helper to determine if pause 
>>>> configuration is supported")
>>>
>>> Hi Doug
>>>
>>> If this is a real fix, please submit this to net, not net-next.
>>>
>>>Andrew
>>>
>> This was intended as a fix, but I thought it would be better to keep it
>> as part of this set for context and since net-next is currently open.
>>
>> The context is trying to improve the phylib support for offloading
>> ethtool pause configuration and this is something that could be checked
>> in a single location rather than by individual drivers.
>>
>> I included it here to get feedback about its appropriateness as a common
>> behavior. I should have been more explicit about that.
>>
>> Personally, I'm actually not that fond of this change since it can
>> easily be a source of confusion with the ethtool interface because the
>> link autonegotiation and the pause autonegotiation are controlled by
>> different commands.
>>
>> Since the ethtool -A command performs a read/modify/write of pause
>> parameters, you can get strange results like these:
>> # ethtool -s eth0 speed 100 duplex full autoneg off
>> # ethtool -A eth0 tx off
>> Cannot set device pause parameters: Invalid argument
>> #
>> Because, the get read pause autoneg as enabled and only the tx_pause
>> member of the structure was updated.
> 
> This would be indeed unfortunate. We could use extack to make the error
> message easier to understand but the real problem IMHO is that
> ethtool_ops::get_pauseparam() returns value which is rejected by
> ethtool_ops::set_pauseparam(). This is something we should avoid.
> 
> If we really wanted to reject ethtool_pauseparam::autoneg on when
> general autonegotiation is off, we would have to disable pause
> autonegotiation whenever general autonegotiation is disabled. I don't
> like that idea, however, as that would mean that
> 
>   ethtool -s dev autoneg off ...
>   ethtool -s dev autoneg on ...
> 
> would reset the setting of pause autonegotiation.
> 
> Therefore I believe the comment should be rather replaced by a warning
> that even if ethtool_pauseparam::autoneg is enabled, pause
> autonegotiation is only active if general autonegotiation is also
> enabled.
> 
> Michal
> 
Thanks for your reply.

I agree with your concerns and will remove this commit from the set when
I resubmit. I also favor replacing the comment in ethtool.h.

-Doug

Re: [RESEND PATCH v2 1/3] dt-bindings: phy: Drop reset-gpios from marvell,mmp3-hsic-phy

2020-05-12 Thread Kishon Vijay Abraham I

+Rob

On 5/9/2020 1:47 PM, Lubomir Rintel wrote:
> This has been added in error -- the PHY block doesn't have a reset pin.
> 
> Signed-off-by: Lubomir Rintel 
> ---
>  .../devicetree/bindings/phy/marvell,mmp3-hsic-phy.yaml | 7 ---
>  1 file changed, 7 deletions(-)
> 
> diff --git a/Documentation/devicetree/bindings/phy/marvell,mmp3-hsic-phy.yaml 
> b/Documentation/devicetree/bindings/phy/marvell,mmp3-hsic-phy.yaml
> index 00609ace677c..30e290c57930 100644
> --- a/Documentation/devicetree/bindings/phy/marvell,mmp3-hsic-phy.yaml
> +++ b/Documentation/devicetree/bindings/phy/marvell,mmp3-hsic-phy.yaml
> @@ -18,27 +18,20 @@ properties:
>  maxItems: 1
>  description: base address of the device
>  
> -  reset-gpios:
> -maxItems: 1
> -description: GPIO connected to reset
> -
>"#phy-cells":
>  const: 0
>  
>  required:
>- compatible
>- reg
> -  - reset-gpios
>- "#phy-cells"
>  
>  additionalProperties: false
>  
>  examples:
>- |
> -#include 
>  hsic-phy@f0001800 {
>  compatible = "marvell,mmp3-hsic-phy";
>  reg = <0xf0001800 0x40>;
> -reset-gpios = < 63 GPIO_ACTIVE_HIGH>;
>  #phy-cells = <0>;
>  };
>

Re: [PATCH v3] security/keys: rewrite big_key crypto to use library interface

2020-05-12 Thread Jason A. Donenfeld

On Tue, May 12, 2020 at 4:03 PM David Howells  wrote:
>
> Jason A. Donenfeld  wrote:
>
> > So long as that ->update function:
> > 1. Deletes the old on-disk data.
> > 2. Deletes the old key from the inode.
> > 3. Generates a new key using get_random_bytes.
> > 4. Stores that new key in the inode.
> > 5. Encrypts the updated data afresh with the new key.
> > 6. Puts the updated data onto disk,
> >
> > then this is fine with me, and feel free to have my Acked-by if you
> > want. But if it doesn't do that -- i.e. if it tries to reuse the old
> > key or similar -- then this isn't fine. But it sounds like from what
> > you've described that things are actually fine, in which case, I guess
> > it makes sense to apply your patch ontop of mine and commit these.
>
> Yep.  It calls big_key_destroy(), which clears away the old stuff just as when
> a key is being destroyed, then generic_key_instantiate() just as when a key is
> being set up.
>
> The key ID and the key metadata (ownership, perms, expiry) are maintained, but
> the payload is just completely replaced.

Okay, in that case, take my:

Acked-by: Jason A. Donenfeld 

And then perhaps you can take both my patch and your addendum into keys-next.

Jason

Re: [PATCH 3/5] exec: Remove recursion from search_binary_handler

2020-05-12 Thread Rob Landley




On 5/12/20 7:20 PM, Linus Torvalds wrote:
> On Tue, May 12, 2020 at 11:46 AM Eric W. Biederman
>  wrote:
>>
>> I am still thinking about this one, but here is where I am at.  At a
>> practical level passing the file descriptor of the script to interpreter
>> seems like something we should encourage in the long term.  It removes
>> races and it is cheaper because then the interpreter does not have to
>> turn around and open the script itself.
> 
> Yeah, I think we should continue to support it, because I think it's
> the right thing to do (and we might just end up having compatibility
> issues if we don't).
...
>> It is possible although unlikely for userspace to find the file
>> descriptor without consulting AT_EXECFD so just to be conservative I
>> think we should install the file descriptor in begin_new_exec even if
>> the next interpreter does not support AT_EXECFD.
> 
> Ack. I think the AT_EXECFD thing is a sign that this isn't internal to
> binfmt_misc, but it also shouldn't be gating this issue. In reality,
> ELF is the only real binary format that matters - the script/misc
> binfmts are just indirection entries - and it supports AT_EXECFD, so
> let's just ignore the theoretical case of "maybe nobody exposes it".

Would this potentially make the re-exec-yourself case easier to do at some
point? (Which nommu needs to do, and /proc/self/exe isn't always available.)

Here's the first time I asked about that:

https://lore.kernel.org/lkml/200612261823.07927@landley.net/

Here's the most recent:

https://lkml.org/lkml/2017/9/5/246

Here's someone else asking and being basically told "chroot isn't a thing":

http://lkml.iu.edu/hypermail/linux/kernel/0906.3/00584.html

(See also "CVE-2019-5736" and the workarounds thereto.)

Rob

P.S. Yes I'm aware it would only work properly with static binaries. Not the
first thing that's true for.

Re: [PATCH v6] streamline_config.pl: add LMC_KEEP to preserve some kconfigs

2020-05-12 Thread Masahiro Yamada

On Wed, May 13, 2020 at 12:36 AM Changbin Du  wrote:
>
> Sometimes it is useful to preserve batches of configs when making
> localmodconfig. For example, I usually don't want any usb and fs
> modules to be disabled. Now we can do it by:
>
>  $ make LMC_KEEP="drivers/usb:fs" localmodconfig
>
> Signed-off-by: Changbin Du 
> Acked-by: Steven Rostedt (VMware) 
>

Applied to linux-kbuild.
Thanks.


> ---
> v6: add note for localyesconfig.
> v5: use ':' as delimiter.
> v4: fix typo.
> v3: rename LOCALMODCONFIG_PRESERVE to shorter LMC_KEEP.
> v2: fix typo in documentation. (Randy Dunlap)
> ---
>  Documentation/admin-guide/README.rst | 11 +--
>  scripts/kconfig/Makefile |  2 ++
>  scripts/kconfig/streamline_config.pl | 21 +
>  3 files changed, 32 insertions(+), 2 deletions(-)
>
> diff --git a/Documentation/admin-guide/README.rst 
> b/Documentation/admin-guide/README.rst
> index cc6151fc0845..5fb526900023 100644
> --- a/Documentation/admin-guide/README.rst
> +++ b/Documentation/admin-guide/README.rst
> @@ -209,15 +209,22 @@ Configuring the kernel
> store the lsmod of that machine into a file
> and pass it in as a LSMOD parameter.
>
> +   Also, you can preserve modules in certain folders
> +   or kconfig files by specifying their paths in
> +   parameter LMC_KEEP.
> +
> target$ lsmod > /tmp/mylsmod
> target$ scp /tmp/mylsmod host:/tmp
>
> -   host$ make LSMOD=/tmp/mylsmod localmodconfig
> +   host$ make LSMOD=/tmp/mylsmod \
> +   LMC_KEEP="drivers/usb:drivers/gpu:fs" \
> +   localmodconfig
>
> The above also works when cross compiling.
>
>   "make localyesconfig" Similar to localmodconfig, except it will convert
> -   all module options to built in (=y) options.
> +   all module options to built in (=y) options. You 
> can
> +   also preserve modules by LMC_KEEP.
>
>   "make kvmconfig"   Enable additional options for kvm guest kernel 
> support.
>
> diff --git a/scripts/kconfig/Makefile b/scripts/kconfig/Makefile
> index c9d0a4a8efb3..f3355bd86aa5 100644
> --- a/scripts/kconfig/Makefile
> +++ b/scripts/kconfig/Makefile
> @@ -123,7 +123,9 @@ help:
> @echo  '  gconfig - Update current config utilising a GTK+ 
> based front-end'
> @echo  '  oldconfig   - Update current config utilising a 
> provided .config as base'
> @echo  '  localmodconfig  - Update current config disabling modules 
> not loaded'
> +   @echo  'except those preserved by LMC_KEEP 
> environment variable'
> @echo  '  localyesconfig  - Update current config converting local 
> mods to core'
> +   @echo  'except those preserved by LMC_KEEP 
> environment variable'
> @echo  '  defconfig   - New config with default from ARCH 
> supplied defconfig'
> @echo  '  savedefconfig   - Save current config as ./defconfig 
> (minimal config)'
> @echo  '  allnoconfig - New config where all options are answered 
> with no'
> diff --git a/scripts/kconfig/streamline_config.pl 
> b/scripts/kconfig/streamline_config.pl
> index e2f8504f5a2d..19857d18d814 100755
> --- a/scripts/kconfig/streamline_config.pl
> +++ b/scripts/kconfig/streamline_config.pl
> @@ -143,6 +143,7 @@ my %depends;
>  my %selects;
>  my %prompts;
>  my %objects;
> +my %config2kfile;
>  my $var;
>  my $iflevel = 0;
>  my @ifdeps;
> @@ -201,6 +202,7 @@ sub read_kconfig {
> if (/^\s*(menu)?config\s+(\S+)\s*$/) {
> $state = "NEW";
> $config = $2;
> +   $config2kfile{"CONFIG_$config"} = $kconfig;
>
> # Add depends for 'if' nesting
> for (my $i = 0; $i < $iflevel; $i++) {
> @@ -591,6 +593,20 @@ while ($repeat) {
>  }
>
>  my %setconfigs;
> +my @preserved_kconfigs = split(/:/,$ENV{LMC_KEEP});
> +
> +sub in_preserved_kconfigs {
> +my $kconfig = $config2kfile{$_[0]};
> +if (!defined($kconfig)) {
> +return 0;
> +}
> +foreach my $excl (@preserved_kconfigs) {
> +if($kconfig =~ /^$excl/) {
> +return 1;
> +}
> +}
> +return 0;
> +}
>
>  # Finally, read the .config file and turn off any module enabled that
>  # we could not find a reason to keep enabled.
> @@ -644,6 +660,11 @@ foreach my $line (@config_file) {
>  }
>
>  if (/^(CONFIG.*)=(m|y)/) {
> +if (in_preserved_kconfigs($1)) {
> +dprint "Preserve config $1";
> +print;
> +next;
> +}
> if (defined($configs{$1})) {
> if ($localyesconfig) {
> $setconfigs{$1} = 'y';
> --
> 2.25.1
>


-- 
Best Regards
Masahiro Yamada

Re: [PATCH v4 10/10] loop: Add LOOP_CONFIGURE ioctl

2020-05-12 Thread Jens Axboe

On 5/12/20 8:29 PM, Jens Axboe wrote:
> On 5/12/20 12:46 AM, Martijn Coenen wrote:
>> Hi Jens,
>>
>> What do you think of this series?
> 
> Looks acceptable to me, but I'm getting a failure applying it to
> for-5.8/drivers on this patch:
> 
> Applying: loop: Refactor loop_set_status() size calculation
> 
> So you'll probably want to respin on the right branch.

Then you can also drop patch #1.

-- 
Jens Axboe

Re: [PATCH] perf record: Use an eventfd to wakeup when done

2020-05-12 Thread Anand K. Mistry

On Wed, 13 May 2020 at 00:12, Arnaldo Carvalho de Melo
 wrote:
>
> Em Tue, May 12, 2020 at 02:12:32PM +0200, Jiri Olsa escreveu:
> > On Tue, May 12, 2020 at 02:59:36PM +1000, Anand K Mistry wrote:
> >
> > SNIP
> >
> > > diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c
> > > index 1ab349abe90469..099ecaa66732a2 100644
> > > --- a/tools/perf/builtin-record.c
> > > +++ b/tools/perf/builtin-record.c
> > > @@ -53,6 +53,7 @@
> > >  #include 
> > >  #include 
> > >  #include 
> > > +#include 
> > >  #include 
> > >  #include 
> > >  #include 
> > > @@ -518,15 +519,28 @@ static int record__pushfn(struct mmap *map, void 
> > > *to, void *bf, size_t size)
> > >
> > >  static volatile int signr = -1;
> > >  static volatile int child_finished;
> > > +static int done_fd = -1;
> > >
> > >  static void sig_handler(int sig)
> > >  {
> > > +   u64 tmp = 1;
> > > if (sig == SIGCHLD)
> > > child_finished = 1;
> > > else
> > > signr = sig;
> > >
> > > done = 1;
> > > +
> > > +   /*
> > > +* It is possible for this signal handler to run after done is checked
> > > +* in the main loop, but before the perf counter fds are polled. If 
> > > this
> > > +* happens, the poll() will continue to wait even though done is set,
> > > +* and will only break out if either another signal is received, or 
> > > the
> > > +* counters are ready for read. To ensure the poll() doesn't sleep 
> > > when
> > > +* done is set, use an eventfd (done_fd) to wake up the poll().
> > > +*/
> > > +   if (write(done_fd, &tmp, sizeof(tmp)) < 0)
> > > +   pr_err("failed to signal wakeup fd\n");
> > >  }
> > >
> > >  static void sigsegv_handler(int sig)
> > > @@ -1424,6 +1438,17 @@ static int __cmd_record(struct record *rec, int 
> > > argc, const char **argv)
> > > int fd;
> > > float ratio = 0;
> > >
> > > +   done_fd = eventfd(0, EFD_NONBLOCK);
> > > +   if (done_fd < 0) {
> > > +   pr_err("Failed to create wakeup eventfd, error: %m\n");
> > > +   return -1;
> > > +   }
> > > +   err = evlist__add_pollfd(rec->evlist, done_fd);
> > > +   if (err < 0) {
> > > +   pr_err("Failed to add wakeup eventfd to poll list\n");
> > > +   return -1;
> > > +   }
> >
> > sorry I did not notice before, but I think we also
> > need to close done_fd descriptor on the exit path
> >
> > also please change subject to PATCHv3 for the next version

Apologies. I'm still getting the hang of this.

>
> Yeah, and, and don't take this as a requirement for this patch to be
> processed, this can be made as a follow up patch by you or someone else
> (me, maybe :)), that maybe tools/perf/builtin-top.c and
> tools/perf/builtin-trace.c have the same issue?
>
> Could you please take a look there as well?

I looked at 'top', 'trace', and 'kvm'. kvm doesn't really have this
issue because
the poll() has a 100ms timeout. Even though it's technically affected,
the timeout
will make it unnoticeable (just delaying the exit for 100ms). top is
in the same boat
(uses a timeout).

trace is the affected one because it has the following code:
int timeout = done ? 100 : -1;
if (!draining && evlist__poll(evlist, timeout) > 0) {

Different logic, but still a gap and an indefinite timeout.

>
> - Arnaldo
>
> > thanks,
> > jirka
> >
> > > +
> > > atexit(record__sig_exit);
> > > signal(SIGCHLD, sig_handler);
> > > signal(SIGINT, sig_handler);
> > > --
> > > 2.26.2.645.ge9eca65c58-goog
> > >
> >
>
> --
>
> - Arnaldo



-- 
Anand K. Mistry
Software Engineer
Google Australia

[PATCH v2 net-next 1/3] net: dsa: felix: qos classified based on pcp

2020-05-12 Thread Xiaoliang Yang

Set the default QoS classification based on the PCP and DEI bits of the
VLAN tag. After that, frames can be classified to different QoS classes
based on the PCP tag. If there is no VLAN tag, or the VLAN tag is ignored,
use the port default QoS.

Signed-off-by: Xiaoliang Yang 
---
 drivers/net/dsa/ocelot/felix.c | 26 ++
 1 file changed, 26 insertions(+)

diff --git a/drivers/net/dsa/ocelot/felix.c b/drivers/net/dsa/ocelot/felix.c
index a2dfd73f8a1a..58d6b0f454e5 100644
--- a/drivers/net/dsa/ocelot/felix.c
+++ b/drivers/net/dsa/ocelot/felix.c
@@ -289,6 +289,27 @@ static void felix_phylink_mac_link_up(struct dsa_switch 
*ds, int port,
 QSYS_SWITCH_PORT_MODE, port);
 }
 
+static void felix_port_qos_map_init(struct ocelot *ocelot, int port)
+{
+   int i;
+
+   ocelot_rmw_gix(ocelot,
+  ANA_PORT_QOS_CFG_QOS_PCP_ENA,
+  ANA_PORT_QOS_CFG_QOS_PCP_ENA,
+  ANA_PORT_QOS_CFG,
+  port);
+
+   for (i = 0; i < FELIX_NUM_TC * 2; i++) {
+   ocelot_rmw_ix(ocelot,
+ (ANA_PORT_PCP_DEI_MAP_DP_PCP_DEI_VAL & i) |
+ ANA_PORT_PCP_DEI_MAP_QOS_PCP_DEI_VAL(i),
+ ANA_PORT_PCP_DEI_MAP_DP_PCP_DEI_VAL |
+ ANA_PORT_PCP_DEI_MAP_QOS_PCP_DEI_VAL_M,
+ ANA_PORT_PCP_DEI_MAP,
+ port, i);
+   }
+}
+
 static void felix_get_strings(struct dsa_switch *ds, int port,
  u32 stringset, u8 *data)
 {
@@ -547,6 +568,11 @@ static int felix_setup(struct dsa_switch *ds)
ocelot_configure_cpu(ocelot, port,
 OCELOT_TAG_PREFIX_NONE,
 OCELOT_TAG_PREFIX_LONG);
+
+   /* Set the default QoS Classification based on PCP and DEI
+* bits of vlan tag.
+*/
+   felix_port_qos_map_init(ocelot, port);
}
 
/* Include the CPU port module in the forwarding mask for unknown
-- 
2.17.1

[PATCH v2 net-next 2/3] net: dsa: felix: Configure Time-Aware Scheduler via taprio offload

2020-05-12 Thread Xiaoliang Yang

The Ocelot VSC9959 switch supports time-based egress shaping in hardware
according to IEEE 802.1Qbv. This patch adds support for TAS configuration
on the egress ports of the VSC9959 switch.

The Felix driver is an instance of the Ocelot family, with a DSA front-end.
The patch uses the tc taprio hardware offload to set up the TAS
configuration in the felix driver.

Signed-off-by: Xiaoliang Yang 
Reviewed-by: Vladimir Oltean 
---
 drivers/net/dsa/ocelot/felix.c |  19 
 drivers/net/dsa/ocelot/felix.h |   5 +
 drivers/net/dsa/ocelot/felix_vsc9959.c | 140 +
 3 files changed, 164 insertions(+)

diff --git a/drivers/net/dsa/ocelot/felix.c b/drivers/net/dsa/ocelot/felix.c
index 58d6b0f454e5..d2b114c96952 100644
--- a/drivers/net/dsa/ocelot/felix.c
+++ b/drivers/net/dsa/ocelot/felix.c
@@ -237,6 +237,10 @@ static void felix_phylink_mac_config(struct dsa_switch 
*ds, int port,
 
if (felix->info->pcs_init)
felix->info->pcs_init(ocelot, port, link_an_mode, state);
+
+   if (felix->info->port_sched_speed_set)
+   felix->info->port_sched_speed_set(ocelot, port,
+ state->speed);
 }
 
 static void felix_phylink_mac_an_restart(struct dsa_switch *ds, int port)
@@ -730,6 +734,19 @@ static void felix_port_policer_del(struct dsa_switch *ds, 
int port)
ocelot_port_policer_del(ocelot, port);
 }
 
+static int felix_port_setup_tc(struct dsa_switch *ds, int port,
+  enum tc_setup_type type,
+  void *type_data)
+{
+   struct ocelot *ocelot = ds->priv;
+   struct felix *felix = ocelot_to_felix(ocelot);
+
+   if (felix->info->port_setup_tc)
+   return felix->info->port_setup_tc(ds, port, type, type_data);
+   else
+   return -EOPNOTSUPP;
+}
+
 static const struct dsa_switch_ops felix_switch_ops = {
.get_tag_protocol   = felix_get_tag_protocol,
.setup  = felix_setup,
@@ -768,6 +785,7 @@ static const struct dsa_switch_ops felix_switch_ops = {
.cls_flower_add = felix_cls_flower_add,
.cls_flower_del = felix_cls_flower_del,
.cls_flower_stats   = felix_cls_flower_stats,
+   .port_setup_tc  = felix_port_setup_tc,
 };
 
 static struct felix_info *felix_instance_tbl[] = {
@@ -856,6 +874,7 @@ static int felix_pci_probe(struct pci_dev *pdev,
 
ds->dev = &pdev->dev;
ds->num_ports = felix->info->num_ports;
+   ds->num_tx_queues = felix->info->num_tx_queues;
ds->ops = &felix_switch_ops;
ds->priv = ocelot;
felix->ds = ds;
diff --git a/drivers/net/dsa/ocelot/felix.h b/drivers/net/dsa/ocelot/felix.h
index b94386fa8d63..352f7b940af7 100644
--- a/drivers/net/dsa/ocelot/felix.h
+++ b/drivers/net/dsa/ocelot/felix.h
@@ -20,6 +20,7 @@ struct felix_info {
const struct ocelot_stat_layout *stats_layout;
unsigned intnum_stats;
int num_ports;
+   int num_tx_queues;
struct vcap_field   *vcap_is2_keys;
struct vcap_field   *vcap_is2_actions;
const struct vcap_props *vcap;
@@ -35,6 +36,10 @@ struct felix_info {
  struct phylink_link_state *state);
int (*prevalidate_phy_mode)(struct ocelot *ocelot, int port,
phy_interface_t phy_mode);
+   int (*port_setup_tc)(struct dsa_switch *ds, int port,
+enum tc_setup_type type, void *type_data);
+   void(*port_sched_speed_set)(struct ocelot *ocelot, int port,
+   u32 speed);
 };
 
 extern struct felix_info   felix_info_vsc9959;
diff --git a/drivers/net/dsa/ocelot/felix_vsc9959.c 
b/drivers/net/dsa/ocelot/felix_vsc9959.c
index 1c56568d5aca..efdcc547e0c9 100644
--- a/drivers/net/dsa/ocelot/felix_vsc9959.c
+++ b/drivers/net/dsa/ocelot/felix_vsc9959.c
@@ -3,9 +3,12 @@
  * Copyright 2018-2019 NXP Semiconductors
  */
 #include 
+#include 
 #include 
+#include 
 #include 
 #include 
+#include 
 #include 
 #include 
 #include "felix.h"
@@ -27,6 +30,8 @@
 #define USXGMII_LPA_DUPLEX(lpa)(((lpa) & GENMASK(12, 12)) >> 
12)
 #define USXGMII_LPA_SPEED(lpa) (((lpa) & GENMASK(11, 9)) >> 9)
 
+#define VSC9959_TAS_GCL_ENTRY_MAX  63
+
 enum usxgmii_speed {
USXGMII_SPEED_10= 0,
USXGMII_SPEED_100   = 1,
@@ -1209,6 +1214,138 @@ static void vsc9959_mdio_bus_free(struct ocelot *ocelot)
mdiobus_unregister(felix->imdio);
 }
 
+static void vsc9959_sched_speed_set(struct ocelot *ocelot, int port,
+   u32 speed)
+{
+   ocelot_rmw_rix(ocelot,
+  QSYS_TAG_CONFIG_LINK_SPEED(speed),
+  QSYS_TAG_CONFIG_LINK_SPEED_M,
+  QSYS_TAG_CONFIG, port);
+}
+

[PATCH v2 net-next 3/3] net: dsa: felix: add support Credit Based Shaper(CBS) for hardware offload

2020-05-12 Thread Xiaoliang Yang

VSC9959 hardware supports the Credit Based Shaper (CBS), which is part
of IEEE 802.1Qav. This patch adds support for sch_cbs offload on VSC9959.

Signed-off-by: Xiaoliang Yang 
---
 drivers/net/dsa/ocelot/felix_vsc9959.c | 50 +-
 1 file changed, 49 insertions(+), 1 deletion(-)

diff --git a/drivers/net/dsa/ocelot/felix_vsc9959.c 
b/drivers/net/dsa/ocelot/felix_vsc9959.c
index efdcc547e0c9..df4498c0e864 100644
--- a/drivers/net/dsa/ocelot/felix_vsc9959.c
+++ b/drivers/net/dsa/ocelot/felix_vsc9959.c
@@ -207,7 +207,7 @@ static const u32 vsc9959_qsys_regmap[] = {
REG(QSYS_QMAXSDU_CFG_6, 0x00f62c),
REG(QSYS_QMAXSDU_CFG_7, 0x00f648),
REG(QSYS_PREEMPTION_CFG,0x00f664),
-   REG_RESERVED(QSYS_CIR_CFG),
+   REG(QSYS_CIR_CFG,   0x00),
REG(QSYS_EIR_CFG,   0x04),
REG(QSYS_SE_CFG,0x08),
REG(QSYS_SE_DWRR_CFG,   0x0c),
@@ -1332,6 +1332,52 @@ static int vsc9959_qos_port_tas_set(struct ocelot 
*ocelot, int port,
return ret;
 }
 
+/* Configure the credit-based shaper for one egress queue of @port from a
+ * tc-cbs offload request. Returns -EINVAL when the queue index is not below
+ * ds->num_tx_queues, 0 otherwise. Disabling zeroes the CIR rate/burst and
+ * clears SE_AVB_ENA for that queue.
+ */
+static int vsc9959_qos_port_cbs_set(struct dsa_switch *ds, int port,
+				    struct tc_cbs_qopt_offload *cbs_qopt)
+{
+	struct ocelot *ocelot = ds->priv;
+	int port_ix = port * 8 + cbs_qopt->queue;
+	u32 rate, burst;
+
+	if (cbs_qopt->queue >= ds->num_tx_queues)
+		return -EINVAL;
+
+	if (!cbs_qopt->enable) {
+		ocelot_write_gix(ocelot, QSYS_CIR_CFG_CIR_RATE(0) |
+				 QSYS_CIR_CFG_CIR_BURST(0),
+				 QSYS_CIR_CFG, port_ix);
+
+		ocelot_rmw_gix(ocelot, 0, QSYS_SE_CFG_SE_AVB_ENA,
+			       QSYS_SE_CFG, port_ix);
+
+		return 0;
+	}
+
+	/* Rate unit is 100 kbps */
+	rate = DIV_ROUND_UP(cbs_qopt->idleslope, 100);
+	/* Avoid using zero rate */
+	rate = clamp_t(u32, rate, 1, GENMASK(14, 0));
+	/* Burst unit is 4kB */
+	burst = DIV_ROUND_UP(cbs_qopt->hicredit, 4096);
+	/* Avoid using zero burst size; clamp the burst value, not the rate */
+	burst = clamp_t(u32, burst, 1, GENMASK(5, 0));
+	ocelot_write_gix(ocelot, QSYS_CIR_CFG_CIR_RATE(rate) |
+			 QSYS_CIR_CFG_CIR_BURST(burst),
+			 QSYS_CIR_CFG, port_ix);
+
+	ocelot_rmw_gix(ocelot,
+		       QSYS_SE_CFG_SE_FRM_MODE(0) | QSYS_SE_CFG_SE_AVB_ENA,
+		       QSYS_SE_CFG_SE_AVB_ENA | QSYS_SE_CFG_SE_FRM_MODE_M,
+		       QSYS_SE_CFG, port_ix);
+
+	return 0;
+}
+
 static int vsc9959_port_setup_tc(struct dsa_switch *ds, int port,
 enum tc_setup_type type,
 void *type_data)
@@ -1341,6 +1387,8 @@ static int vsc9959_port_setup_tc(struct dsa_switch *ds, 
int port,
switch (type) {
case TC_SETUP_QDISC_TAPRIO:
return vsc9959_qos_port_tas_set(ocelot, port, type_data);
+   case TC_SETUP_QDISC_CBS:
+   return vsc9959_qos_port_cbs_set(ds, port, type_data);
default:
return -EOPNOTSUPP;
}
-- 
2.17.1

1 2 3 4 5 6 7 8 9 10 >

1 - 100 of 1442 matches

Mail list logo