Re: [PATCH v4 2/2] x86/amd: Fixup cpu_core_id for family17h downcore configuration

2017-07-24 Thread Borislav Petkov
On Tue, Jul 25, 2017 at 12:51:53PM +0700, Suravee Suthikulpanit wrote:
> Ok. Sure, it doesn't need be contiguous. But at least the cpu_core_id should
> represent an ID that make some sense since it is used in the
> arch/x86/kernel/smpboot.c: match_smt() and some other places. So, if it's
> invalid for the downcore configuration (i.e. duplicated where it should not
> be), we should at least clean this up.

Ah right, we do use it for the SMT siblings. So yes, it should be
correct for them. And I'm pretty sure the numbers we derive from the
initial APIC ID are already good enough for that.

> I will update the patch to only limit the fixup to pre-family17h.

Yeah, give that a try. Make sure to check the topology masks are
correct.

Thanks.

-- 
Regards/Gruss,
Boris.

SUSE Linux GmbH, GF: Felix Imendörffer, Jane Smithard, Graham Norton, HRB 21284 
(AG Nürnberg)
-- 


[PATCH 1/2] remoteproc: qcom: Add support for SSR notifications

2017-07-24 Thread Bjorn Andersson
This adds the remoteproc part of subsystem restart, which is responsible
for emitting notifications to other processors in the system about a
dying remoteproc instance.

These notifications are propagated to the various communication systems
in the various remote processors to shut down communication links that
were left in a dangling state as the remoteproc was stopped (or crashed).

Signed-off-by: Bjorn Andersson 
---
 drivers/remoteproc/qcom_adsp_pil.c|  6 +++
 drivers/remoteproc/qcom_common.c  | 71 +++
 drivers/remoteproc/qcom_common.h  | 10 +
 drivers/remoteproc/qcom_q6v5_pil.c|  3 ++
 include/linux/remoteproc/qcom_rproc.h | 22 +++
 5 files changed, 112 insertions(+)
 create mode 100644 include/linux/remoteproc/qcom_rproc.h

diff --git a/drivers/remoteproc/qcom_adsp_pil.c 
b/drivers/remoteproc/qcom_adsp_pil.c
index 49fe2f807e1d..a41d399766fd 100644
--- a/drivers/remoteproc/qcom_adsp_pil.c
+++ b/drivers/remoteproc/qcom_adsp_pil.c
@@ -38,6 +38,7 @@ struct adsp_data {
const char *firmware_name;
int pas_id;
bool has_aggre2_clk;
+   const char *ssr_name;
 };
 
 struct qcom_adsp {
@@ -72,6 +73,7 @@ struct qcom_adsp {
size_t mem_size;
 
struct qcom_rproc_subdev smd_subdev;
+   struct qcom_rproc_ssr ssr_subdev;
 };
 
 static int adsp_load(struct rproc *rproc, const struct firmware *fw)
@@ -402,6 +404,7 @@ static int adsp_probe(struct platform_device *pdev)
}
 
qcom_add_smd_subdev(rproc, >smd_subdev);
+   qcom_add_ssr_subdev(rproc, >ssr_subdev, desc->ssr_name);
 
ret = rproc_add(rproc);
if (ret)
@@ -423,6 +426,7 @@ static int adsp_remove(struct platform_device *pdev)
rproc_del(adsp->rproc);
 
qcom_remove_smd_subdev(adsp->rproc, >smd_subdev);
+   qcom_remove_ssr_subdev(adsp->rproc, >ssr_subdev);
rproc_free(adsp->rproc);
 
return 0;
@@ -433,6 +437,7 @@ static const struct adsp_data adsp_resource_init = {
.firmware_name = "adsp.mdt",
.pas_id = 1,
.has_aggre2_clk = false,
+   .ssr_name = "lpass",
 };
 
 static const struct adsp_data slpi_resource_init = {
@@ -440,6 +445,7 @@ static const struct adsp_data slpi_resource_init = {
.firmware_name = "slpi.mdt",
.pas_id = 12,
.has_aggre2_clk = true,
+   .ssr_name = "dsps",
 };
 
 static const struct of_device_id adsp_of_match[] = {
diff --git a/drivers/remoteproc/qcom_common.c b/drivers/remoteproc/qcom_common.c
index bb90481215c6..5556e767d65b 100644
--- a/drivers/remoteproc/qcom_common.c
+++ b/drivers/remoteproc/qcom_common.c
@@ -18,6 +18,7 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 #include 
 
@@ -25,6 +26,9 @@
 #include "qcom_common.h"
 
 #define to_smd_subdev(d) container_of(d, struct qcom_rproc_subdev, subdev)
+#define to_ssr_subdev(d) container_of(d, struct qcom_rproc_ssr, subdev)
+
+BLOCKING_NOTIFIER_HEAD(ssr_notifiers);
 
 /**
  * qcom_mdt_find_rsc_table() - provide dummy resource table for remoteproc
@@ -92,5 +96,72 @@ void qcom_remove_smd_subdev(struct rproc *rproc, struct 
qcom_rproc_subdev *smd)
 }
 EXPORT_SYMBOL_GPL(qcom_remove_smd_subdev);
 
+/**
+ * qcom_register_ssr_notifier() - register SSR notification handler
+ * @nb:notifier_block to notify for restart notifications
+ *
+ * Returns 0 on success, negative errno on failure.
+ *
+ * This register the @notify function as handler for restart notifications. As
+ * remote processors are stopped this function will be called, with the SSR
+ * name passed as a parameter.
+ */
+int qcom_register_ssr_notifier(struct notifier_block *nb)
+{
+   return blocking_notifier_chain_register(_notifiers, nb);
+}
+EXPORT_SYMBOL_GPL(qcom_register_ssr_notifier);
+
+/**
+ * qcom_unregister_ssr_notifier() - unregister SSR notification handler
+ * @nb:notifier_block to unregister
+ */
+void qcom_unregister_ssr_notifier(struct notifier_block *nb)
+{
+   blocking_notifier_chain_unregister(_notifiers, nb);
+}
+EXPORT_SYMBOL_GPL(qcom_unregister_ssr_notifier);
+
+static int ssr_notify_start(struct rproc_subdev *subdev)
+{
+   return  0;
+}
+
+static void ssr_notify_stop(struct rproc_subdev *subdev)
+{
+   struct qcom_rproc_ssr *ssr = to_ssr_subdev(subdev);
+
+   blocking_notifier_call_chain(_notifiers, 0, (void *)ssr->name);
+}
+
+/**
+ * qcom_add_ssr_subdev() - register subdevice as restart notification source
+ * @rproc: rproc handle
+ * @ssr:   SSR subdevice handle
+ * @ssr_name:  identifier to use for notifications originating from @rproc
+ *
+ * As the @ssr is registered with the @rproc SSR events will be sent to all
+ * registered listeners in the system as the remoteproc is shut down.
+ */
+void qcom_add_ssr_subdev(struct rproc *rproc, struct qcom_rproc_ssr *ssr,
+const char *ssr_name)
+{
+   ssr->name = 

Re: [PATCH v4 2/2] x86/amd: Fixup cpu_core_id for family17h downcore configuration

2017-07-24 Thread Borislav Petkov
On Tue, Jul 25, 2017 at 12:51:53PM +0700, Suravee Suthikulpanit wrote:
> Ok. Sure, it doesn't need be contiguous. But at least the cpu_core_id should
> represent an ID that make some sense since it is used in the
> arch/x86/kernel/smpboot.c: match_smt() and some other places. So, if it's
> invalid for the downcore configuration (i.e. duplicated where it should not
> be), we should at least clean this up.

Ah right, we do use it for the SMT siblings. So yes, it should be
correct for them. And I'm pretty sure the numbers we derive from the
initial APIC ID are already good enough for that.

> I will update the patch to only limit the fixup to pre-family17h.

Yeah, give that a try. Make sure to check the topology masks are
correct.

Thanks.

-- 
Regards/Gruss,
Boris.

SUSE Linux GmbH, GF: Felix Imendörffer, Jane Smithard, Graham Norton, HRB 21284 
(AG Nürnberg)
-- 


[PATCH 1/2] remoteproc: qcom: Add support for SSR notifications

2017-07-24 Thread Bjorn Andersson
This adds the remoteproc part of subsystem restart, which is responsible
for emitting notifications to other processors in the system about a
dying remoteproc instance.

These notifications are propagated to the various communication systems
in the various remote processors to shut down communication links that
were left in a dangling state as the remoteproc was stopped (or crashed).

Signed-off-by: Bjorn Andersson 
---
 drivers/remoteproc/qcom_adsp_pil.c|  6 +++
 drivers/remoteproc/qcom_common.c  | 71 +++
 drivers/remoteproc/qcom_common.h  | 10 +
 drivers/remoteproc/qcom_q6v5_pil.c|  3 ++
 include/linux/remoteproc/qcom_rproc.h | 22 +++
 5 files changed, 112 insertions(+)
 create mode 100644 include/linux/remoteproc/qcom_rproc.h

diff --git a/drivers/remoteproc/qcom_adsp_pil.c 
b/drivers/remoteproc/qcom_adsp_pil.c
index 49fe2f807e1d..a41d399766fd 100644
--- a/drivers/remoteproc/qcom_adsp_pil.c
+++ b/drivers/remoteproc/qcom_adsp_pil.c
@@ -38,6 +38,7 @@ struct adsp_data {
const char *firmware_name;
int pas_id;
bool has_aggre2_clk;
+   const char *ssr_name;
 };
 
 struct qcom_adsp {
@@ -72,6 +73,7 @@ struct qcom_adsp {
size_t mem_size;
 
struct qcom_rproc_subdev smd_subdev;
+   struct qcom_rproc_ssr ssr_subdev;
 };
 
 static int adsp_load(struct rproc *rproc, const struct firmware *fw)
@@ -402,6 +404,7 @@ static int adsp_probe(struct platform_device *pdev)
}
 
qcom_add_smd_subdev(rproc, >smd_subdev);
+   qcom_add_ssr_subdev(rproc, >ssr_subdev, desc->ssr_name);
 
ret = rproc_add(rproc);
if (ret)
@@ -423,6 +426,7 @@ static int adsp_remove(struct platform_device *pdev)
rproc_del(adsp->rproc);
 
qcom_remove_smd_subdev(adsp->rproc, >smd_subdev);
+   qcom_remove_ssr_subdev(adsp->rproc, >ssr_subdev);
rproc_free(adsp->rproc);
 
return 0;
@@ -433,6 +437,7 @@ static const struct adsp_data adsp_resource_init = {
.firmware_name = "adsp.mdt",
.pas_id = 1,
.has_aggre2_clk = false,
+   .ssr_name = "lpass",
 };
 
 static const struct adsp_data slpi_resource_init = {
@@ -440,6 +445,7 @@ static const struct adsp_data slpi_resource_init = {
.firmware_name = "slpi.mdt",
.pas_id = 12,
.has_aggre2_clk = true,
+   .ssr_name = "dsps",
 };
 
 static const struct of_device_id adsp_of_match[] = {
diff --git a/drivers/remoteproc/qcom_common.c b/drivers/remoteproc/qcom_common.c
index bb90481215c6..5556e767d65b 100644
--- a/drivers/remoteproc/qcom_common.c
+++ b/drivers/remoteproc/qcom_common.c
@@ -18,6 +18,7 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 #include 
 
@@ -25,6 +26,9 @@
 #include "qcom_common.h"
 
 #define to_smd_subdev(d) container_of(d, struct qcom_rproc_subdev, subdev)
+#define to_ssr_subdev(d) container_of(d, struct qcom_rproc_ssr, subdev)
+
+BLOCKING_NOTIFIER_HEAD(ssr_notifiers);
 
 /**
  * qcom_mdt_find_rsc_table() - provide dummy resource table for remoteproc
@@ -92,5 +96,72 @@ void qcom_remove_smd_subdev(struct rproc *rproc, struct 
qcom_rproc_subdev *smd)
 }
 EXPORT_SYMBOL_GPL(qcom_remove_smd_subdev);
 
+/**
+ * qcom_register_ssr_notifier() - register SSR notification handler
+ * @nb:notifier_block to notify for restart notifications
+ *
+ * Returns 0 on success, negative errno on failure.
+ *
+ * This register the @notify function as handler for restart notifications. As
+ * remote processors are stopped this function will be called, with the SSR
+ * name passed as a parameter.
+ */
+int qcom_register_ssr_notifier(struct notifier_block *nb)
+{
+   return blocking_notifier_chain_register(_notifiers, nb);
+}
+EXPORT_SYMBOL_GPL(qcom_register_ssr_notifier);
+
+/**
+ * qcom_unregister_ssr_notifier() - unregister SSR notification handler
+ * @nb:notifier_block to unregister
+ */
+void qcom_unregister_ssr_notifier(struct notifier_block *nb)
+{
+   blocking_notifier_chain_unregister(_notifiers, nb);
+}
+EXPORT_SYMBOL_GPL(qcom_unregister_ssr_notifier);
+
+static int ssr_notify_start(struct rproc_subdev *subdev)
+{
+   return  0;
+}
+
+static void ssr_notify_stop(struct rproc_subdev *subdev)
+{
+   struct qcom_rproc_ssr *ssr = to_ssr_subdev(subdev);
+
+   blocking_notifier_call_chain(_notifiers, 0, (void *)ssr->name);
+}
+
+/**
+ * qcom_add_ssr_subdev() - register subdevice as restart notification source
+ * @rproc: rproc handle
+ * @ssr:   SSR subdevice handle
+ * @ssr_name:  identifier to use for notifications originating from @rproc
+ *
+ * As the @ssr is registered with the @rproc SSR events will be sent to all
+ * registered listeners in the system as the remoteproc is shut down.
+ */
+void qcom_add_ssr_subdev(struct rproc *rproc, struct qcom_rproc_ssr *ssr,
+const char *ssr_name)
+{
+   ssr->name = ssr_name;
+
+   

[PATCH 2/2] soc: qcom: GLINK SSR notifier

2017-07-24 Thread Bjorn Andersson
This driver registers as a subsystem restart notifier and will send out
notifications to remote processors that have opened the "glink_ssr" GLINK
channel.

This mechanism is used to signal any GLINK participants that a 3rd party
is gone and that the communication state has to be reset; i.e. that read
and write pointers of the GLINK FIFOs are stale.

Signed-off-by: Bjorn Andersson 
---
 drivers/soc/qcom/Kconfig |   9 +++
 drivers/soc/qcom/Makefile|   1 +
 drivers/soc/qcom/glink_ssr.c | 164 +++
 3 files changed, 174 insertions(+)
 create mode 100644 drivers/soc/qcom/glink_ssr.c

diff --git a/drivers/soc/qcom/Kconfig b/drivers/soc/qcom/Kconfig
index 9fca977ef18d..d0fc331972d2 100644
--- a/drivers/soc/qcom/Kconfig
+++ b/drivers/soc/qcom/Kconfig
@@ -1,6 +1,15 @@
 #
 # QCOM Soc drivers
 #
+config QCOM_GLINK_SSR
+   tristate "Qualcomm Glink SSR driver"
+   depends on RPMSG
+   depends on QCOM_RPROC_COMMON
+   help
+ Say y here to enable GLINK SSR support. The GLINK SSR driver
+ implements the SSR protocol for notifying the remote processor about
+ neighboring subsystems going up or down.
+
 config QCOM_GSBI
 tristate "QCOM General Serial Bus Interface"
 depends on ARCH_QCOM
diff --git a/drivers/soc/qcom/Makefile b/drivers/soc/qcom/Makefile
index 414f0de274fa..f151de41eb93 100644
--- a/drivers/soc/qcom/Makefile
+++ b/drivers/soc/qcom/Makefile
@@ -1,3 +1,4 @@
+obj-$(CONFIG_QCOM_GLINK_SSR) +=glink_ssr.o
 obj-$(CONFIG_QCOM_GSBI)+=  qcom_gsbi.o
 obj-$(CONFIG_QCOM_MDT_LOADER)  += mdt_loader.o
 obj-$(CONFIG_QCOM_PM)  +=  spm.o
diff --git a/drivers/soc/qcom/glink_ssr.c b/drivers/soc/qcom/glink_ssr.c
new file mode 100644
index ..19c7399eddb5
--- /dev/null
+++ b/drivers/soc/qcom/glink_ssr.c
@@ -0,0 +1,164 @@
+/*
+ * Copyright (c) 2014-2017, The Linux Foundation. All rights reserved.
+ * Copyright (c) 2017, Linaro Ltd.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 and
+ * only version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+#include 
+#include 
+#include 
+#include 
+#include 
+
+/**
+ * struct do_cleanup_msg - The data structure for an SSR do_cleanup message
+ * version: The G-Link SSR protocol version
+ * command: The G-Link SSR command - do_cleanup
+ * seq_num: Sequence number
+ * name_len:Length of the name of the subsystem being restarted
+ * name:G-Link edge name of the subsystem being restarted
+ */
+struct do_cleanup_msg {
+   __le32 version;
+   __le32 command;
+   __le32 seq_num;
+   __le32 name_len;
+   char name[32];
+};
+
+/**
+ * struct cleanup_done_msg - The data structure for an SSR cleanup_done message
+ * version: The G-Link SSR protocol version
+ * response:The G-Link SSR response to a do_cleanup command, cleanup_done
+ * seq_num: Sequence number
+ */
+struct cleanup_done_msg {
+   __le32 version;
+   __le32 response;
+   __le32 seq_num;
+};
+
+/**
+ * G-Link SSR protocol commands
+ */
+#define GLINK_SSR_DO_CLEANUP   0
+#define GLINK_SSR_CLEANUP_DONE 1
+
+struct glink_ssr {
+   struct device *dev;
+   struct rpmsg_endpoint *ept;
+
+   struct notifier_block nb;
+
+   u32 seq_num;
+   struct completion completion;
+};
+
+static int qcom_glink_ssr_callback(struct rpmsg_device *rpdev,
+  void *data, int len, void *priv, u32 addr)
+{
+   struct cleanup_done_msg *msg = data;
+   struct glink_ssr *ssr = dev_get_drvdata(>dev);
+
+   if (len < sizeof(*msg)) {
+   dev_err(ssr->dev, "message too short\n");
+   return -EINVAL;
+   }
+
+   if (le32_to_cpu(msg->version) != 0)
+   return -EINVAL;
+
+   if (le32_to_cpu(msg->response) != GLINK_SSR_CLEANUP_DONE)
+   return 0;
+
+   if (le32_to_cpu(msg->seq_num) != ssr->seq_num) {
+   dev_err(ssr->dev, "invalid sequence number of response\n");
+   return -EINVAL;
+   }
+
+   complete(>completion);
+
+   return 0;
+}
+
+static int qcom_glink_ssr_notify(struct notifier_block *nb, unsigned long 
event,
+void *data)
+{
+   struct glink_ssr *ssr = container_of(nb, struct glink_ssr, nb);
+   struct do_cleanup_msg msg;
+   char *ssr_name = data;
+   int ret;
+
+   ssr->seq_num++;
+   reinit_completion(>completion);
+
+   memset(, 0, sizeof(msg));
+   msg.command = cpu_to_le32(GLINK_SSR_DO_CLEANUP);
+   msg.seq_num = cpu_to_le32(ssr->seq_num);
+   

[PATCH 2/2] soc: qcom: GLINK SSR notifier

2017-07-24 Thread Bjorn Andersson
This driver registers as a subsystem restart notifier and will send out
notifications to remote processors that have opened the "glink_ssr" GLINK
channel.

This mechanism is used to signal any GLINK participants that a 3rd party
is gone and that the communication state has to be reset; i.e. that read
and write pointers of the GLINK FIFOs are stale.

Signed-off-by: Bjorn Andersson 
---
 drivers/soc/qcom/Kconfig |   9 +++
 drivers/soc/qcom/Makefile|   1 +
 drivers/soc/qcom/glink_ssr.c | 164 +++
 3 files changed, 174 insertions(+)
 create mode 100644 drivers/soc/qcom/glink_ssr.c

diff --git a/drivers/soc/qcom/Kconfig b/drivers/soc/qcom/Kconfig
index 9fca977ef18d..d0fc331972d2 100644
--- a/drivers/soc/qcom/Kconfig
+++ b/drivers/soc/qcom/Kconfig
@@ -1,6 +1,15 @@
 #
 # QCOM Soc drivers
 #
+config QCOM_GLINK_SSR
+   tristate "Qualcomm Glink SSR driver"
+   depends on RPMSG
+   depends on QCOM_RPROC_COMMON
+   help
+ Say y here to enable GLINK SSR support. The GLINK SSR driver
+ implements the SSR protocol for notifying the remote processor about
+ neighboring subsystems going up or down.
+
 config QCOM_GSBI
 tristate "QCOM General Serial Bus Interface"
 depends on ARCH_QCOM
diff --git a/drivers/soc/qcom/Makefile b/drivers/soc/qcom/Makefile
index 414f0de274fa..f151de41eb93 100644
--- a/drivers/soc/qcom/Makefile
+++ b/drivers/soc/qcom/Makefile
@@ -1,3 +1,4 @@
+obj-$(CONFIG_QCOM_GLINK_SSR) +=glink_ssr.o
 obj-$(CONFIG_QCOM_GSBI)+=  qcom_gsbi.o
 obj-$(CONFIG_QCOM_MDT_LOADER)  += mdt_loader.o
 obj-$(CONFIG_QCOM_PM)  +=  spm.o
diff --git a/drivers/soc/qcom/glink_ssr.c b/drivers/soc/qcom/glink_ssr.c
new file mode 100644
index ..19c7399eddb5
--- /dev/null
+++ b/drivers/soc/qcom/glink_ssr.c
@@ -0,0 +1,164 @@
+/*
+ * Copyright (c) 2014-2017, The Linux Foundation. All rights reserved.
+ * Copyright (c) 2017, Linaro Ltd.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 and
+ * only version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+#include 
+#include 
+#include 
+#include 
+#include 
+
+/**
+ * struct do_cleanup_msg - The data structure for an SSR do_cleanup message
+ * version: The G-Link SSR protocol version
+ * command: The G-Link SSR command - do_cleanup
+ * seq_num: Sequence number
+ * name_len:Length of the name of the subsystem being restarted
+ * name:G-Link edge name of the subsystem being restarted
+ */
+struct do_cleanup_msg {
+   __le32 version;
+   __le32 command;
+   __le32 seq_num;
+   __le32 name_len;
+   char name[32];
+};
+
+/**
+ * struct cleanup_done_msg - The data structure for an SSR cleanup_done message
+ * version: The G-Link SSR protocol version
+ * response:The G-Link SSR response to a do_cleanup command, cleanup_done
+ * seq_num: Sequence number
+ */
+struct cleanup_done_msg {
+   __le32 version;
+   __le32 response;
+   __le32 seq_num;
+};
+
+/**
+ * G-Link SSR protocol commands
+ */
+#define GLINK_SSR_DO_CLEANUP   0
+#define GLINK_SSR_CLEANUP_DONE 1
+
+struct glink_ssr {
+   struct device *dev;
+   struct rpmsg_endpoint *ept;
+
+   struct notifier_block nb;
+
+   u32 seq_num;
+   struct completion completion;
+};
+
+static int qcom_glink_ssr_callback(struct rpmsg_device *rpdev,
+  void *data, int len, void *priv, u32 addr)
+{
+   struct cleanup_done_msg *msg = data;
+   struct glink_ssr *ssr = dev_get_drvdata(>dev);
+
+   if (len < sizeof(*msg)) {
+   dev_err(ssr->dev, "message too short\n");
+   return -EINVAL;
+   }
+
+   if (le32_to_cpu(msg->version) != 0)
+   return -EINVAL;
+
+   if (le32_to_cpu(msg->response) != GLINK_SSR_CLEANUP_DONE)
+   return 0;
+
+   if (le32_to_cpu(msg->seq_num) != ssr->seq_num) {
+   dev_err(ssr->dev, "invalid sequence number of response\n");
+   return -EINVAL;
+   }
+
+   complete(>completion);
+
+   return 0;
+}
+
+static int qcom_glink_ssr_notify(struct notifier_block *nb, unsigned long 
event,
+void *data)
+{
+   struct glink_ssr *ssr = container_of(nb, struct glink_ssr, nb);
+   struct do_cleanup_msg msg;
+   char *ssr_name = data;
+   int ret;
+
+   ssr->seq_num++;
+   reinit_completion(>completion);
+
+   memset(, 0, sizeof(msg));
+   msg.command = cpu_to_le32(GLINK_SSR_DO_CLEANUP);
+   msg.seq_num = cpu_to_le32(ssr->seq_num);
+   msg.name_len = 

[PATCH 0/2] Qualcomm GLINK SSR support

2017-07-24 Thread Bjorn Andersson
This adds the common Qualcomm helpers to tie in subsystem restart notifications
with the ADSP and Modem PILs and then adds a driver for the "glink_ssr" channel
to propagate these notifications.

This is needed on GLINK RPM enabled platforms to have the RPM reset the state
of the communication channels to the peripherals, allowing them to be restarted
without the RPM crashing due to stale FIFO data.

Bjorn Andersson (2):
  remoteproc: qcom: Add support for SSR notifications
  soc: qcom: GLINK SSR notifier

 drivers/remoteproc/qcom_adsp_pil.c|   6 ++
 drivers/remoteproc/qcom_common.c  |  71 +++
 drivers/remoteproc/qcom_common.h  |  10 +++
 drivers/remoteproc/qcom_q6v5_pil.c|   3 +
 drivers/soc/qcom/Kconfig  |   9 ++
 drivers/soc/qcom/Makefile |   1 +
 drivers/soc/qcom/glink_ssr.c  | 164 ++
 include/linux/remoteproc/qcom_rproc.h |  22 +
 8 files changed, 286 insertions(+)
 create mode 100644 drivers/soc/qcom/glink_ssr.c
 create mode 100644 include/linux/remoteproc/qcom_rproc.h

-- 
2.12.0



[PATCH 0/2] Qualcomm GLINK SSR support

2017-07-24 Thread Bjorn Andersson
This adds the common Qualcomm helpers to tie in subsystem restart notifications
with the ADSP and Modem PILs and then adds a driver for the "glink_ssr" channel
to propagate these notifications.

This is needed on GLINK RPM enabled platforms to have the RPM reset the state
of the communication channels to the peripherals, allowing them to be restarted
without the RPM crashing due to stale FIFO data.

Bjorn Andersson (2):
  remoteproc: qcom: Add support for SSR notifications
  soc: qcom: GLINK SSR notifier

 drivers/remoteproc/qcom_adsp_pil.c|   6 ++
 drivers/remoteproc/qcom_common.c  |  71 +++
 drivers/remoteproc/qcom_common.h  |  10 +++
 drivers/remoteproc/qcom_q6v5_pil.c|   3 +
 drivers/soc/qcom/Kconfig  |   9 ++
 drivers/soc/qcom/Makefile |   1 +
 drivers/soc/qcom/glink_ssr.c  | 164 ++
 include/linux/remoteproc/qcom_rproc.h |  22 +
 8 files changed, 286 insertions(+)
 create mode 100644 drivers/soc/qcom/glink_ssr.c
 create mode 100644 include/linux/remoteproc/qcom_rproc.h

-- 
2.12.0



Re: [PATCH v2 1/2] ASoC: sun4i-i2s: Add more quirks for newer SoCs

2017-07-24 Thread Maxime Ripard
Hi Markus,

On Sat, Jul 22, 2017 at 08:53:51AM +0200, codekip...@gmail.com wrote:
> From: Marcus Cooper 
> 
> In preparation for changing this driver to support newer SoC
> implementations then where needed there has been a switch from
> regmap_update_bits to regmap_field. Also included are adjustment
> variables although they are not set as no adjustment is required
> for the current support.
> 
> Signed-off-by: Marcus Cooper 
> ---
>  sound/soc/sunxi/sun4i-i2s.c | 267 
> +---
>  1 file changed, 203 insertions(+), 64 deletions(-)
> 
> diff --git a/sound/soc/sunxi/sun4i-i2s.c b/sound/soc/sunxi/sun4i-i2s.c
> index 62b307b0c846..1854405cbcb1 100644
> --- a/sound/soc/sunxi/sun4i-i2s.c
> +++ b/sound/soc/sunxi/sun4i-i2s.c
> @@ -50,6 +50,8 @@
>  #define SUN4I_I2S_FMT0_FMT_RIGHT_J   (2 << 0)
>  #define SUN4I_I2S_FMT0_FMT_LEFT_J(1 << 0)
>  #define SUN4I_I2S_FMT0_FMT_I2S   (0 << 0)
> +#define SUN4I_I2S_FMT0_POLARITY_INVERTED (1)
> +#define SUN4I_I2S_FMT0_POLARITY_NORMAL   (0)
>  
>  #define SUN4I_I2S_FMT1_REG   0x08
>  #define SUN4I_I2S_FIFO_TX_REG0x0c
> @@ -72,7 +74,7 @@
>  #define SUN4I_I2S_INT_STA_REG0x20
>  
>  #define SUN4I_I2S_CLK_DIV_REG0x24
> -#define SUN4I_I2S_CLK_DIV_MCLK_ENBIT(7)
> +#define SUN4I_I2S_CLK_DIV_MCLK_EN7
>  #define SUN4I_I2S_CLK_DIV_BCLK_MASK  GENMASK(6, 4)
>  #define SUN4I_I2S_CLK_DIV_BCLK(bclk) ((bclk) << 4)
>  #define SUN4I_I2S_CLK_DIV_MCLK_MASK  GENMASK(3, 0)
> @@ -82,15 +84,39 @@
>  #define SUN4I_I2S_TX_CNT_REG 0x2c
>  
>  #define SUN4I_I2S_TX_CHAN_SEL_REG0x30
> -#define SUN4I_I2S_TX_CHAN_SEL(num_chan)  (((num_chan) - 1) << 0)
> +#define SUN4I_I2S_CHAN_SEL(num_chan) (((num_chan) - 1) << 0)
>  
>  #define SUN4I_I2S_TX_CHAN_MAP_REG0x34
>  #define SUN4I_I2S_TX_CHAN_MAP(chan, sample)  ((sample) << (chan << 2))
> +#define SUN4I_I2S_TX_CHAN_EN(num_chan)   (((1 << num_chan) - 1))
>  
>  #define SUN4I_I2S_RX_CHAN_SEL_REG0x38
>  #define SUN4I_I2S_RX_CHAN_MAP_REG0x3c
>  
> +struct sun4i_i2s_quirks {
> + boolhas_reset;
> + boolhas_master_slave_sel;

I think both variants have a master and slave mode, so it's a bit
misleading.

You should also have a kerneldoc for that structure, to make it clear
what each quirk is supposed to be doing.

> + unsigned intreg_offset_txdata;  /* TX FIFO */
> + unsigned intreg_offset_txchanmap;
> + unsigned intreg_offset_rxchanmap;

Is there any reason for txchanmap and rxchanmap to not be
regmap_fields too?

> + const struct regmap_config  *sun4i_i2s_regmap;
> + unsigned intmclk_adjust;
> + unsigned intbclk_adjust;
> + unsigned intfmt_adjust;

I would replace adjust by offset

> + /* Register fields for i2s */
> + struct reg_fieldfield_clkdiv_mclk_en;
> + struct reg_fieldfield_fmt_set_wss;
> + struct reg_fieldfield_fmt_set_sr;
> + struct reg_fieldfield_fmt_set_bclk_polarity;
> + struct reg_fieldfield_fmt_set_lrclk_polarity;
> + struct reg_fieldfield_fmt_set_mode;
> + struct reg_fieldfield_txchansel;
> + struct reg_fieldfield_rxchansel;
> +};
> +
>  struct sun4i_i2s {
> + struct device   *dev;

You never use it outside of the probe function (and its callee), you
can just pass it directly as an argument

>   struct clk  *bus_clk;
>   struct clk  *mod_clk;
>   struct regmap   *regmap;
> @@ -100,6 +126,18 @@ struct sun4i_i2s {
>  
>   struct snd_dmaengine_dai_dma_data   capture_dma_data;
>   struct snd_dmaengine_dai_dma_data   playback_dma_data;
> +
> + /* Register fields for i2s */
> + struct regmap_field *field_clkdiv_mclk_en;
> + struct regmap_field *field_fmt_set_wss;
> + struct regmap_field *field_fmt_set_sr;
> + struct regmap_field *field_fmt_set_bclk_polarity;
> + struct regmap_field *field_fmt_set_lrclk_polarity;
> + struct regmap_field *field_fmt_set_mode;
> + struct regmap_field *field_txchansel;
> + struct regmap_field *field_rxchansel;
> +
> + const struct sun4i_i2s_quirks   *variant;
>  };
>  
>  struct sun4i_i2s_clk_div {
> @@ -138,7 +176,7 @@ static int sun4i_i2s_get_bclk_div(struct sun4i_i2s *i2s,
>   const struct sun4i_i2s_clk_div *bdiv = _i2s_bclk_div[i];
>  
>   if (bdiv->div == div)
> - return bdiv->val;
> + return bdiv->val + i2s->variant->bclk_adjust;
>   

Re: [PATCH v2 1/2] ASoC: sun4i-i2s: Add more quirks for newer SoCs

2017-07-24 Thread Maxime Ripard
Hi Markus,

On Sat, Jul 22, 2017 at 08:53:51AM +0200, codekip...@gmail.com wrote:
> From: Marcus Cooper 
> 
> In preparation for changing this driver to support newer SoC
> implementations then where needed there has been a switch from
> regmap_update_bits to regmap_field. Also included are adjustment
> variables although they are not set as no adjustment is required
> for the current support.
> 
> Signed-off-by: Marcus Cooper 
> ---
>  sound/soc/sunxi/sun4i-i2s.c | 267 
> +---
>  1 file changed, 203 insertions(+), 64 deletions(-)
> 
> diff --git a/sound/soc/sunxi/sun4i-i2s.c b/sound/soc/sunxi/sun4i-i2s.c
> index 62b307b0c846..1854405cbcb1 100644
> --- a/sound/soc/sunxi/sun4i-i2s.c
> +++ b/sound/soc/sunxi/sun4i-i2s.c
> @@ -50,6 +50,8 @@
>  #define SUN4I_I2S_FMT0_FMT_RIGHT_J   (2 << 0)
>  #define SUN4I_I2S_FMT0_FMT_LEFT_J(1 << 0)
>  #define SUN4I_I2S_FMT0_FMT_I2S   (0 << 0)
> +#define SUN4I_I2S_FMT0_POLARITY_INVERTED (1)
> +#define SUN4I_I2S_FMT0_POLARITY_NORMAL   (0)
>  
>  #define SUN4I_I2S_FMT1_REG   0x08
>  #define SUN4I_I2S_FIFO_TX_REG0x0c
> @@ -72,7 +74,7 @@
>  #define SUN4I_I2S_INT_STA_REG0x20
>  
>  #define SUN4I_I2S_CLK_DIV_REG0x24
> -#define SUN4I_I2S_CLK_DIV_MCLK_ENBIT(7)
> +#define SUN4I_I2S_CLK_DIV_MCLK_EN7
>  #define SUN4I_I2S_CLK_DIV_BCLK_MASK  GENMASK(6, 4)
>  #define SUN4I_I2S_CLK_DIV_BCLK(bclk) ((bclk) << 4)
>  #define SUN4I_I2S_CLK_DIV_MCLK_MASK  GENMASK(3, 0)
> @@ -82,15 +84,39 @@
>  #define SUN4I_I2S_TX_CNT_REG 0x2c
>  
>  #define SUN4I_I2S_TX_CHAN_SEL_REG0x30
> -#define SUN4I_I2S_TX_CHAN_SEL(num_chan)  (((num_chan) - 1) << 0)
> +#define SUN4I_I2S_CHAN_SEL(num_chan) (((num_chan) - 1) << 0)
>  
>  #define SUN4I_I2S_TX_CHAN_MAP_REG0x34
>  #define SUN4I_I2S_TX_CHAN_MAP(chan, sample)  ((sample) << (chan << 2))
> +#define SUN4I_I2S_TX_CHAN_EN(num_chan)   (((1 << num_chan) - 1))
>  
>  #define SUN4I_I2S_RX_CHAN_SEL_REG0x38
>  #define SUN4I_I2S_RX_CHAN_MAP_REG0x3c
>  
> +struct sun4i_i2s_quirks {
> + boolhas_reset;
> + boolhas_master_slave_sel;

I think both variants have a master and slave mode, so it's a bit
misleading.

You should also have a kerneldoc for that structure, to make it clear
what each quirk is supposed to be doing.

> + unsigned intreg_offset_txdata;  /* TX FIFO */
> + unsigned intreg_offset_txchanmap;
> + unsigned intreg_offset_rxchanmap;

Is there any reason for txchanmap and rxchanmap to not be
regmap_fields too?

> + const struct regmap_config  *sun4i_i2s_regmap;
> + unsigned intmclk_adjust;
> + unsigned intbclk_adjust;
> + unsigned intfmt_adjust;

I would replace adjust by offset

> + /* Register fields for i2s */
> + struct reg_fieldfield_clkdiv_mclk_en;
> + struct reg_fieldfield_fmt_set_wss;
> + struct reg_fieldfield_fmt_set_sr;
> + struct reg_fieldfield_fmt_set_bclk_polarity;
> + struct reg_fieldfield_fmt_set_lrclk_polarity;
> + struct reg_fieldfield_fmt_set_mode;
> + struct reg_fieldfield_txchansel;
> + struct reg_fieldfield_rxchansel;
> +};
> +
>  struct sun4i_i2s {
> + struct device   *dev;

You never use it outside of the probe function (and its callee), you
can just pass it directly as an argument

>   struct clk  *bus_clk;
>   struct clk  *mod_clk;
>   struct regmap   *regmap;
> @@ -100,6 +126,18 @@ struct sun4i_i2s {
>  
>   struct snd_dmaengine_dai_dma_data   capture_dma_data;
>   struct snd_dmaengine_dai_dma_data   playback_dma_data;
> +
> + /* Register fields for i2s */
> + struct regmap_field *field_clkdiv_mclk_en;
> + struct regmap_field *field_fmt_set_wss;
> + struct regmap_field *field_fmt_set_sr;
> + struct regmap_field *field_fmt_set_bclk_polarity;
> + struct regmap_field *field_fmt_set_lrclk_polarity;
> + struct regmap_field *field_fmt_set_mode;
> + struct regmap_field *field_txchansel;
> + struct regmap_field *field_rxchansel;
> +
> + const struct sun4i_i2s_quirks   *variant;
>  };
>  
>  struct sun4i_i2s_clk_div {
> @@ -138,7 +176,7 @@ static int sun4i_i2s_get_bclk_div(struct sun4i_i2s *i2s,
>   const struct sun4i_i2s_clk_div *bdiv = _i2s_bclk_div[i];
>  
>   if (bdiv->div == div)
> - return bdiv->val;
> + return bdiv->val + i2s->variant->bclk_adjust;
>   }
>  
>   return -EINVAL;
> @@ -156,7 

Re: [PATCH v4 2/2] x86/amd: Fixup cpu_core_id for family17h downcore configuration

2017-07-24 Thread Suravee Suthikulpanit

Boris,

On 7/24/17 21:44, Borislav Petkov wrote:

On Mon, Jul 24, 2017 at 09:14:18PM +0700, Suravee Suthikulpanit wrote:

Actually, this is not totally accurate. My apology. This patch is
mainly a fix for the incorrect core ID in /proc/cpuinfo.


So you're "fixing" only some numbering thing. Because core_id doesn't
have any influence on anything. Here's on an Intel box I have here:

processor :  0   physical id : 0 core id : 0
processor :  1   physical id : 1 core id : 0
processor :  2   physical id : 2 core id : 0
processor :  3   physical id : 3 core id : 0
processor :  4   physical id : 0 core id : 8
processor :  5   physical id : 1 core id : 8
processor :  6   physical id : 2 core id : 8
processor :  7   physical id : 3 core id : 8
processor :  8   physical id : 0 core id : 2
processor :  9   physical id : 1 core id : 2
processor : 10   physical id : 2 core id : 2
processor : 11   physical id : 3 core id : 2
processor : 12   physical id : 0 core id : 10
processor : 13   physical id : 1 core id : 10
processor : 14   physical id : 2 core id : 10
processor : 15   physical id : 3 core id : 10

[]

So those core id numbers can be anything as long as the cpumasks used by
the scheduler are correct.


Ok. Sure, it doesn't need to be contiguous. But at least the cpu_core_id should
represent an ID that makes some sense, since it is used in
arch/x86/kernel/smpboot.c: match_smt() and some other places. So, if it's
invalid for the downcore configuration (i.e. duplicated where it should not be),
we should at least clean this up.



This is due to the cpu_core_id fixup in amd_get_topology() below:

/* fixup multi-node processor information */
if (nodes_per_socket > 1) {
u32 cus_per_node;

set_cpu_cap(c, X86_FEATURE_AMD_DCM);
cus_per_node = c->x86_max_cores / nodes_per_socket;

/* core id has to be in the [0 .. cores_per_node - 1] range */
c->cpu_core_id %= cus_per_node;
}


AFAICT, Andreas did this for MC at the time:

4a376ec3a259 ("x86: Fix CPU llc_shared_map information for AMD Magny-Cours")

but I don't think we need to care about core_ids fitting into the node
range anymore. For the above reason - topology doesn't use core ids.


Agreed on the point that it does not need to fit into the node range.


So you can just as well let ->cpu_core_id be derived from the
->initial_apicid as it is being done now in amd_detect_cmp().


Actually, for family17h, this is from CPUID_Fn8000001E_EBX[CoreId]. But I
get your point.



In order not to cause any more confusion, you can limit the above fixup
to anything below F17h so that we don't upset existing users and add a
big fat comment as to why we're doing this. But if it is only a silly
numbering thing, I don't see the need for doing that jumping through
hoops.



I will update the patch to only limit the fixup to pre-family17h.

Thanks,
Suravee


Re: [PATCH v4 2/2] x86/amd: Fixup cpu_core_id for family17h downcore configuration

2017-07-24 Thread Suravee Suthikulpanit

Boris,

On 7/24/17 21:44, Borislav Petkov wrote:

On Mon, Jul 24, 2017 at 09:14:18PM +0700, Suravee Suthikulpanit wrote:

Actually, this is not totally accurate. My apologies. This patch is
mainly a fix for the incorrect core ID in /proc/cpuinfo.


So you're "fixing" only some numbering thing. Because core_id doesn't
have any influence on anything. Here's on an Intel box I have here:

processor :  0   physical id : 0 core id : 0
processor :  1   physical id : 1 core id : 0
processor :  2   physical id : 2 core id : 0
processor :  3   physical id : 3 core id : 0
processor :  4   physical id : 0 core id : 8
processor :  5   physical id : 1 core id : 8
processor :  6   physical id : 2 core id : 8
processor :  7   physical id : 3 core id : 8
processor :  8   physical id : 0 core id : 2
processor :  9   physical id : 1 core id : 2
processor : 10   physical id : 2 core id : 2
processor : 11   physical id : 3 core id : 2
processor : 12   physical id : 0 core id : 10
processor : 13   physical id : 1 core id : 10
processor : 14   physical id : 2 core id : 10
processor : 15   physical id : 3 core id : 10

[]

So those core id numbers can be anything as long as the cpumasks used by
the scheduler are correct.


Ok. Sure, it doesn't need to be contiguous. But at least the cpu_core_id should
represent an ID that makes some sense, since it is used in
arch/x86/kernel/smpboot.c: match_smt() and some other places. So, if it's
invalid for the downcore configuration (i.e. duplicated where it should not be),
we should at least clean this up.



This is due to the cpu_core_id fixup in amd_get_topology() below:

/* fixup multi-node processor information */
if (nodes_per_socket > 1) {
u32 cus_per_node;

set_cpu_cap(c, X86_FEATURE_AMD_DCM);
cus_per_node = c->x86_max_cores / nodes_per_socket;

/* core id has to be in the [0 .. cores_per_node - 1] range */
c->cpu_core_id %= cus_per_node;
}


AFAICT, Andreas did this for MC at the time:

4a376ec3a259 ("x86: Fix CPU llc_shared_map information for AMD Magny-Cours")

but I don't think we need to care about core_ids fitting into the node
range anymore. For the above reason - topology doesn't use core ids.


Agreed on the point that it does not need to fit into the node range.


So you can just as well let ->cpu_core_id be derived from the
->initial_apicid as it is being done now in amd_detect_cmp().


Actually, for family17h, this is from CPUID_Fn8000001E_EBX[CoreId]. But I
get your point.



In order not to cause any more confusion, you can limit the above fixup
to anything below F17h so that we don't upset existing users and add a
big fat comment as to why we're doing this. But if it is only a silly
numbering thing, I don't see the need for doing that jumping through
hoops.



I will update the patch to only limit the fixup to pre-family17h.

Thanks,
Suravee


Re: [PATCH v5 2/2] x86/mm: Improve TLB flush documentation

2017-07-24 Thread Andy Lutomirski
On Mon, Jul 24, 2017 at 9:47 PM, Nadav Amit  wrote:
> Andy Lutomirski  wrote:
>
>> Improve comments as requested by PeterZ and also add some
>> documentation at the top of the file.
>>
>> Signed-off-by: Andy Lutomirski 
>> ---
>> arch/x86/mm/tlb.c | 43 +--
>> 1 file changed, 33 insertions(+), 10 deletions(-)
>>
>> diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c
>> index ce104b962a17..d4ee781ca656 100644
>> --- a/arch/x86/mm/tlb.c
>> +++ b/arch/x86/mm/tlb.c
>> @@ -15,17 +15,24 @@
>> #include 
>>
>> /*
>> - *   TLB flushing, formerly SMP-only
>> - *   c/o Linus Torvalds.
>> + * The code in this file handles mm switches and TLB flushes.
>>  *
>> - *   These mean you can really definitely utterly forget about
>> - *   writing to user space from interrupts. (Its not allowed anyway).
>> + * An mm's TLB state is logically represented by a totally ordered sequence
>> + * of TLB flushes.  Each flush increments the mm's tlb_gen.
>>  *
>> - *   Optimizations Manfred Spraul 
>> + * Each CPU that might have an mm in its TLB (and that might ever use
>> + * those TLB entries) will have an entry for it in its cpu_tlbstate.ctxs
>> + * array.  The kernel maintains the following invariant: for each CPU and
>> + * for each mm in its cpu_tlbstate.ctxs array, the CPU has performed all
>> + * flushes in that mms history up to the tlb_gen in cpu_tlbstate.ctxs
>> + * or the CPU has performed an equivalent set of flushes.
>>  *
>> - *   More scalable flush, from Andi Kleen
>> - *
>> - *   Implement flush IPI by CALL_FUNCTION_VECTOR, Alex Shi
>> + * For this purpose, an equivalent set is a set that is at least as strong.
>> + * So, for example, if the flush history is a full flush at time 1,
>> + * a full flush after time 1 is sufficient, but a full flush before time 1
>> + * is not.  Similarly, any number of flushes can be replaced by a single
>> + * full flush so long as that replacement flush is after all the flushes
>> + * that it's replacing.
>>  */
>>
>> atomic64_t last_mm_ctx_id = ATOMIC64_INIT(1);
>> @@ -138,7 +145,16 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct 
>> mm_struct *next,
>>   return;
>>   }
>>
>> - /* Resume remote flushes and then read tlb_gen. */
>> + /*
>> +  * Resume remote flushes and then read tlb_gen.  The
>> +  * implied barrier in atomic64_read() synchronizes
>> +  * with inc_mm_tlb_gen() like this:
>
> You mean the implied memory barrier in cpumask_set_cpu(), no?
>


Ugh, yes.  And I misread PeterZ's email and incorrectly removed the
smp_mb__after_atomic().  I'll respin this patch.


Re: [PATCH v5 2/2] x86/mm: Improve TLB flush documentation

2017-07-24 Thread Andy Lutomirski
On Mon, Jul 24, 2017 at 9:47 PM, Nadav Amit  wrote:
> Andy Lutomirski  wrote:
>
>> Improve comments as requested by PeterZ and also add some
>> documentation at the top of the file.
>>
>> Signed-off-by: Andy Lutomirski 
>> ---
>> arch/x86/mm/tlb.c | 43 +--
>> 1 file changed, 33 insertions(+), 10 deletions(-)
>>
>> diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c
>> index ce104b962a17..d4ee781ca656 100644
>> --- a/arch/x86/mm/tlb.c
>> +++ b/arch/x86/mm/tlb.c
>> @@ -15,17 +15,24 @@
>> #include 
>>
>> /*
>> - *   TLB flushing, formerly SMP-only
>> - *   c/o Linus Torvalds.
>> + * The code in this file handles mm switches and TLB flushes.
>>  *
>> - *   These mean you can really definitely utterly forget about
>> - *   writing to user space from interrupts. (Its not allowed anyway).
>> + * An mm's TLB state is logically represented by a totally ordered sequence
>> + * of TLB flushes.  Each flush increments the mm's tlb_gen.
>>  *
>> - *   Optimizations Manfred Spraul 
>> + * Each CPU that might have an mm in its TLB (and that might ever use
>> + * those TLB entries) will have an entry for it in its cpu_tlbstate.ctxs
>> + * array.  The kernel maintains the following invariant: for each CPU and
>> + * for each mm in its cpu_tlbstate.ctxs array, the CPU has performed all
>> + * flushes in that mms history up to the tlb_gen in cpu_tlbstate.ctxs
>> + * or the CPU has performed an equivalent set of flushes.
>>  *
>> - *   More scalable flush, from Andi Kleen
>> - *
>> - *   Implement flush IPI by CALL_FUNCTION_VECTOR, Alex Shi
>> + * For this purpose, an equivalent set is a set that is at least as strong.
>> + * So, for example, if the flush history is a full flush at time 1,
>> + * a full flush after time 1 is sufficient, but a full flush before time 1
>> + * is not.  Similarly, any number of flushes can be replaced by a single
>> + * full flush so long as that replacement flush is after all the flushes
>> + * that it's replacing.
>>  */
>>
>> atomic64_t last_mm_ctx_id = ATOMIC64_INIT(1);
>> @@ -138,7 +145,16 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct 
>> mm_struct *next,
>>   return;
>>   }
>>
>> - /* Resume remote flushes and then read tlb_gen. */
>> + /*
>> +  * Resume remote flushes and then read tlb_gen.  The
>> +  * implied barrier in atomic64_read() synchronizes
>> +  * with inc_mm_tlb_gen() like this:
>
> You mean the implied memory barrier in cpumask_set_cpu(), no?
>


Ugh, yes.  And I misread PeterZ's email and incorrectly removed the
smp_mb__after_atomic().  I'll respin this patch.


[PATCH 1/5] arm64: dts: qcom: Add RPM glink nodes to msm8996

2017-07-24 Thread Bjorn Andersson
Signed-off-by: Bjorn Andersson 
---
 arch/arm64/boot/dts/qcom/msm8996.dtsi | 78 +++
 1 file changed, 78 insertions(+)

diff --git a/arch/arm64/boot/dts/qcom/msm8996.dtsi 
b/arch/arm64/boot/dts/qcom/msm8996.dtsi
index 8f085716e258..8ebef05e1750 100644
--- a/arch/arm64/boot/dts/qcom/msm8996.dtsi
+++ b/arch/arm64/boot/dts/qcom/msm8996.dtsi
@@ -276,12 +276,83 @@
hwlocks = <&tcsr_mutex 3>;
};
 
+   rpm-glink {
+   compatible = "qcom,glink-rpm";
+
+   interrupts = <GIC_SPI 168 IRQ_TYPE_EDGE_RISING>;
+
+   qcom,rpm-msg-ram = <&rpm_msg_ram>;
+
+   mboxes = <&apcs_glb 0>;
+
+   rpm_requests {
+   compatible = "qcom,rpm-msm8996";
+   qcom,glink-channels = "rpm_requests";
+
+   pm8994-regulators {
+   compatible = "qcom,rpm-pm8994-regulators";
+
+   pm8994_s1: s1 {};
+   pm8994_s2: s2 {};
+   pm8994_s3: s3 {};
+   pm8994_s4: s4 {};
+   pm8994_s5: s5 {};
+   pm8994_s6: s6 {};
+   pm8994_s7: s7 {};
+   pm8994_s8: s8 {};
+   pm8994_s9: s9 {};
+   pm8994_s10: s10 {};
+   pm8994_s11: s11 {};
+   pm8994_s12: s12 {};
+
+   pm8994_l1: l1 {};
+   pm8994_l2: l2 {};
+   pm8994_l3: l3 {};
+   pm8994_l4: l4 {};
+   pm8994_l5: l5 {};
+   pm8994_l6: l6 {};
+   pm8994_l7: l7 {};
+   pm8994_l8: l8 {};
+   pm8994_l9: l9 {};
+   pm8994_l10: l10 {};
+   pm8994_l11: l11 {};
+   pm8994_l12: l12 {};
+   pm8994_l13: l13 {};
+   pm8994_l14: l14 {};
+   pm8994_l15: l15 {};
+   pm8994_l16: l16 {};
+   pm8994_l17: l17 {};
+   pm8994_l18: l18 {};
+   pm8994_l19: l19 {};
+   pm8994_l20: l20 {};
+   pm8994_l21: l21 {};
+   pm8994_l22: l22 {};
+   pm8994_l23: l23 {};
+   pm8994_l24: l24 {};
+   pm8994_l25: l25 {};
+   pm8994_l26: l26 {};
+   pm8994_l27: l27 {};
+   pm8994_l28: l28 {};
+   pm8994_l29: l29 {};
+   pm8994_l30: l30 {};
+   pm8994_l31: l31 {};
+   pm8994_l32: l32 {};
+   };
+
+   };
+   };
+
soc: soc {
#address-cells = <1>;
#size-cells = <1>;
ranges = <0 0 0 0xffffffff>;
compatible = "simple-bus";
 
+   rpm_msg_ram: memory@68000 {
+   compatible = "qcom,rpm-msg-ram";
+   reg = <0x68000 0x6000>;
+   };
+
tcsr_mutex_regs: syscon@740000 {
compatible = "syscon";
reg = <0x740000 0x20000>;
@@ -303,6 +374,13 @@
reg = <0x9820000 0x1000>;
};
 
+   apcs_glb: mailbox@9820000 {
+   compatible = "qcom,msm8996-apcs-hmss-global";
+   reg = <0x9820000 0x1000>;
+
+   #mbox-cells = <1>;
+   };
+
gcc: clock-controller@300000 {
compatible = "qcom,gcc-msm8996";
#clock-cells = <1>;
-- 
2.12.0



[PATCH 4/5] arm64: dts: qcom: msm8996: Specify smd-edge for ADSP

2017-07-24 Thread Bjorn Andersson
Add the smd-edge node for the adsp, to allow SMD communication with the
ADSP.

Signed-off-by: Bjorn Andersson 
---
 arch/arm64/boot/dts/qcom/msm8996.dtsi | 9 +
 1 file changed, 9 insertions(+)

diff --git a/arch/arm64/boot/dts/qcom/msm8996.dtsi 
b/arch/arm64/boot/dts/qcom/msm8996.dtsi
index 7d909546ca24..395666dcce1b 100644
--- a/arch/arm64/boot/dts/qcom/msm8996.dtsi
+++ b/arch/arm64/boot/dts/qcom/msm8996.dtsi
@@ -636,6 +636,15 @@
 
qcom,smem-states = <&adsp_smp2p_out 0>;
qcom,smem-state-names = "stop";
+
+   smd-edge {
+   interrupts = <0 156 IRQ_TYPE_EDGE_RISING>;
+
+   label = "lpass";
+   qcom,ipc = <&apcs 16 8>;
+   qcom,smd-edge = <1>;
+   qcom,remote-pid = <2>;
+   };
};
 
adsp-smp2p {
-- 
2.12.0



[PATCH 4/5] arm64: dts: qcom: msm8996: Specify smd-edge for ADSP

2017-07-24 Thread Bjorn Andersson
Add the smd-edge node for the adsp, to allow SMD communication with the
ADSP.

Signed-off-by: Bjorn Andersson 
---
 arch/arm64/boot/dts/qcom/msm8996.dtsi | 9 +
 1 file changed, 9 insertions(+)

diff --git a/arch/arm64/boot/dts/qcom/msm8996.dtsi 
b/arch/arm64/boot/dts/qcom/msm8996.dtsi
index 7d909546ca24..395666dcce1b 100644
--- a/arch/arm64/boot/dts/qcom/msm8996.dtsi
+++ b/arch/arm64/boot/dts/qcom/msm8996.dtsi
@@ -636,6 +636,15 @@
 
qcom,smem-states = <&adsp_smp2p_out 0>;
qcom,smem-state-names = "stop";
+
+   smd-edge {
+   interrupts = <0 156 IRQ_TYPE_EDGE_RISING>;
+
+   label = "lpass";
+   qcom,ipc = <&apcs 16 8>;
+   qcom,smd-edge = <1>;
+   qcom,remote-pid = <2>;
+   };
};
 
adsp-smp2p {
-- 
2.12.0



[PATCH 1/5] arm64: dts: qcom: Add RPM glink nodes to msm8996

2017-07-24 Thread Bjorn Andersson
Signed-off-by: Bjorn Andersson 
---
 arch/arm64/boot/dts/qcom/msm8996.dtsi | 78 +++
 1 file changed, 78 insertions(+)

diff --git a/arch/arm64/boot/dts/qcom/msm8996.dtsi 
b/arch/arm64/boot/dts/qcom/msm8996.dtsi
index 8f085716e258..8ebef05e1750 100644
--- a/arch/arm64/boot/dts/qcom/msm8996.dtsi
+++ b/arch/arm64/boot/dts/qcom/msm8996.dtsi
@@ -276,12 +276,83 @@
hwlocks = <&tcsr_mutex 3>;
};
 
+   rpm-glink {
+   compatible = "qcom,glink-rpm";
+
+   interrupts = <GIC_SPI 168 IRQ_TYPE_EDGE_RISING>;
+
+   qcom,rpm-msg-ram = <&rpm_msg_ram>;
+
+   mboxes = <&apcs_glb 0>;
+
+   rpm_requests {
+   compatible = "qcom,rpm-msm8996";
+   qcom,glink-channels = "rpm_requests";
+
+   pm8994-regulators {
+   compatible = "qcom,rpm-pm8994-regulators";
+
+   pm8994_s1: s1 {};
+   pm8994_s2: s2 {};
+   pm8994_s3: s3 {};
+   pm8994_s4: s4 {};
+   pm8994_s5: s5 {};
+   pm8994_s6: s6 {};
+   pm8994_s7: s7 {};
+   pm8994_s8: s8 {};
+   pm8994_s9: s9 {};
+   pm8994_s10: s10 {};
+   pm8994_s11: s11 {};
+   pm8994_s12: s12 {};
+
+   pm8994_l1: l1 {};
+   pm8994_l2: l2 {};
+   pm8994_l3: l3 {};
+   pm8994_l4: l4 {};
+   pm8994_l5: l5 {};
+   pm8994_l6: l6 {};
+   pm8994_l7: l7 {};
+   pm8994_l8: l8 {};
+   pm8994_l9: l9 {};
+   pm8994_l10: l10 {};
+   pm8994_l11: l11 {};
+   pm8994_l12: l12 {};
+   pm8994_l13: l13 {};
+   pm8994_l14: l14 {};
+   pm8994_l15: l15 {};
+   pm8994_l16: l16 {};
+   pm8994_l17: l17 {};
+   pm8994_l18: l18 {};
+   pm8994_l19: l19 {};
+   pm8994_l20: l20 {};
+   pm8994_l21: l21 {};
+   pm8994_l22: l22 {};
+   pm8994_l23: l23 {};
+   pm8994_l24: l24 {};
+   pm8994_l25: l25 {};
+   pm8994_l26: l26 {};
+   pm8994_l27: l27 {};
+   pm8994_l28: l28 {};
+   pm8994_l29: l29 {};
+   pm8994_l30: l30 {};
+   pm8994_l31: l31 {};
+   pm8994_l32: l32 {};
+   };
+
+   };
+   };
+
soc: soc {
#address-cells = <1>;
#size-cells = <1>;
ranges = <0 0 0 0xffffffff>;
compatible = "simple-bus";
 
+   rpm_msg_ram: memory@68000 {
+   compatible = "qcom,rpm-msg-ram";
+   reg = <0x68000 0x6000>;
+   };
+
tcsr_mutex_regs: syscon@740000 {
compatible = "syscon";
reg = <0x740000 0x20000>;
@@ -303,6 +374,13 @@
reg = <0x9820000 0x1000>;
};
 
+   apcs_glb: mailbox@9820000 {
+   compatible = "qcom,msm8996-apcs-hmss-global";
+   reg = <0x9820000 0x1000>;
+
+   #mbox-cells = <1>;
+   };
+
gcc: clock-controller@300000 {
compatible = "qcom,gcc-msm8996";
#clock-cells = <1>;
-- 
2.12.0



[PATCH 0/5] DB820c DTS dump

2017-07-24 Thread Bjorn Andersson
A dump of DTS patches for MSM8996 and DB820c, found in the Linaro landing team
tree.

Bjorn Andersson (3):
  ARM64: dts: qcom: Add RPM glink nodes to msm8996
  arm64: dts: msm8996: Add modem smp2p nodes
  arm64: dts: qcom: msm8996: Specify smd-edge for ADSP

Rajendra Nayak (1):
  ARM64: dts: qcom: db820c: Add pm8994 regulator node

Vivek Gautam (1):
  arm64: dts: msm8996: Add QFPROM node

 arch/arm64/boot/dts/qcom/apq8096-db820c.dtsi | 148 +++
 arch/arm64/boot/dts/qcom/msm8996.dtsi| 122 ++
 2 files changed, 270 insertions(+)

-- 
2.12.0



[PATCH 3/5] arm64: dts: msm8996: Add modem smp2p nodes

2017-07-24 Thread Bjorn Andersson
Signed-off-by: Bjorn Andersson 
---
 arch/arm64/boot/dts/qcom/msm8996.dtsi | 24 
 1 file changed, 24 insertions(+)

diff --git a/arch/arm64/boot/dts/qcom/msm8996.dtsi 
b/arch/arm64/boot/dts/qcom/msm8996.dtsi
index 8ebef05e1750..7d909546ca24 100644
--- a/arch/arm64/boot/dts/qcom/msm8996.dtsi
+++ b/arch/arm64/boot/dts/qcom/msm8996.dtsi
@@ -662,6 +662,30 @@
};
};
 
+   modem-smp2p {
+   compatible = "qcom,smp2p";
+   qcom,smem = <435>, <428>;
+
+   interrupts = <0 451 IRQ_TYPE_EDGE_RISING>;
+
+   qcom,ipc = <&apcs 16 14>;
+
+   qcom,local-pid = <0>;
+   qcom,remote-pid = <1>;
+
+   modem_smp2p_out: master-kernel {
+   qcom,entry-name = "master-kernel";
+   #qcom,smem-state-cells = <1>;
+   };
+
+   modem_smp2p_in: slave-kernel {
+   qcom,entry-name = "slave-kernel";
+
+   interrupt-controller;
+   #interrupt-cells = <2>;
+   };
+   };
+
smp2p-slpi {
compatible = "qcom,smp2p";
qcom,smem = <481>, <430>;
-- 
2.12.0



[PATCH 0/5] DB820c DTS dump

2017-07-24 Thread Bjorn Andersson
A dump of DTS patches for MSM8996 and DB820c, found in the Linaro landing team
tree.

Bjorn Andersson (3):
  ARM64: dts: qcom: Add RPM glink nodes to msm8996
  arm64: dts: msm8996: Add modem smp2p nodes
  arm64: dts: qcom: msm8996: Specify smd-edge for ADSP

Rajendra Nayak (1):
  ARM64: dts: qcom: db820c: Add pm8994 regulator node

Vivek Gautam (1):
  arm64: dts: msm8996: Add QFPROM node

 arch/arm64/boot/dts/qcom/apq8096-db820c.dtsi | 148 +++
 arch/arm64/boot/dts/qcom/msm8996.dtsi| 122 ++
 2 files changed, 270 insertions(+)

-- 
2.12.0



[PATCH 3/5] arm64: dts: msm8996: Add modem smp2p nodes

2017-07-24 Thread Bjorn Andersson
Signed-off-by: Bjorn Andersson 
---
 arch/arm64/boot/dts/qcom/msm8996.dtsi | 24 
 1 file changed, 24 insertions(+)

diff --git a/arch/arm64/boot/dts/qcom/msm8996.dtsi 
b/arch/arm64/boot/dts/qcom/msm8996.dtsi
index 8ebef05e1750..7d909546ca24 100644
--- a/arch/arm64/boot/dts/qcom/msm8996.dtsi
+++ b/arch/arm64/boot/dts/qcom/msm8996.dtsi
@@ -662,6 +662,30 @@
};
};
 
+   modem-smp2p {
+   compatible = "qcom,smp2p";
+   qcom,smem = <435>, <428>;
+
+   interrupts = <0 451 IRQ_TYPE_EDGE_RISING>;
+
+   qcom,ipc = <&apcs 16 14>;
+
+   qcom,local-pid = <0>;
+   qcom,remote-pid = <1>;
+
+   modem_smp2p_out: master-kernel {
+   qcom,entry-name = "master-kernel";
+   #qcom,smem-state-cells = <1>;
+   };
+
+   modem_smp2p_in: slave-kernel {
+   qcom,entry-name = "slave-kernel";
+
+   interrupt-controller;
+   #interrupt-cells = <2>;
+   };
+   };
+
smp2p-slpi {
compatible = "qcom,smp2p";
qcom,smem = <481>, <430>;
-- 
2.12.0



[PATCH 5/5] arm64: dts: msm8996: Add QFPROM node

2017-07-24 Thread Bjorn Andersson
From: Vivek Gautam 

Signed-off-by: Vivek Gautam 
Signed-off-by: Bjorn Andersson 
---
 arch/arm64/boot/dts/qcom/msm8996.dtsi | 11 +++
 1 file changed, 11 insertions(+)

diff --git a/arch/arm64/boot/dts/qcom/msm8996.dtsi 
b/arch/arm64/boot/dts/qcom/msm8996.dtsi
index 395666dcce1b..d0865d2f731e 100644
--- a/arch/arm64/boot/dts/qcom/msm8996.dtsi
+++ b/arch/arm64/boot/dts/qcom/msm8996.dtsi
@@ -496,6 +496,17 @@
status = "disabled";
};
 
+   qfprom: qfprom@74000 {
+   compatible = "qcom,qfprom";
+   reg = <0x00074000 0x8ff>;
+   #address-cells = <1>;
+   #size-cells = <1>;
+   qusb2p_hstx_trim: hstx_trim@24e {
+   reg = <0x24e 0x2>;
+   bits = <5 4>;
+   };
+   };
+
sdhc2: sdhci@74a4900 {
 status = "disabled";
 compatible = "qcom,sdhci-msm-v4";
-- 
2.12.0



[PATCH 5/5] arm64: dts: msm8996: Add QFPROM node

2017-07-24 Thread Bjorn Andersson
From: Vivek Gautam 

Signed-off-by: Vivek Gautam 
Signed-off-by: Bjorn Andersson 
---
 arch/arm64/boot/dts/qcom/msm8996.dtsi | 11 +++
 1 file changed, 11 insertions(+)

diff --git a/arch/arm64/boot/dts/qcom/msm8996.dtsi 
b/arch/arm64/boot/dts/qcom/msm8996.dtsi
index 395666dcce1b..d0865d2f731e 100644
--- a/arch/arm64/boot/dts/qcom/msm8996.dtsi
+++ b/arch/arm64/boot/dts/qcom/msm8996.dtsi
@@ -496,6 +496,17 @@
status = "disabled";
};
 
+   qfprom: qfprom@74000 {
+   compatible = "qcom,qfprom";
+   reg = <0x00074000 0x8ff>;
+   #address-cells = <1>;
+   #size-cells = <1>;
+   qusb2p_hstx_trim: hstx_trim@24e {
+   reg = <0x24e 0x2>;
+   bits = <5 4>;
+   };
+   };
+
sdhc2: sdhci@74a4900 {
 status = "disabled";
 compatible = "qcom,sdhci-msm-v4";
-- 
2.12.0



[PATCH 2/5] arm64: dts: qcom: db820c: Add pm8994 regulator node

2017-07-24 Thread Bjorn Andersson
From: Rajendra Nayak 

Add PM8994 RPM regulators with their min/max voltages to DB820c.

Signed-off-by: Rajendra Nayak 
Signed-off-by: Bjorn Andersson 
---
 arch/arm64/boot/dts/qcom/apq8096-db820c.dtsi | 148 +++
 1 file changed, 148 insertions(+)

diff --git a/arch/arm64/boot/dts/qcom/apq8096-db820c.dtsi 
b/arch/arm64/boot/dts/qcom/apq8096-db820c.dtsi
index d2196fc6d739..b4817d6c0d50 100644
--- a/arch/arm64/boot/dts/qcom/apq8096-db820c.dtsi
+++ b/arch/arm64/boot/dts/qcom/apq8096-db820c.dtsi
@@ -106,4 +106,152 @@
gpios = <&pm8994_gpios 2 GPIO_ACTIVE_LOW>;
};
};
+
+   rpm-glink {
+   rpm_requests {
+   pm8994-regulators {
+   vdd_l1-supply = <&pm8994_s3>;
+   vdd_l2_l26_l28-supply = <&pm8994_s3>;
+   vdd_l3_l11-supply = <&pm8994_s3>;
+   vdd_l4_l27_l31-supply = <&pm8994_s3>;
+   vdd_l5_l7-supply = <&pm8994_s5>;
+   vdd_l14_l15-supply = <&pm8994_s5>;
+   vdd_l20_l21-supply = <&pm8994_s5>;
+   vdd_l25-supply = <&pm8994_s3>;
+
+   s3 {
+   regulator-min-microvolt = <1300000>;
+   regulator-max-microvolt = <1300000>;
+   };
+   s4 {
+   regulator-min-microvolt = <1800000>;
+   regulator-max-microvolt = <1800000>;
+   };
+   s5 {
+   regulator-min-microvolt = <2150000>;
+   regulator-max-microvolt = <2150000>;
+   };
+   s7 {
+   regulator-min-microvolt = <800000>;
+   regulator-max-microvolt = <800000>;
+   };
+
+   l1 {
+   regulator-min-microvolt = <1000000>;
+   regulator-max-microvolt = <1000000>;
+   };
+   l2 {
+   regulator-min-microvolt = <1250000>;
+   regulator-max-microvolt = <1250000>;
+   };
+   l3 {
+   regulator-min-microvolt = <850000>;
+   regulator-max-microvolt = <850000>;
+   };
+   l4 {
+   regulator-min-microvolt = <1225000>;
+   regulator-max-microvolt = <1225000>;
+   };
+   l6 {
+   regulator-min-microvolt = <1200000>;
+   regulator-max-microvolt = <1200000>;
+   };
+   l8 {
+   regulator-min-microvolt = <1800000>;
+   regulator-max-microvolt = <1800000>;
+   };
+   l9 {
+   regulator-min-microvolt = <1800000>;
+   regulator-max-microvolt = <1800000>;
+   };
+   l10 {
+   regulator-min-microvolt = <1800000>;
+   regulator-max-microvolt = <1800000>;
+   };
+   l11 {
+   regulator-min-microvolt = <1150000>;
+   regulator-max-microvolt = <1150000>;
+   };
+   l12 {
+   regulator-min-microvolt = <1800000>;
+   regulator-max-microvolt = <1800000>;
+   };
+   l13 {
+   regulator-min-microvolt = <1800000>;
+   regulator-max-microvolt = <2950000>;
+   };
+   l14 {
+   regulator-min-microvolt = <1800000>;
+   regulator-max-microvolt = <1800000>;
+   };
+   l15 {
+   

[PATCH 2/5] arm64: dts: qcom: db820c: Add pm8994 regulator node

2017-07-24 Thread Bjorn Andersson
From: Rajendra Nayak 

Add PM8994 RPM regulators with their min/max voltages to DB820c.

Signed-off-by: Rajendra Nayak 
Signed-off-by: Bjorn Andersson 
---
 arch/arm64/boot/dts/qcom/apq8096-db820c.dtsi | 148 +++
 1 file changed, 148 insertions(+)

diff --git a/arch/arm64/boot/dts/qcom/apq8096-db820c.dtsi 
b/arch/arm64/boot/dts/qcom/apq8096-db820c.dtsi
index d2196fc6d739..b4817d6c0d50 100644
--- a/arch/arm64/boot/dts/qcom/apq8096-db820c.dtsi
+++ b/arch/arm64/boot/dts/qcom/apq8096-db820c.dtsi
@@ -106,4 +106,152 @@
gpios = <&pm8994_gpios 2 GPIO_ACTIVE_LOW>;
};
};
+
+   rpm-glink {
+   rpm_requests {
+   pm8994-regulators {
+   vdd_l1-supply = <&pm8994_s3>;
+   vdd_l2_l26_l28-supply = <&pm8994_s3>;
+   vdd_l3_l11-supply = <&pm8994_s3>;
+   vdd_l4_l27_l31-supply = <&pm8994_s3>;
+   vdd_l5_l7-supply = <&pm8994_s5>;
+   vdd_l14_l15-supply = <&pm8994_s5>;
+   vdd_l20_l21-supply = <&pm8994_s5>;
+   vdd_l25-supply = <&pm8994_s3>;
+
+   s3 {
+   regulator-min-microvolt = <1300000>;
+   regulator-max-microvolt = <1300000>;
+   };
+   s4 {
+   regulator-min-microvolt = <1800000>;
+   regulator-max-microvolt = <1800000>;
+   };
+   s5 {
+   regulator-min-microvolt = <2150000>;
+   regulator-max-microvolt = <2150000>;
+   };
+   s7 {
+   regulator-min-microvolt = <800000>;
+   regulator-max-microvolt = <800000>;
+   };
+
+   l1 {
+   regulator-min-microvolt = <1000000>;
+   regulator-max-microvolt = <1000000>;
+   };
+   l2 {
+   regulator-min-microvolt = <1250000>;
+   regulator-max-microvolt = <1250000>;
+   };
+   l3 {
+   regulator-min-microvolt = <850000>;
+   regulator-max-microvolt = <850000>;
+   };
+   l4 {
+   regulator-min-microvolt = <1225000>;
+   regulator-max-microvolt = <1225000>;
+   };
+   l6 {
+   regulator-min-microvolt = <1200000>;
+   regulator-max-microvolt = <1200000>;
+   };
+   l8 {
+   regulator-min-microvolt = <1800000>;
+   regulator-max-microvolt = <1800000>;
+   };
+   l9 {
+   regulator-min-microvolt = <1800000>;
+   regulator-max-microvolt = <1800000>;
+   };
+   l10 {
+   regulator-min-microvolt = <1800000>;
+   regulator-max-microvolt = <1800000>;
+   };
+   l11 {
+   regulator-min-microvolt = <1150000>;
+   regulator-max-microvolt = <1150000>;
+   };
+   l12 {
+   regulator-min-microvolt = <1800000>;
+   regulator-max-microvolt = <1800000>;
+   };
+   l13 {
+   regulator-min-microvolt = <1800000>;
+   regulator-max-microvolt = <2950000>;
+   };
+   l14 {
+   regulator-min-microvolt = <1800000>;
+   regulator-max-microvolt = <1800000>;
+   };
+   l15 {
+   regulator-min-microvolt = <1800000>;
+   

Re: [RFC Part1 PATCH v3 01/17] Documentation/x86: Add AMD Secure Encrypted Virtualization (SEV) descrption

2017-07-24 Thread Borislav Petkov
On Mon, Jul 24, 2017 at 02:07:41PM -0500, Brijesh Singh wrote:

Subject: Re: [RFC Part1 PATCH v3 01/17] Documentation/x86: Add AMD Secure 
Encrypted Virtualization (SEV) descrption

 ^^

Please introduce a spellchecker into your workflow.

> Update amd-memory-encryption document describing the AMD Secure Encrypted

"Update the AMD memory encryption document...

The patch has the proper URL already.

> Virtualization (SEV) feature.
> 
> Signed-off-by: Brijesh Singh 
> ---
>  Documentation/x86/amd-memory-encryption.txt | 29 
> ++---
>  1 file changed, 26 insertions(+), 3 deletions(-)
> 
> diff --git a/Documentation/x86/amd-memory-encryption.txt 
> b/Documentation/x86/amd-memory-encryption.txt
> index f512ab7..747df07 100644
> --- a/Documentation/x86/amd-memory-encryption.txt
> +++ b/Documentation/x86/amd-memory-encryption.txt
> @@ -1,4 +1,5 @@
> -Secure Memory Encryption (SME) is a feature found on AMD processors.
> +Secure Memory Encryption (SME) and Secure Encrypted Virtualization (SEV) are
> +features found on AMD processors.
>  
>  SME provides the ability to mark individual pages of memory as encrypted 
> using
>  the standard x86 page tables.  A page that is marked encrypted will be
> @@ -6,6 +7,12 @@ automatically decrypted when read from DRAM and encrypted 
> when written to
>  DRAM.  SME can therefore be used to protect the contents of DRAM from 
> physical
>  attacks on the system.
>  
> +SEV enables running encrypted virtual machine (VMs) in which the code and 
> data

 machines

> +of the virtual machine are secured so that decrypted version is available 
> only

... of the guest VM ...   ... so that a decrypted ...

> +within the VM itself. SEV guest VMs have concept of private and shared 
> memory.

have *the* concept - you need to use
definite and indefinite articles in your
text.

> +Private memory is encrypted with the guest-specific key, while shared memory
> +may be encrypted with hypervisor key.

And here you explain that the hypervisor key is the same key which we
use in SME. So that people can make the connection.

> +
>  A page is encrypted when a page table entry has the encryption bit set (see
>  below on how to determine its position).  The encryption bit can also be
>  specified in the cr3 register, allowing the PGD table to be encrypted. Each
> @@ -19,11 +26,20 @@ so that the PGD is encrypted, but not set the encryption 
> bit in the PGD entry
>  for a PUD which results in the PUD pointed to by that entry to not be
>  encrypted.
>  
> -Support for SME can be determined through the CPUID instruction. The CPUID
> -function 0x8000001f reports information related to SME:
> +When SEV is enabled, certain type of memory (namely insruction pages and 
> guest

When SEV is enabled, instruction pages and guest page tables are ...

> +page tables) are always treated as private. Due to security reasons all DMA

security reasons??

> +operations inside the guest must be performed on shared memory. Since the
> +memory encryption bit is only controllable by the guest OS when it is 
> operating

 ... is controlled ...

> +in 64-bit or 32-bit PAE mode, in all other modes the SEV hardware forces 
> memory

... forces the 
memory ...

> +encryption bit to 1.
> +
> +Support for SME and SEV can be determined through the CPUID instruction. The
> +CPUID function 0x8000001f reports information related to SME:
>  
>   0x8000001f[eax]:
>   Bit[0] indicates support for SME
> + 0x800001f[eax]:

There's a 0 missing and you don't really need it as it is already above.

> + Bit[1] indicates support for SEV
>   0x8000001f[ebx]:
>   Bits[5:0]  pagetable bit number used to activate memory
>  encryption
> @@ -39,6 +55,13 @@ determine if SME is enabled and/or to enable memory 
> encryption:
>   Bit[23]   0 = memory encryption features are disabled
> 1 = memory encryption features are enabled
>  
> +If SEV is supported, MSR 0xc0010131 (MSR_F17H_SEV) can be used to determine 
> if

If this MSR is going to be part of the architecture - and I really think
it is - then call it MSR_AMD64_SEV.

-- 
Regards/Gruss,
Boris.

SUSE Linux GmbH, GF: Felix Imendörffer, Jane Smithard, Graham Norton, HRB 21284 
(AG Nürnberg)
-- 


Re: [RFC Part1 PATCH v3 01/17] Documentation/x86: Add AMD Secure Encrypted Virtualization (SEV) descrption

2017-07-24 Thread Borislav Petkov
On Mon, Jul 24, 2017 at 02:07:41PM -0500, Brijesh Singh wrote:

Subject: Re: [RFC Part1 PATCH v3 01/17] Documentation/x86: Add AMD Secure 
Encrypted Virtualization (SEV) descrption

 ^^

Please introduce a spellchecker into your workflow.

> Update amd-memory-encryption document describing the AMD Secure Encrypted

"Update the AMD memory encryption document...

The patch has the proper URL already.

> Virtualization (SEV) feature.
> 
> Signed-off-by: Brijesh Singh 
> ---
>  Documentation/x86/amd-memory-encryption.txt | 29 
> ++---
>  1 file changed, 26 insertions(+), 3 deletions(-)
> 
> diff --git a/Documentation/x86/amd-memory-encryption.txt 
> b/Documentation/x86/amd-memory-encryption.txt
> index f512ab7..747df07 100644
> --- a/Documentation/x86/amd-memory-encryption.txt
> +++ b/Documentation/x86/amd-memory-encryption.txt
> @@ -1,4 +1,5 @@
> -Secure Memory Encryption (SME) is a feature found on AMD processors.
> +Secure Memory Encryption (SME) and Secure Encrypted Virtualization (SEV) are
> +features found on AMD processors.
>  
>  SME provides the ability to mark individual pages of memory as encrypted 
> using
>  the standard x86 page tables.  A page that is marked encrypted will be
> @@ -6,6 +7,12 @@ automatically decrypted when read from DRAM and encrypted 
> when written to
>  DRAM.  SME can therefore be used to protect the contents of DRAM from 
> physical
>  attacks on the system.
>  
> +SEV enables running encrypted virtual machine (VMs) in which the code and 
> data

 machines

> +of the virtual machine are secured so that decrypted version is available 
> only

... of the guest VM ...   ... so that a decrypted ...

> +within the VM itself. SEV guest VMs have concept of private and shared 
> memory.

have *the* concept - you need to use
definite and indefinite articles in your
text.

> +Private memory is encrypted with the guest-specific key, while shared memory
> +may be encrypted with hypervisor key.

And here you explain that the hypervisor key is the same key which we
use in SME. So that people can make the connection.

> +
>  A page is encrypted when a page table entry has the encryption bit set (see
>  below on how to determine its position).  The encryption bit can also be
>  specified in the cr3 register, allowing the PGD table to be encrypted. Each
> @@ -19,11 +26,20 @@ so that the PGD is encrypted, but not set the encryption 
> bit in the PGD entry
>  for a PUD which results in the PUD pointed to by that entry to not be
>  encrypted.
>  
> -Support for SME can be determined through the CPUID instruction. The CPUID
> -function 0x801f reports information related to SME:
> +When SEV is enabled, certain type of memory (namely insruction pages and 
> guest

When SEV is enabled, instruction pages and guest page tables are ...

> +page tables) are always treated as private. Due to security reasons all DMA

security reasons??

> +operations inside the guest must be performed on shared memory. Since the
> +memory encryption bit is only controllable by the guest OS when it is 
> operating

 ... is controlled ...

> +in 64-bit or 32-bit PAE mode, in all other modes the SEV hardware forces 
> memory

... forces the 
memory ...

> +encryption bit to 1.
> +
> +Support for SME and SEV can be determined through the CPUID instruction. The
> +CPUID function 0x801f reports information related to SME:
>  
>   0x801f[eax]:
>   Bit[0] indicates support for SME
> + 0x81f[eax]:

There's a 0 missing and you don't really need it as it is already above.

> + Bit[1] indicates support for SEV
>   0x801f[ebx]:
>   Bits[5:0]  pagetable bit number used to activate memory
>  encryption
> @@ -39,6 +55,13 @@ determine if SME is enabled and/or to enable memory 
> encryption:
>   Bit[23]   0 = memory encryption features are disabled
> 1 = memory encryption features are enabled
>  
> +If SEV is supported, MSR 0xc0010131 (MSR_F17H_SEV) can be used to determine 
> if

If this MSR is going to be part of the architecture - and I really think
it is - then call it MSR_AMD64_SEV.

-- 
Regards/Gruss,
Boris.

SUSE Linux GmbH, GF: Felix Imendörffer, Jane Smithard, Graham Norton, HRB 21284 
(AG Nürnberg)
-- 


Re: [PATCH v2 6/7] mailbox: bcm-flexrm-mailbox: Set msg_queue_len for each channel

2017-07-24 Thread Anup Patel
On Mon, Jul 24, 2017 at 10:06 PM, Jassi Brar  wrote:
> On Mon, Jul 24, 2017 at 9:26 AM, Anup Patel  wrote:
>> Hi Jassi,
>>
>> Sorry for the delayed response...
>>
>> On Fri, Jul 21, 2017 at 9:16 PM, Jassi Brar  wrote:
>>> Hi Anup,
>>>
>>> On Fri, Jul 21, 2017 at 12:25 PM, Anup Patel  
>>> wrote:
 The Broadcom FlexRM ring (i.e. mailbox channel) can handle
 larger number of messages queued in one FlexRM ring hence
 this patch sets msg_queue_len for each mailbox channel to
 be same as RING_MAX_REQ_COUNT.

 Signed-off-by: Anup Patel 
 Reviewed-by: Scott Branden 
 ---
  drivers/mailbox/bcm-flexrm-mailbox.c | 5 -
  1 file changed, 4 insertions(+), 1 deletion(-)

 diff --git a/drivers/mailbox/bcm-flexrm-mailbox.c 
 b/drivers/mailbox/bcm-flexrm-mailbox.c
 index 9873818..20055a0 100644
 --- a/drivers/mailbox/bcm-flexrm-mailbox.c
 +++ b/drivers/mailbox/bcm-flexrm-mailbox.c
 @@ -1683,8 +1683,11 @@ static int flexrm_mbox_probe(struct platform_device 
 *pdev)
 ret = -ENOMEM;
 goto fail_free_debugfs_root;
 }
 -   for (index = 0; index < mbox->num_rings; index++)
 +   for (index = 0; index < mbox->num_rings; index++) {
 +   mbox->controller.chans[index].msg_queue_len =
 +   RING_MAX_REQ_COUNT;
 mbox->controller.chans[index].con_priv = 
 >rings[index];
 +   }

>>> While writing mailbox.c I wasn't unaware that there is the option to
>>> choose the queue length at runtime.
>>> The idea was to keep the code as simple as possible. I am open to
>>> making it a runtime thing, but first, please help me understand how
>>> that is useful here.
>>>
>>> I understand FlexRm has a ring buffer of RING_MAX_REQ_COUNT(1024)
>>> elements. Any message submitted to mailbox api can be immediately
>>> written onto the ringbuffer if there is some space.
>>> Is there any mechanism to report back to a client driver, if its
>>> message in ringbuffer failed "to be sent"?
>>> If there isn't any, then I think, in flexrm_last_tx_done() you should
>>> simply return true if there is some space left in the rung-buffer,
>>> false otherwise.
>>
>> Yes, we have error code in "struct brcm_message" to report back
>> errors from send_message. In our mailbox clients, we check
>> return value of mbox_send_message() and also the error code
>> in "struct brcm_message".
>>
> I meant after the message has been accepted in the ringbuffer but the
> remote failed to receive it.

Yes, even this case is handled.

In case of IO errors after a message has been put in the ring buffer, we get
a completion message with an error code, and mailbox client drivers will
receive back the "struct brcm_message" with the error set.

You can refer to flexrm_process_completions() for more details.

> There seems no such provision. IIANW, then you should be able to
> consider every message as "sent successfully" once it is in the ring
> buffer i.e, immediately after mbox_send_message() returns 0.
> In that case I would think you don't need more than a couple of
> entries out of MBOX_TX_QUEUE_LEN ?

What I am trying to suggest is that we can take up to 1024 messages
in a FlexRM ring, but MBOX_TX_QUEUE_LEN limits us from queuing
more messages. This issue manifests easily when multiple CPUs
queue to the same FlexRM ring (i.e. same mailbox channel).

Another quick fix is to set MBOX_TX_QUEUE_LEN to 1024, but
that would not be a generic fix.

Regards,
Anup


Re: [PATCH v2 6/7] mailbox: bcm-flexrm-mailbox: Set msg_queue_len for each channel

2017-07-24 Thread Anup Patel
On Mon, Jul 24, 2017 at 10:06 PM, Jassi Brar  wrote:
> On Mon, Jul 24, 2017 at 9:26 AM, Anup Patel  wrote:
>> Hi Jassi,
>>
>> Sorry for the delayed response...
>>
>> On Fri, Jul 21, 2017 at 9:16 PM, Jassi Brar  wrote:
>>> Hi Anup,
>>>
>>> On Fri, Jul 21, 2017 at 12:25 PM, Anup Patel  
>>> wrote:
 The Broadcom FlexRM ring (i.e. mailbox channel) can handle
 larger number of messages queued in one FlexRM ring hence
 this patch sets msg_queue_len for each mailbox channel to
 be same as RING_MAX_REQ_COUNT.

 Signed-off-by: Anup Patel 
 Reviewed-by: Scott Branden 
 ---
  drivers/mailbox/bcm-flexrm-mailbox.c | 5 -
  1 file changed, 4 insertions(+), 1 deletion(-)

 diff --git a/drivers/mailbox/bcm-flexrm-mailbox.c 
 b/drivers/mailbox/bcm-flexrm-mailbox.c
 index 9873818..20055a0 100644
 --- a/drivers/mailbox/bcm-flexrm-mailbox.c
 +++ b/drivers/mailbox/bcm-flexrm-mailbox.c
 @@ -1683,8 +1683,11 @@ static int flexrm_mbox_probe(struct platform_device 
 *pdev)
 ret = -ENOMEM;
 goto fail_free_debugfs_root;
 }
 -   for (index = 0; index < mbox->num_rings; index++)
 +   for (index = 0; index < mbox->num_rings; index++) {
 +   mbox->controller.chans[index].msg_queue_len =
 +   RING_MAX_REQ_COUNT;
 mbox->controller.chans[index].con_priv = 
 >rings[index];
 +   }

>>> While writing mailbox.c I wasn't unaware that there is the option to
>>> choose the queue length at runtime.
>>> The idea was to keep the code as simple as possible. I am open to
>>> making it a runtime thing, but first, please help me understand how
>>> that is useful here.
>>>
>>> I understand FlexRm has a ring buffer of RING_MAX_REQ_COUNT(1024)
>>> elements. Any message submitted to mailbox api can be immediately
>>> written onto the ringbuffer if there is some space.
>>> Is there any mechanism to report back to a client driver, if its
>>> message in ringbuffer failed "to be sent"?
>>> If there isn't any, then I think, in flexrm_last_tx_done() you should
>>> simply return true if there is some space left in the rung-buffer,
>>> false otherwise.
>>
>> Yes, we have error code in "struct brcm_message" to report back
>> errors from send_message. In our mailbox clients, we check
>> return value of mbox_send_message() and also the error code
>> in "struct brcm_message".
>>
> I meant after the message has been accepted in the ringbuffer but the
> remote failed to receive it.

Yes, even this case is handled.

In case of IO errors after a message has been put in the ring buffer, we get
a completion message with an error code, and mailbox client drivers will
receive back the "struct brcm_message" with the error set.

You can refer to flexrm_process_completions() for more details.

> There seems no such provision. IIANW, then you should be able to
> consider every message as "sent successfully" once it is in the ring
> buffer i.e, immediately after mbox_send_message() returns 0.
> In that case I would think you don't need more than a couple of
> entries out of MBOX_TX_QUEUE_LEN ?

What I am trying to suggest is that we can take up to 1024 messages
in a FlexRM ring, but MBOX_TX_QUEUE_LEN limits us from queuing
more messages. This issue manifests easily when multiple CPUs
queue to the same FlexRM ring (i.e. same mailbox channel).

Another quick fix is to set MBOX_TX_QUEUE_LEN to 1024, but
that would not be a generic fix.

Regards,
Anup


[GIT PULL] s390 fixes for 4.13-rc3

2017-07-24 Thread Martin Schwidefsky
Hi Linus,

please pull from the 'for-linus' branch of

git://git.kernel.org/pub/scm/linux/kernel/git/s390/linux.git for-linus

to receive the following updates:

Three bug fixes.

Christian Borntraeger (2):
  s390/perf: fix problem state detection
  s390/mm: set change and reference bit on lazy key enablement

Dong Jia Shi (1):
  s390: chp: handle CRW_ERC_INIT for channel-path status change

 arch/s390/kernel/perf_cpum_sf.c | 2 +-
 arch/s390/mm/pgtable.c  | 6 +++---
 drivers/s390/cio/chp.c  | 1 +
 3 files changed, 5 insertions(+), 4 deletions(-)

diff --git a/arch/s390/kernel/perf_cpum_sf.c b/arch/s390/kernel/perf_cpum_sf.c
index 0c82f79..c1bf75f 100644
--- a/arch/s390/kernel/perf_cpum_sf.c
+++ b/arch/s390/kernel/perf_cpum_sf.c
@@ -998,7 +998,7 @@ static int perf_push_sample(struct perf_event *event, 
struct sf_raw_sample *sfr)
psw_bits(regs.psw).ia   = sfr->basic.ia;
psw_bits(regs.psw).dat  = sfr->basic.T;
psw_bits(regs.psw).wait = sfr->basic.W;
-   psw_bits(regs.psw).per  = sfr->basic.P;
+   psw_bits(regs.psw).pstate = sfr->basic.P;
psw_bits(regs.psw).as   = sfr->basic.AS;
 
/*
diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c
index d4d409b..4a1f736 100644
--- a/arch/s390/mm/pgtable.c
+++ b/arch/s390/mm/pgtable.c
@@ -591,11 +591,11 @@ void ptep_zap_key(struct mm_struct *mm, unsigned long 
addr, pte_t *ptep)
unsigned long ptev;
pgste_t pgste;
 
-   /* Clear storage key */
+   /* Clear storage key ACC and F, but set R/C */
preempt_disable();
pgste = pgste_get_lock(ptep);
-   pgste_val(pgste) &= ~(PGSTE_ACC_BITS | PGSTE_FP_BIT |
- PGSTE_GR_BIT | PGSTE_GC_BIT);
+   pgste_val(pgste) &= ~(PGSTE_ACC_BITS | PGSTE_FP_BIT);
+   pgste_val(pgste) |= PGSTE_GR_BIT | PGSTE_GC_BIT;
ptev = pte_val(*ptep);
if (!(ptev & _PAGE_INVALID) && (ptev & _PAGE_WRITE))
page_set_storage_key(ptev & PAGE_MASK, PAGE_DEFAULT_KEY, 1);
diff --git a/drivers/s390/cio/chp.c b/drivers/s390/cio/chp.c
index 7e0d4f7..432fc40 100644
--- a/drivers/s390/cio/chp.c
+++ b/drivers/s390/cio/chp.c
@@ -559,6 +559,7 @@ static void chp_process_crw(struct crw *crw0, struct crw 
*crw1,
chpid.id = crw0->rsid;
switch (crw0->erc) {
case CRW_ERC_IPARM: /* Path has come. */
+   case CRW_ERC_INIT:
if (!chp_is_registered(chpid))
chp_new(chpid);
chsc_chp_online(chpid);



[GIT PULL] s390 fixes for 4.13-rc3

2017-07-24 Thread Martin Schwidefsky
Hi Linus,

please pull from the 'for-linus' branch of

git://git.kernel.org/pub/scm/linux/kernel/git/s390/linux.git for-linus

to receive the following updates:

Three bug fixes.

Christian Borntraeger (2):
  s390/perf: fix problem state detection
  s390/mm: set change and reference bit on lazy key enablement

Dong Jia Shi (1):
  s390: chp: handle CRW_ERC_INIT for channel-path status change

 arch/s390/kernel/perf_cpum_sf.c | 2 +-
 arch/s390/mm/pgtable.c  | 6 +++---
 drivers/s390/cio/chp.c  | 1 +
 3 files changed, 5 insertions(+), 4 deletions(-)

diff --git a/arch/s390/kernel/perf_cpum_sf.c b/arch/s390/kernel/perf_cpum_sf.c
index 0c82f79..c1bf75f 100644
--- a/arch/s390/kernel/perf_cpum_sf.c
+++ b/arch/s390/kernel/perf_cpum_sf.c
@@ -998,7 +998,7 @@ static int perf_push_sample(struct perf_event *event, 
struct sf_raw_sample *sfr)
psw_bits(regs.psw).ia   = sfr->basic.ia;
psw_bits(regs.psw).dat  = sfr->basic.T;
psw_bits(regs.psw).wait = sfr->basic.W;
-   psw_bits(regs.psw).per  = sfr->basic.P;
+   psw_bits(regs.psw).pstate = sfr->basic.P;
psw_bits(regs.psw).as   = sfr->basic.AS;
 
/*
diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c
index d4d409b..4a1f736 100644
--- a/arch/s390/mm/pgtable.c
+++ b/arch/s390/mm/pgtable.c
@@ -591,11 +591,11 @@ void ptep_zap_key(struct mm_struct *mm, unsigned long 
addr, pte_t *ptep)
unsigned long ptev;
pgste_t pgste;
 
-   /* Clear storage key */
+   /* Clear storage key ACC and F, but set R/C */
preempt_disable();
pgste = pgste_get_lock(ptep);
-   pgste_val(pgste) &= ~(PGSTE_ACC_BITS | PGSTE_FP_BIT |
- PGSTE_GR_BIT | PGSTE_GC_BIT);
+   pgste_val(pgste) &= ~(PGSTE_ACC_BITS | PGSTE_FP_BIT);
+   pgste_val(pgste) |= PGSTE_GR_BIT | PGSTE_GC_BIT;
ptev = pte_val(*ptep);
if (!(ptev & _PAGE_INVALID) && (ptev & _PAGE_WRITE))
page_set_storage_key(ptev & PAGE_MASK, PAGE_DEFAULT_KEY, 1);
diff --git a/drivers/s390/cio/chp.c b/drivers/s390/cio/chp.c
index 7e0d4f7..432fc40 100644
--- a/drivers/s390/cio/chp.c
+++ b/drivers/s390/cio/chp.c
@@ -559,6 +559,7 @@ static void chp_process_crw(struct crw *crw0, struct crw 
*crw1,
chpid.id = crw0->rsid;
switch (crw0->erc) {
case CRW_ERC_IPARM: /* Path has come. */
+   case CRW_ERC_INIT:
if (!chp_is_registered(chpid))
chp_new(chpid);
chsc_chp_online(chpid);



Re: [PATCH v4 0/8] Multibyte memset variations

2017-07-24 Thread Michael Ellerman
Matthew Wilcox  writes:

> From: Matthew Wilcox 
>
> A relatively common idiom we're missing is a function to fill an area
> of memory with a pattern which is larger than a single byte.  I first
> noticed this with a zram patch which wanted to fill a page with an
> 'unsigned long' value.  There turn out to be quite a few places in
> the kernel which can benefit from using an optimised function rather
> than a loop; sometimes text size, sometimes speed, and sometimes both.
> The optimised PowerPC version (not included here) improves performance
> by about 30% on POWER8 on just the raw memset_l().

Is the plan that Andrew will merge this series, or are you planning to
put them in a tree of yours?

cheers


Re: [PATCH v4 0/8] Multibyte memset variations

2017-07-24 Thread Michael Ellerman
Matthew Wilcox  writes:

> From: Matthew Wilcox 
>
> A relatively common idiom we're missing is a function to fill an area
> of memory with a pattern which is larger than a single byte.  I first
> noticed this with a zram patch which wanted to fill a page with an
> 'unsigned long' value.  There turn out to be quite a few places in
> the kernel which can benefit from using an optimised function rather
> than a loop; sometimes text size, sometimes speed, and sometimes both.
> The optimised PowerPC version (not included here) improves performance
> by about 30% on POWER8 on just the raw memset_l().

Is the plan that Andrew will merge this series, or are you planning to
put them in a tree of yours?

cheers


Re: [PATCH v2 00/13] mpt3sas driver NVMe support:

2017-07-24 Thread Suganath Prabu Subramani
Is there any update on these patches ?

Thanks,
Suganath Prabu S

On Fri, Jul 14, 2017 at 6:52 PM, Suganath Prabu S
 wrote:
> Ventura Series controller are Tri-mode. The controller and
> firmware are capable of supporting NVMe devices and
> PCIe switches to be connected with the controller. This
> patch set adds driver level support for NVMe devices and
> PCIe switches.
>
> Suganath Prabu S (13):
>   mpt3sas: Update MPI Header
>   mpt3sas: Add nvme device support in slave alloc, target alloc and
> probe
>   mpt3sas: SGL to PRP Translation for I/Os to NVMe  devices
>   mpt3sas: Added support for nvme encapsulated request message.
>   mpt3sas: API 's to support NVMe drive addition to SML
>   mpt3sas: API's to remove nvme drive from sml
>   mpt3sas: Handle NVMe PCIe device related events generated
>from firmware.
>   mpt3sas: Set NVMe device queue depth as 128
>   mpt3sas: scan and add nvme device after controller reset
>   mpt3as: Add-Task-management-debug-info-for-NVMe-drives.
>   mpt3sas: NVMe drive support for BTDHMAPPING ioctl command and log
> info
>   mpt3sas: Fix nvme drives checking for tlr.
>   mpt3sas: Update mpt3sas driver version.
>
>  drivers/scsi/mpt3sas/mpi/mpi2.h  |   43 +-
>  drivers/scsi/mpt3sas/mpi/mpi2_cnfg.h |  647 ++-
>  drivers/scsi/mpt3sas/mpi/mpi2_init.h |   11 +-
>  drivers/scsi/mpt3sas/mpi/mpi2_ioc.h  |  331 ++-
>  drivers/scsi/mpt3sas/mpi/mpi2_pci.h  |  142 +++
>  drivers/scsi/mpt3sas/mpi/mpi2_tool.h |   14 +-
>  drivers/scsi/mpt3sas/mpt3sas_base.c  |  710 +++-
>  drivers/scsi/mpt3sas/mpt3sas_base.h  |  171 +++-
>  drivers/scsi/mpt3sas/mpt3sas_config.c|  100 ++
>  drivers/scsi/mpt3sas/mpt3sas_ctl.c   |  158 ++-
>  drivers/scsi/mpt3sas/mpt3sas_scsih.c | 1874 
> --
>  drivers/scsi/mpt3sas/mpt3sas_warpdrive.c |2 +-
>  12 files changed, 4063 insertions(+), 140 deletions(-)
>  create mode 100644 drivers/scsi/mpt3sas/mpi/mpi2_pci.h
>
> Thanks,
> Suganath Prabu S


Re: [linux-next] cpus stalls detected few hours after booting next kernel

2017-07-24 Thread Abdul Haleem
On Fri, 2017-06-30 at 17:28 +1000, Nicholas Piggin wrote:
> On Fri, 30 Jun 2017 10:52:18 +0530
> Abdul Haleem  wrote:
> 
> > On Fri, 2017-06-30 at 00:45 +1000, Nicholas Piggin wrote:
> > > On Thu, 29 Jun 2017 20:23:05 +1000
> > > Nicholas Piggin  wrote:
> > > 
> > > > On Thu, 29 Jun 2017 19:36:14 +1000
> > > > Nicholas Piggin  wrote:
> > > 
> > > > > I don't *think* the replay-wakeup-interrupt patch is directly 
> > > > > involved, but
> > > > > it's likely to be one of the idle patches.  
> > > 
> > > Okay this turned out to be misconfigured sleep states I added for the
> > > simulator, sorry for the false alarm.
> > > 
> > > > Although you have this in the backtrace. I wonder if that's a stuck
> > > > lock in rcu_process_callbacks?
> > > 
> > > So this spinlock becomes top of the list of suspects. Can you try
> > > enabling lockdep and try to reproduce it?
> > 
> > Yes, recreated again with CONFIG_LOCKDEP=y & CONFIG_DEBUG_LOCKDEP=y set.
> > I do not see any difference in trace messages with and without LOCKDEP
> > enabled.
> > 
> > Please find the attached log file.
> 
> Can you get an rcu_invoke_callback event trace that Paul suggested?

Yes, I have collected the perf report.
> 
> Does this bug show up with just the powerpc next branch?

Now started seeing the call trace on mainline too (4.13.0-rc2)

> 
> Thanks,
> Nick
> 


-- 
Regards

Abdul Haleem
IBM Linux Technology Centre





Re: [PATCH v2 00/13] mpt3sas driver NVMe support:

2017-07-24 Thread Suganath Prabu Subramani
Is there any update on these patches ?

Thanks,
Suganath Prabu S

On Fri, Jul 14, 2017 at 6:52 PM, Suganath Prabu S
 wrote:
> Ventura Series controller are Tri-mode. The controller and
> firmware are capable of supporting NVMe devices and
> PCIe switches to be connected with the controller. This
> patch set adds driver level support for NVMe devices and
> PCIe switches.
>
> Suganath Prabu S (13):
>   mpt3sas: Update MPI Header
>   mpt3sas: Add nvme device support in slave alloc, target alloc and
> probe
>   mpt3sas: SGL to PRP Translation for I/Os to NVMe  devices
>   mpt3sas: Added support for nvme encapsulated request message.
>   mpt3sas: API 's to support NVMe drive addition to SML
>   mpt3sas: API's to remove nvme drive from sml
>   mpt3sas: Handle NVMe PCIe device related events generated
>from firmware.
>   mpt3sas: Set NVMe device queue depth as 128
>   mpt3sas: scan and add nvme device after controller reset
>   mpt3as: Add-Task-management-debug-info-for-NVMe-drives.
>   mpt3sas: NVMe drive support for BTDHMAPPING ioctl command and log
> info
>   mpt3sas: Fix nvme drives checking for tlr.
>   mpt3sas: Update mpt3sas driver version.
>
>  drivers/scsi/mpt3sas/mpi/mpi2.h  |   43 +-
>  drivers/scsi/mpt3sas/mpi/mpi2_cnfg.h |  647 ++-
>  drivers/scsi/mpt3sas/mpi/mpi2_init.h |   11 +-
>  drivers/scsi/mpt3sas/mpi/mpi2_ioc.h  |  331 ++-
>  drivers/scsi/mpt3sas/mpi/mpi2_pci.h  |  142 +++
>  drivers/scsi/mpt3sas/mpi/mpi2_tool.h |   14 +-
>  drivers/scsi/mpt3sas/mpt3sas_base.c  |  710 +++-
>  drivers/scsi/mpt3sas/mpt3sas_base.h  |  171 +++-
>  drivers/scsi/mpt3sas/mpt3sas_config.c|  100 ++
>  drivers/scsi/mpt3sas/mpt3sas_ctl.c   |  158 ++-
>  drivers/scsi/mpt3sas/mpt3sas_scsih.c | 1874 
> --
>  drivers/scsi/mpt3sas/mpt3sas_warpdrive.c |2 +-
>  12 files changed, 4063 insertions(+), 140 deletions(-)
>  create mode 100644 drivers/scsi/mpt3sas/mpi/mpi2_pci.h
>
> Thanks,
> Suganath Prabu S


Re: [linux-next] cpus stalls detected few hours after booting next kernel

2017-07-24 Thread Abdul Haleem
On Fri, 2017-06-30 at 17:28 +1000, Nicholas Piggin wrote:
> On Fri, 30 Jun 2017 10:52:18 +0530
> Abdul Haleem  wrote:
> 
> > On Fri, 2017-06-30 at 00:45 +1000, Nicholas Piggin wrote:
> > > On Thu, 29 Jun 2017 20:23:05 +1000
> > > Nicholas Piggin  wrote:
> > > 
> > > > On Thu, 29 Jun 2017 19:36:14 +1000
> > > > Nicholas Piggin  wrote:
> > > 
> > > > > I don't *think* the replay-wakeup-interrupt patch is directly 
> > > > > involved, but
> > > > > it's likely to be one of the idle patches.  
> > > 
> > > Okay this turned out to be misconfigured sleep states I added for the
> > > simulator, sorry for the false alarm.
> > > 
> > > > Although you have this in the backtrace. I wonder if that's a stuck
> > > > lock in rcu_process_callbacks?
> > > 
> > > So this spinlock becomes top of the list of suspects. Can you try
> > > enabling lockdep and try to reproduce it?
> > 
> > Yes, recreated again with CONFIG_LOCKDEP=y & CONFIG_DEBUG_LOCKDEP=y set.
> > I do not see any difference in trace messages with and without LOCKDEP
> > enabled.
> > 
> > Please find the attached log file.
> 
> Can you get an rcu_invoke_callback event trace that Paul suggested?

Yes, I have collected the perf report.
> 
> Does this bug show up with just the powerpc next branch?

Now started seeing the call trace on mainline too (4.13.0-rc2)

> 
> Thanks,
> Nick
> 


-- 
Regards

Abdul Haleem
IBM Linux Technology Centre





Re: [Patch v5 12/12] Documention: v4l: Documentation for HEVC CIDs

2017-07-24 Thread Smitha T Murthy
On Thu, 2017-07-20 at 18:46 +0300, Stanimir Varbanov wrote:
> Hi,
> 
> >>> +
> >>> +* - ``V4L2_MPEG_VIDEO_HEVC_PROFILE_MAIN``
> >>> +  - Main profile.
> >>
> >> MAIN10?
> >>
> > No just MAIN.
> 
> I haven't because the MFC does not supported it?
> 
> If so, I think we have to add MAIN10 for completeness and because other
> drivers could have support for it.
> 
MFC supports Main and Main Still profile for encoder. Main, Main10, Main
Still for decoder. I will add both Main and Main10 in the next patch
series.
Thank you for the review.

Regards,
Smitha




Re: [Patch v5 12/12] Documention: v4l: Documentation for HEVC CIDs

2017-07-24 Thread Smitha T Murthy
On Thu, 2017-07-20 at 18:46 +0300, Stanimir Varbanov wrote:
> Hi,
> 
> >>> +
> >>> +* - ``V4L2_MPEG_VIDEO_HEVC_PROFILE_MAIN``
> >>> +  - Main profile.
> >>
> >> MAIN10?
> >>
> > No just MAIN.
> 
> I haven't because the MFC does not supported it?
> 
> If so, I think we have to add MAIN10 for completeness and because other
> drivers could have support for it.
> 
MFC supports Main and Main Still profile for encoder. Main, Main10, Main
Still for decoder. I will add both Main and Main10 in the next patch
series.
Thank you for the review.

Regards,
Smitha




[PATCH 0/4] ARM: sun9i: SMP bring-up with Multi-Cluster Power Management

2017-07-24 Thread Chen-Yu Tsai
Hi everyone,

This is a partial resend of my sun9i SMP support with MCPM series from
over two years ago [1]. Not much has changed since then. We've tried
to implement PSCI for both the A80 and A83T. Results were not promising.
The issue is that these two chips have a broken security extensions
implementation. If a specific bit is not burned in its e-fuse, most if
not all security protections don't work [2]. Even worse, non-secure
access to the GIC becomes secure. This requires a crazy workaround in
the GIC driver which probably doesn't work in all cases [3].

Nicolas mentioned that the MCPM framework is likely overkill in our
case [4]. However the framework does provide cluster/core state tracking
and proper sequencing of cache related operations. We could rework
the code to use standard smp_ops, but I would like to actually get
a working version in first.

Core and cluster power-down, aka hotplugging, is not included in this
series. Nicolas mentioned that a new optional callback should be added
in cases where the kernel has to do the actual power down [5]. This
will be done later on. Only patches 1 ~ 4 from the original RFC series
are resent.

Changes since RFC:

  - Have MACH_SUN9I imply MCPM, and have SUN9I_A80_MCPM default to
MACH_SUN9I. This means no defconfig changes are required.


Please have a look.

Regards
ChenYu

[1] http://www.spinics.net/lists/arm-kernel/msg418350.html
[2] https://lists.denx.de/pipermail/u-boot/2017-June/294637.html
[3] 
https://github.com/wens/linux/commit/c48654c1f737116e7a7660183c8c74fa91970528
[4] http://www.spinics.net/lists/arm-kernel/msg434160.html
[5] http://www.spinics.net/lists/arm-kernel/msg434408.html


Original cover letter from the old RFC series:

This is my attempt to support SMP and CPU hot plugging on the Allwinner
A80 SoC. The A80 is a big.Little processor with 2 clusters of 4x Cortex-A7
and 4x Cortex-A15 cores.

Much of the sunxi-specific MCPM code is derived from Allwinner code and
documentation, with some references to the other MCPM implementations,
as well as the Cortex's Technical Reference Manuals for the power
sequencing info.

One major difference compared to other platforms is we currently do not
have a standalone PMU or other embedded firmware to do the actual power
sequencing. All power/reset control is done by the kernel. As such, I
couldn't figure out where to put the code to power off the outbound
processor. I'm putting it in the .wait_for_powerdown() callback for now.
This does not get called by the big.Little switcher. But since we lack
cpufreq support at the moment, big.Little switcher is probably not going
to work anyway.

The code has been tested on my A80 Optimus, and reliably brings up all
cores. CPU hotplugging works as well. One issue I have is the processors
in cluster 0 do not stay in WFI after they are signaled to go offline.
I haven't tested the CCI-400 PMU bits yet.

I've done the best I could to fit the code into the new MCPM callbacks,
unlike the Allwinner code which uses the old .power_up()/.power_down()
ones. However my knowledge of ARM internals is limited, so it is quite
possible I got something wrong. Reviews are highly appreciated.

The actual work is split into 3 phases:

Patch 1 adds basic SMP bringup code using the common MCPM code.
No hotplugging is supported.

Patch 2 ~ 4 add the required DT device nodes.

Patch 5 adds support for hotplugging processor cores 1~7.

Patch 6 adds support for cpu0 hotplugging. The BROM checks a region
of secure SRAM for special flags. If they are set, execution is
diverted to the configured secondary startup address, just like it
would be for all the other processor cores.

Patch 7 adds the DT nodes for the secure SRAM.

Chen-Yu Tsai (4):
  ARM: sun9i: Support SMP on A80 with Multi-Cluster Power Management
(MCPM)
  ARM: dts: sun9i: Add CCI-400 device nodes for A80
  ARM: dts: sun9i: Add CPUCFG device node for A80 dtsi
  ARM: dts: sun9i: Add PRCM device node for the A80 dtsi

 arch/arm/boot/dts/sun9i-a80.dtsi |  56 ++
 arch/arm/mach-sunxi/Kconfig  |  10 +
 arch/arm/mach-sunxi/Makefile |   1 +
 arch/arm/mach-sunxi/mcpm.c   | 391 +++
 4 files changed, 458 insertions(+)
 create mode 100644 arch/arm/mach-sunxi/mcpm.c

-- 
2.13.3



[PATCH 0/4] ARM: sun9i: SMP bring-up with Multi-Cluster Power Management

2017-07-24 Thread Chen-Yu Tsai
Hi everyone,

This is a partial resend of my sun9i SMP support with MCPM series from
over two years ago [1]. Not much has changed since then. We've tried
to implement PSCI for both the A80 and A83T. Results were not promising.
The issue is that these two chips have a broken security extensions
implementation. If a specific bit is not burned in its e-fuse, most if
not all security protections don't work [2]. Even worse, non-secure
access to the GIC becomes secure. This requires a crazy workaround in
the GIC driver which probably doesn't work in all cases [3].

Nicolas mentioned that the MCPM framework is likely overkill in our
case [4]. However the framework does provide cluster/core state tracking
and proper sequencing of cache related operations. We could rework
the code to use standard smp_ops, but I would like to actually get
a working version in first.

Core and cluster power-down, aka hotplugging, is not included in this
series. Nicolas mentioned that a new optional callback should be added
in cases where the kernel has to do the actual power down [5]. This
will be done later on. Only patches 1 ~ 4 from the original RFC series
are resent.

Changes since RFC:

  - Have MACH_SUN9I imply MCPM, and have SUN9I_A80_MCPM default to
MACH_SUN9I. This means no defconfig changes are required.


Please have a look.

Regards
ChenYu

[1] http://www.spinics.net/lists/arm-kernel/msg418350.html
[2] https://lists.denx.de/pipermail/u-boot/2017-June/294637.html
[3] 
https://github.com/wens/linux/commit/c48654c1f737116e7a7660183c8c74fa91970528
[4] http://www.spinics.net/lists/arm-kernel/msg434160.html
[5] http://www.spinics.net/lists/arm-kernel/msg434408.html


Original cover letter from the old RFC series:

This is my attempt to support SMP and CPU hot plugging on the Allwinner
A80 SoC. The A80 is a big.Little processor with 2 clusters of 4x Cortex-A7
and 4x Cortex-A15 cores.

Much of the sunxi-specific MCPM code is derived from Allwinner code and
documentation, with some references to the other MCPM implementations,
as well as the Cortex's Technical Reference Manuals for the power
sequencing info.

One major difference compared to other platforms is we currently do not
have a standalone PMU or other embedded firmware to do the actual power
sequencing. All power/reset control is done by the kernel. As such, I
couldn't figure out where to put the code to power off the outbound
processor. I'm putting it in the .wait_for_powerdown() callback for now.
This does not get called by the big.Little switcher. But since we lack
cpufreq support at the moment, big.Little switcher is probably not going
to work anyway.

The code has been tested on my A80 Optimus, and reliably brings up all
cores. CPU hotplugging works as well. One issue I have is the processors
in cluster 0 do not stay in WFI after they are signaled to go offline.
I haven't tested the CCI-400 PMU bits yet.

I've done the best I could to fit the code into the new MCPM callbacks,
unlike the Allwinner code which uses the old .power_up()/.power_down()
ones. However my knowledge of ARM internals is limited, so it is quite
possible I got something wrong. Reviews are highly appreciated.

The actual work is split into 3 phases:

Patch 1 adds basic SMP bringup code using the common MCPM code.
No hotplugging is supported.

Patch 2 ~ 4 add the required DT device nodes.

Patch 5 adds support for hotplugging processor cores 1~7.

Patch 6 adds support for cpu0 hotplugging. The BROM checks a region
of secure SRAM for special flags. If they are set, execution is
diverted to the configured secondary startup address, just like it
would be for all the other processor cores.

Patch 7 adds the DT nodes for the secure SRAM.

Chen-Yu Tsai (4):
  ARM: sun9i: Support SMP on A80 with Multi-Cluster Power Management
(MCPM)
  ARM: dts: sun9i: Add CCI-400 device nodes for A80
  ARM: dts: sun9i: Add CPUCFG device node for A80 dtsi
  ARM: dts: sun9i: Add PRCM device node for the A80 dtsi

 arch/arm/boot/dts/sun9i-a80.dtsi |  56 ++
 arch/arm/mach-sunxi/Kconfig  |  10 +
 arch/arm/mach-sunxi/Makefile |   1 +
 arch/arm/mach-sunxi/mcpm.c   | 391 +++
 4 files changed, 458 insertions(+)
 create mode 100644 arch/arm/mach-sunxi/mcpm.c

-- 
2.13.3



[PATCH 4/4] ARM: dts: sun9i: Add PRCM device node for the A80 dtsi

2017-07-24 Thread Chen-Yu Tsai
The PRCM is a collection of clock controls, reset controls, and various
power switches/gates. Some of these can be independently listed and
supported, while a number of CPU related ones are used in tandem with
CPUCFG for SMP bringup and CPU hotplugging.

Signed-off-by: Chen-Yu Tsai 
---
 arch/arm/boot/dts/sun9i-a80.dtsi | 5 +
 1 file changed, 5 insertions(+)

diff --git a/arch/arm/boot/dts/sun9i-a80.dtsi b/arch/arm/boot/dts/sun9i-a80.dtsi
index cc5db467f616..cadf3a5e6997 100644
--- a/arch/arm/boot/dts/sun9i-a80.dtsi
+++ b/arch/arm/boot/dts/sun9i-a80.dtsi
@@ -714,6 +714,11 @@
interrupts = ;
};
 
+   prcm@08001400 {
+   compatible = "allwinner,sun9i-a80-prcm";
+   reg = <0x08001400 0x200>;
+   };
+
apbs_rst: reset@080014b0 {
reg = <0x080014b0 0x4>;
compatible = "allwinner,sun6i-a31-clock-reset";
-- 
2.13.3



[PATCH 4/4] ARM: dts: sun9i: Add PRCM device node for the A80 dtsi

2017-07-24 Thread Chen-Yu Tsai
The PRCM is a collection of clock controls, reset controls, and various
power switches/gates. Some of these can be independently listed and
supported, while a number of CPU related ones are used in tandem with
CPUCFG for SMP bringup and CPU hotplugging.

Signed-off-by: Chen-Yu Tsai 
---
 arch/arm/boot/dts/sun9i-a80.dtsi | 5 +
 1 file changed, 5 insertions(+)

diff --git a/arch/arm/boot/dts/sun9i-a80.dtsi b/arch/arm/boot/dts/sun9i-a80.dtsi
index cc5db467f616..cadf3a5e6997 100644
--- a/arch/arm/boot/dts/sun9i-a80.dtsi
+++ b/arch/arm/boot/dts/sun9i-a80.dtsi
@@ -714,6 +714,11 @@
interrupts = ;
};
 
+   prcm@08001400 {
+   compatible = "allwinner,sun9i-a80-prcm";
+   reg = <0x08001400 0x200>;
+   };
+
apbs_rst: reset@080014b0 {
reg = <0x080014b0 0x4>;
compatible = "allwinner,sun6i-a31-clock-reset";
-- 
2.13.3



[PATCH 3/4] ARM: dts: sun9i: Add CPUCFG device node for A80 dtsi

2017-07-24 Thread Chen-Yu Tsai
CPUCFG is a collection of registers that are mapped to the SoC's signals
from each individual processor core and associated peripherals, such as
resets for processors, L1/L2 cache and other things.

These registers are used for SMP bringup and CPU hotplugging.

Signed-off-by: Chen-Yu Tsai 
---
 arch/arm/boot/dts/sun9i-a80.dtsi | 5 +
 1 file changed, 5 insertions(+)

diff --git a/arch/arm/boot/dts/sun9i-a80.dtsi b/arch/arm/boot/dts/sun9i-a80.dtsi
index fc179b8ab038..cc5db467f616 100644
--- a/arch/arm/boot/dts/sun9i-a80.dtsi
+++ b/arch/arm/boot/dts/sun9i-a80.dtsi
@@ -368,6 +368,11 @@
#reset-cells = <1>;
};
 
+   cpucfg@0170 {
+   compatible = "allwinner,sun9i-a80-cpucfg";
+   reg = <0x0170 0x100>;
+   };
+
mmc0: mmc@01c0f000 {
compatible = "allwinner,sun9i-a80-mmc";
reg = <0x01c0f000 0x1000>;
-- 
2.13.3



[PATCH 1/4] ARM: sun9i: Support SMP on A80 with Multi-Cluster Power Management (MCPM)

2017-07-24 Thread Chen-Yu Tsai
The A80 is a big.LITTLE SoC with 1 cluster of 4 Cortex-A7s and
1 cluster of 4 Cortex-A15s.

This patch adds support to bring up the second cluster and thus all
cores using the common MCPM code. Core/cluster power down has not
been implemented, thus CPU hotplugging and the big.LITTLE switcher are
not supported.

Signed-off-by: Chen-Yu Tsai 
---
 arch/arm/mach-sunxi/Kconfig  |  10 ++
 arch/arm/mach-sunxi/Makefile |   1 +
 arch/arm/mach-sunxi/mcpm.c   | 391 +++
 3 files changed, 402 insertions(+)
 create mode 100644 arch/arm/mach-sunxi/mcpm.c

diff --git a/arch/arm/mach-sunxi/Kconfig b/arch/arm/mach-sunxi/Kconfig
index 58153cdf025b..177380548d99 100644
--- a/arch/arm/mach-sunxi/Kconfig
+++ b/arch/arm/mach-sunxi/Kconfig
@@ -47,5 +47,15 @@ config MACH_SUN9I
bool "Allwinner (sun9i) SoCs support"
default ARCH_SUNXI
select ARM_GIC
+   imply MCPM
+
+config SUN9I_A80_MCPM
+   bool "Allwinner A80 Multi-Cluster PM support"
+   depends on MCPM && MACH_SUN9I
+   default MACH_SUN9I
+   select ARM_CCI400_PORT_CTRL
+   help
+ This is needed to provide CPU and cluster power management
+ on Allwinner A80 implementing big.LITTLE.
 
 endif
diff --git a/arch/arm/mach-sunxi/Makefile b/arch/arm/mach-sunxi/Makefile
index 27b168f121a1..e8558912c714 100644
--- a/arch/arm/mach-sunxi/Makefile
+++ b/arch/arm/mach-sunxi/Makefile
@@ -1,2 +1,3 @@
 obj-$(CONFIG_ARCH_SUNXI) += sunxi.o
 obj-$(CONFIG_SMP) += platsmp.o
+obj-$(CONFIG_SUN9I_A80_MCPM) += mcpm.o
diff --git a/arch/arm/mach-sunxi/mcpm.c b/arch/arm/mach-sunxi/mcpm.c
new file mode 100644
index ..4b6e1d6ae379
--- /dev/null
+++ b/arch/arm/mach-sunxi/mcpm.c
@@ -0,0 +1,391 @@
+/*
+ * Copyright (c) 2015 Chen-Yu Tsai
+ *
+ * Chen-Yu Tsai 
+ *
+ * arch/arm/mach-sunxi/mcpm.c
+ *
+ * Based on arch/arm/mach-exynos/mcpm-exynos.c and Allwinner code
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include 
+#include 
+#include 
+#include 
+
+#include 
+#include 
+#include 
+
+#define SUNXI_CPUS_PER_CLUSTER 4
+#define SUNXI_NR_CLUSTERS  2
+
+#define SUN9I_A80_A15_CLUSTER  1
+
+#define CPUCFG_CX_CTRL_REG0(c) (0x10 * (c))
+#define CPUCFG_CX_CTRL_REG0_L1_RST_DISABLE(n)  BIT(n)
+#define CPUCFG_CX_CTRL_REG0_L1_RST_DISABLE_ALL 0xf
+#define CPUCFG_CX_CTRL_REG0_L2_RST_DISABLE_A7  BIT(4)
+#define CPUCFG_CX_CTRL_REG0_L2_RST_DISABLE_A15 BIT(0)
+#define CPUCFG_CX_CTRL_REG1(c) (0x10 * (c) + 0x4)
+#define CPUCFG_CX_CTRL_REG1_ACINACTM   BIT(0)
+#define CPUCFG_CX_RST_CTRL(c)  (0x80 + 0x4 * (c))
+#define CPUCFG_CX_RST_CTRL_DBG_SOC_RST BIT(24)
+#define CPUCFG_CX_RST_CTRL_ETM_RST(n)  BIT(20 + (n))
+#define CPUCFG_CX_RST_CTRL_ETM_RST_ALL (0xf << 20)
+#define CPUCFG_CX_RST_CTRL_DBG_RST(n)  BIT(16 + (n))
+#define CPUCFG_CX_RST_CTRL_DBG_RST_ALL (0xf << 16)
+#define CPUCFG_CX_RST_CTRL_H_RST   BIT(12)
+#define CPUCFG_CX_RST_CTRL_L2_RST  BIT(8)
+#define CPUCFG_CX_RST_CTRL_CX_RST(n)   BIT(4 + (n))
+#define CPUCFG_CX_RST_CTRL_CORE_RST(n) BIT(n)
+
+#define PRCM_CPU_PO_RST_CTRL(c)(0x4 + 0x4 * (c))
+#define PRCM_CPU_PO_RST_CTRL_CORE(n)   BIT(n)
+#define PRCM_CPU_PO_RST_CTRL_CORE_ALL  0xf
+#define PRCM_PWROFF_GATING_REG(c)  (0x100 + 0x4 * (c))
+#define PRCM_PWROFF_GATING_REG_CLUSTER BIT(4)
+#define PRCM_PWROFF_GATING_REG_CORE(n) BIT(n)
+#define PRCM_PWR_SWITCH_REG(c, cpu)(0x140 + 0x10 * (c) + 0x4 * (cpu))
+#define PRCM_CPU_SOFT_ENTRY_REG0x164
+
+static void __iomem *cpucfg_base;
+static void __iomem *prcm_base;
+
+static int sunxi_cpu_power_switch_set(unsigned int cpu, unsigned int cluster,
+ bool enable)
+{
+   u32 reg;
+
+   /* control sequence from Allwinner A80 user manual v1.2 PRCM section */
+   reg = readl(prcm_base + PRCM_PWR_SWITCH_REG(cluster, cpu));
+   if (enable) {
+   if (reg == 0x00) {
+   pr_debug("power clamp for cluster %u cpu %u already 
open\n",
+cluster, cpu);
+   return 0;
+   }
+
+   writel(0xff, prcm_base + PRCM_PWR_SWITCH_REG(cluster, cpu));
+   udelay(10);
+   writel(0xfe, prcm_base + PRCM_PWR_SWITCH_REG(cluster, cpu));
+   udelay(10);
+   writel(0xf8, prcm_base + PRCM_PWR_SWITCH_REG(cluster, cpu));
+   udelay(10);
+   writel(0xf0, prcm_base + PRCM_PWR_SWITCH_REG(cluster, cpu));
+   udelay(10);
+   writel(0x00, prcm_base + PRCM_PWR_SWITCH_REG(cluster, cpu));
+   udelay(10);
+   } else {
+   writel(0xff, prcm_base + PRCM_PWR_SWITCH_REG(cluster, cpu));
+   udelay(10);
+   }
+
+   return 0;
+}
+
+static int 

[PATCH 3/4] ARM: dts: sun9i: Add CPUCFG device node for A80 dtsi

2017-07-24 Thread Chen-Yu Tsai
CPUCFG is a collection of registers that are mapped to the SoC's signals
from each individual processor core and associated peripherals, such as
resets for processors, L1/L2 cache and other things.

These registers are used for SMP bringup and CPU hotplugging.

Signed-off-by: Chen-Yu Tsai 
---
 arch/arm/boot/dts/sun9i-a80.dtsi | 5 +
 1 file changed, 5 insertions(+)

diff --git a/arch/arm/boot/dts/sun9i-a80.dtsi b/arch/arm/boot/dts/sun9i-a80.dtsi
index fc179b8ab038..cc5db467f616 100644
--- a/arch/arm/boot/dts/sun9i-a80.dtsi
+++ b/arch/arm/boot/dts/sun9i-a80.dtsi
@@ -368,6 +368,11 @@
#reset-cells = <1>;
};
 
+   cpucfg@0170 {
+   compatible = "allwinner,sun9i-a80-cpucfg";
+   reg = <0x0170 0x100>;
+   };
+
mmc0: mmc@01c0f000 {
compatible = "allwinner,sun9i-a80-mmc";
reg = <0x01c0f000 0x1000>;
-- 
2.13.3



[PATCH 1/4] ARM: sun9i: Support SMP on A80 with Multi-Cluster Power Management (MCPM)

2017-07-24 Thread Chen-Yu Tsai
The A80 is a big.LITTLE SoC with 1 cluster of 4 Cortex-A7s and
1 cluster of 4 Cortex-A15s.

This patch adds support to bring up the second cluster and thus all
cores using the common MCPM code. Core/cluster power down has not
been implemented, thus CPU hotplugging and the big.LITTLE switcher are
not supported.

Signed-off-by: Chen-Yu Tsai 
---
 arch/arm/mach-sunxi/Kconfig  |  10 ++
 arch/arm/mach-sunxi/Makefile |   1 +
 arch/arm/mach-sunxi/mcpm.c   | 391 +++
 3 files changed, 402 insertions(+)
 create mode 100644 arch/arm/mach-sunxi/mcpm.c

diff --git a/arch/arm/mach-sunxi/Kconfig b/arch/arm/mach-sunxi/Kconfig
index 58153cdf025b..177380548d99 100644
--- a/arch/arm/mach-sunxi/Kconfig
+++ b/arch/arm/mach-sunxi/Kconfig
@@ -47,5 +47,15 @@ config MACH_SUN9I
bool "Allwinner (sun9i) SoCs support"
default ARCH_SUNXI
select ARM_GIC
+   imply MCPM
+
+config SUN9I_A80_MCPM
+   bool "Allwinner A80 Multi-Cluster PM support"
+   depends on MCPM && MACH_SUN9I
+   default MACH_SUN9I
+   select ARM_CCI400_PORT_CTRL
+   help
+ This is needed to provide CPU and cluster power management
+ on Allwinner A80 implementing big.LITTLE.
 
 endif
diff --git a/arch/arm/mach-sunxi/Makefile b/arch/arm/mach-sunxi/Makefile
index 27b168f121a1..e8558912c714 100644
--- a/arch/arm/mach-sunxi/Makefile
+++ b/arch/arm/mach-sunxi/Makefile
@@ -1,2 +1,3 @@
 obj-$(CONFIG_ARCH_SUNXI) += sunxi.o
 obj-$(CONFIG_SMP) += platsmp.o
+obj-$(CONFIG_SUN9I_A80_MCPM) += mcpm.o
diff --git a/arch/arm/mach-sunxi/mcpm.c b/arch/arm/mach-sunxi/mcpm.c
new file mode 100644
index ..4b6e1d6ae379
--- /dev/null
+++ b/arch/arm/mach-sunxi/mcpm.c
@@ -0,0 +1,391 @@
+/*
+ * Copyright (c) 2015 Chen-Yu Tsai
+ *
+ * Chen-Yu Tsai 
+ *
+ * arch/arm/mach-sunxi/mcpm.c
+ *
+ * Based on arch/arm/mach-exynos/mcpm-exynos.c and Allwinner code
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include 
+#include 
+#include 
+#include 
+
+#include 
+#include 
+#include 
+
+#define SUNXI_CPUS_PER_CLUSTER 4
+#define SUNXI_NR_CLUSTERS  2
+
+#define SUN9I_A80_A15_CLUSTER  1
+
+#define CPUCFG_CX_CTRL_REG0(c) (0x10 * (c))
+#define CPUCFG_CX_CTRL_REG0_L1_RST_DISABLE(n)  BIT(n)
+#define CPUCFG_CX_CTRL_REG0_L1_RST_DISABLE_ALL 0xf
+#define CPUCFG_CX_CTRL_REG0_L2_RST_DISABLE_A7  BIT(4)
+#define CPUCFG_CX_CTRL_REG0_L2_RST_DISABLE_A15 BIT(0)
+#define CPUCFG_CX_CTRL_REG1(c) (0x10 * (c) + 0x4)
+#define CPUCFG_CX_CTRL_REG1_ACINACTM   BIT(0)
+#define CPUCFG_CX_RST_CTRL(c)  (0x80 + 0x4 * (c))
+#define CPUCFG_CX_RST_CTRL_DBG_SOC_RST BIT(24)
+#define CPUCFG_CX_RST_CTRL_ETM_RST(n)  BIT(20 + (n))
+#define CPUCFG_CX_RST_CTRL_ETM_RST_ALL (0xf << 20)
+#define CPUCFG_CX_RST_CTRL_DBG_RST(n)  BIT(16 + (n))
+#define CPUCFG_CX_RST_CTRL_DBG_RST_ALL (0xf << 16)
+#define CPUCFG_CX_RST_CTRL_H_RST   BIT(12)
+#define CPUCFG_CX_RST_CTRL_L2_RST  BIT(8)
+#define CPUCFG_CX_RST_CTRL_CX_RST(n)   BIT(4 + (n))
+#define CPUCFG_CX_RST_CTRL_CORE_RST(n) BIT(n)
+
+#define PRCM_CPU_PO_RST_CTRL(c)(0x4 + 0x4 * (c))
+#define PRCM_CPU_PO_RST_CTRL_CORE(n)   BIT(n)
+#define PRCM_CPU_PO_RST_CTRL_CORE_ALL  0xf
+#define PRCM_PWROFF_GATING_REG(c)  (0x100 + 0x4 * (c))
+#define PRCM_PWROFF_GATING_REG_CLUSTER BIT(4)
+#define PRCM_PWROFF_GATING_REG_CORE(n) BIT(n)
+#define PRCM_PWR_SWITCH_REG(c, cpu)(0x140 + 0x10 * (c) + 0x4 * (cpu))
+#define PRCM_CPU_SOFT_ENTRY_REG0x164
+
+static void __iomem *cpucfg_base;
+static void __iomem *prcm_base;
+
+static int sunxi_cpu_power_switch_set(unsigned int cpu, unsigned int cluster,
+ bool enable)
+{
+   u32 reg;
+
+   /* control sequence from Allwinner A80 user manual v1.2 PRCM section */
+   reg = readl(prcm_base + PRCM_PWR_SWITCH_REG(cluster, cpu));
+   if (enable) {
+   if (reg == 0x00) {
+   pr_debug("power clamp for cluster %u cpu %u already 
open\n",
+cluster, cpu);
+   return 0;
+   }
+
+   writel(0xff, prcm_base + PRCM_PWR_SWITCH_REG(cluster, cpu));
+   udelay(10);
+   writel(0xfe, prcm_base + PRCM_PWR_SWITCH_REG(cluster, cpu));
+   udelay(10);
+   writel(0xf8, prcm_base + PRCM_PWR_SWITCH_REG(cluster, cpu));
+   udelay(10);
+   writel(0xf0, prcm_base + PRCM_PWR_SWITCH_REG(cluster, cpu));
+   udelay(10);
+   writel(0x00, prcm_base + PRCM_PWR_SWITCH_REG(cluster, cpu));
+   udelay(10);
+   } else {
+   writel(0xff, prcm_base + PRCM_PWR_SWITCH_REG(cluster, cpu));
+   udelay(10);
+   }
+
+   return 0;
+}
+
+static int sunxi_cpu_powerup(unsigned int cpu, 

[PATCH 2/4] ARM: dts: sun9i: Add CCI-400 device nodes for A80

2017-07-24 Thread Chen-Yu Tsai
The A80 includes an ARM CCI-400 interconnect to support multi-cluster
CPU caches.

Also add the maximum clock frequency for the CPUs, as listed in the
A80 Optimus Board FEX file.

Signed-off-by: Chen-Yu Tsai 
---
 arch/arm/boot/dts/sun9i-a80.dtsi | 46 
 1 file changed, 46 insertions(+)

diff --git a/arch/arm/boot/dts/sun9i-a80.dtsi b/arch/arm/boot/dts/sun9i-a80.dtsi
index 759a72317eb8..fc179b8ab038 100644
--- a/arch/arm/boot/dts/sun9i-a80.dtsi
+++ b/arch/arm/boot/dts/sun9i-a80.dtsi
@@ -63,48 +63,64 @@
cpu0: cpu@0 {
compatible = "arm,cortex-a7";
device_type = "cpu";
+   cci-control-port = <_control0>;
+   clock-frequency = <1200>;
reg = <0x0>;
};
 
cpu1: cpu@1 {
compatible = "arm,cortex-a7";
device_type = "cpu";
+   cci-control-port = <_control0>;
+   clock-frequency = <1200>;
reg = <0x1>;
};
 
cpu2: cpu@2 {
compatible = "arm,cortex-a7";
device_type = "cpu";
+   cci-control-port = <_control0>;
+   clock-frequency = <1200>;
reg = <0x2>;
};
 
cpu3: cpu@3 {
compatible = "arm,cortex-a7";
device_type = "cpu";
+   cci-control-port = <_control0>;
+   clock-frequency = <1200>;
reg = <0x3>;
};
 
cpu4: cpu@100 {
compatible = "arm,cortex-a15";
device_type = "cpu";
+   cci-control-port = <_control1>;
+   clock-frequency = <1800>;
reg = <0x100>;
};
 
cpu5: cpu@101 {
compatible = "arm,cortex-a15";
device_type = "cpu";
+   cci-control-port = <_control1>;
+   clock-frequency = <1800>;
reg = <0x101>;
};
 
cpu6: cpu@102 {
compatible = "arm,cortex-a15";
device_type = "cpu";
+   cci-control-port = <_control1>;
+   clock-frequency = <1800>;
reg = <0x102>;
};
 
cpu7: cpu@103 {
compatible = "arm,cortex-a15";
device_type = "cpu";
+   cci-control-port = <_control1>;
+   clock-frequency = <1800>;
reg = <0x103>;
};
};
@@ -436,6 +452,36 @@
interrupts = ;
};
 
+   cci: cci@01c9 {
+   compatible = "arm,cci-400";
+   #address-cells = <1>;
+   #size-cells = <1>;
+   reg = <0x01c9 0x1000>;
+   ranges = <0x0 0x01c9 0x1>;
+
+   cci_control0: slave-if@4000 {
+   compatible = "arm,cci-400-ctrl-if";
+   interface-type = "ace";
+   reg = <0x4000 0x1000>;
+   };
+
+   cci_control1: slave-if@5000 {
+   compatible = "arm,cci-400-ctrl-if";
+   interface-type = "ace";
+   reg = <0x5000 0x1000>;
+   };
+
+   pmu@9000 {
+compatible = "arm,cci-400-pmu,r1";
+reg = <0x9000 0x5000>;
+interrupts = ,
+ ,
+ ,
+ ,
+ ;
+   };
+   };
+
de_clocks: clock@0300 {
compatible = "allwinner,sun9i-a80-de-clks";
reg = <0x0300 0x30>;
-- 
2.13.3



[PATCH 2/4] ARM: dts: sun9i: Add CCI-400 device nodes for A80

2017-07-24 Thread Chen-Yu Tsai
The A80 includes an ARM CCI-400 interconnect to support multi-cluster
CPU caches.

Also add the maximum clock frequency for the CPUs, as listed in the
A80 Optimus Board FEX file.

Signed-off-by: Chen-Yu Tsai 
---
 arch/arm/boot/dts/sun9i-a80.dtsi | 46 
 1 file changed, 46 insertions(+)

diff --git a/arch/arm/boot/dts/sun9i-a80.dtsi b/arch/arm/boot/dts/sun9i-a80.dtsi
index 759a72317eb8..fc179b8ab038 100644
--- a/arch/arm/boot/dts/sun9i-a80.dtsi
+++ b/arch/arm/boot/dts/sun9i-a80.dtsi
@@ -63,48 +63,64 @@
cpu0: cpu@0 {
compatible = "arm,cortex-a7";
device_type = "cpu";
+   cci-control-port = <_control0>;
+   clock-frequency = <1200>;
reg = <0x0>;
};
 
cpu1: cpu@1 {
compatible = "arm,cortex-a7";
device_type = "cpu";
+   cci-control-port = <_control0>;
+   clock-frequency = <1200>;
reg = <0x1>;
};
 
cpu2: cpu@2 {
compatible = "arm,cortex-a7";
device_type = "cpu";
+   cci-control-port = <_control0>;
+   clock-frequency = <1200>;
reg = <0x2>;
};
 
cpu3: cpu@3 {
compatible = "arm,cortex-a7";
device_type = "cpu";
+   cci-control-port = <_control0>;
+   clock-frequency = <1200>;
reg = <0x3>;
};
 
cpu4: cpu@100 {
compatible = "arm,cortex-a15";
device_type = "cpu";
+   cci-control-port = <_control1>;
+   clock-frequency = <1800>;
reg = <0x100>;
};
 
cpu5: cpu@101 {
compatible = "arm,cortex-a15";
device_type = "cpu";
+   cci-control-port = <_control1>;
+   clock-frequency = <1800>;
reg = <0x101>;
};
 
cpu6: cpu@102 {
compatible = "arm,cortex-a15";
device_type = "cpu";
+   cci-control-port = <_control1>;
+   clock-frequency = <1800>;
reg = <0x102>;
};
 
cpu7: cpu@103 {
compatible = "arm,cortex-a15";
device_type = "cpu";
+   cci-control-port = <_control1>;
+   clock-frequency = <1800>;
reg = <0x103>;
};
};
@@ -436,6 +452,36 @@
interrupts = ;
};
 
+   cci: cci@01c9 {
+   compatible = "arm,cci-400";
+   #address-cells = <1>;
+   #size-cells = <1>;
+   reg = <0x01c9 0x1000>;
+   ranges = <0x0 0x01c9 0x1>;
+
+   cci_control0: slave-if@4000 {
+   compatible = "arm,cci-400-ctrl-if";
+   interface-type = "ace";
+   reg = <0x4000 0x1000>;
+   };
+
+   cci_control1: slave-if@5000 {
+   compatible = "arm,cci-400-ctrl-if";
+   interface-type = "ace";
+   reg = <0x5000 0x1000>;
+   };
+
+   pmu@9000 {
+compatible = "arm,cci-400-pmu,r1";
+reg = <0x9000 0x5000>;
+interrupts = ,
+ ,
+ ,
+ ,
+ ;
+   };
+   };
+
de_clocks: clock@0300 {
compatible = "allwinner,sun9i-a80-de-clks";
reg = <0x0300 0x30>;
-- 
2.13.3



Re: [Patch v5 12/12] Documention: v4l: Documentation for HEVC CIDs

2017-07-24 Thread Smitha T Murthy
On Thu, 2017-07-20 at 16:50 +0200, Hans Verkuil wrote:
> On 19/06/17 07:10, Smitha T Murthy wrote:
> > Added V4l2 controls for HEVC encoder
> > 
> > Signed-off-by: Smitha T Murthy 
> > ---
> >  Documentation/media/uapi/v4l/extended-controls.rst | 364 
> > +
> >  1 file changed, 364 insertions(+)
> > 
> > diff --git a/Documentation/media/uapi/v4l/extended-controls.rst 
> > b/Documentation/media/uapi/v4l/extended-controls.rst
> > index abb1057..7767c70 100644
> > --- a/Documentation/media/uapi/v4l/extended-controls.rst
> > +++ b/Documentation/media/uapi/v4l/extended-controls.rst
> > @@ -1960,6 +1960,370 @@ enum v4l2_vp8_golden_frame_sel -
> >  1, 2 and 3 corresponding to encoder profiles 0, 1, 2 and 3.
> >  
> >  
> > +High Efficiency Video Coding (HEVC/H.265) Control Reference
> > +---
> > +
> > +The HEVC/H.265 controls include controls for encoding parameters of 
> > HEVC/H.265
> > +video codec.
> > +
> > +
> > +.. _hevc-control-id:
> > +
> > +HEVC/H.265 Control IDs
> > +^^
> > +
> > +``V4L2_CID_MPEG_VIDEO_HEVC_MIN_QP (integer)``
> > +Minimum quantization parameter for HEVC.
> > +
> > +``V4L2_CID_MPEG_VIDEO_HEVC_MAX_QP (integer)``
> > +Maximum quantization parameter for HEVC.
> 
> It's a bit ambiguous. Are these supposed to be read-only parameters?
> Normally min-max is already implied in the control range, so this is a
> bit odd. Perhaps it is clear for people who know HEVC, but I'm not
> quite sure what to make of it.
> 
These controls are used to set the QP bound for encoding.
This control is present for all other codecs as well.

> > +
> > +``V4L2_CID_MPEG_VIDEO_HEVC_I_FRAME_QP (integer)``
> > +Quantization parameter for an I frame for HEVC.
> > +
> > +``V4L2_CID_MPEG_VIDEO_HEVC_P_FRAME_QP (integer)``
> > +Quantization parameter for a P frame for HEVC.
> > +
> > +``V4L2_CID_MPEG_VIDEO_HEVC_B_FRAME_QP (integer)``
> > +Quantization parameter for a B frame for HEVC.
> > +
> > +``V4L2_CID_MPEG_VIDEO_HEVC_HIER_QP (boolean)``
> > +HIERARCHICAL_QP allows host to specify the quantization parameter 
> > values
> > +for each temporal layer through HIERARCHICAL_QP_LAYER. This is valid 
> > only
> > +if HIERARCHICAL_CODING_LAYER is greater than 1. Setting the control 
> > value
> > +to 1 enables setting of the QP values for the layers.
> > +
> > +.. _v4l2-hevc-hier-coding-type:
> > +
> > +``V4L2_CID_MPEG_VIDEO_HEVC_HIER_CODING_TYPE``
> > +(enum)
> > +
> > +enum v4l2_mpeg_video_hevc_hier_coding_type -
> > +Selects the hierarchical coding type for encoding. Possible values are:
> > +
> > +.. raw:: latex
> > +
> > +\begin{adjustbox}{width=\columnwidth}
> > +
> > +.. tabularcolumns:: |p{11.0cm}|p{10.0cm}|
> > +
> > +.. flat-table::
> > +:header-rows:  0
> > +:stub-columns: 0
> > +
> > +* - ``V4L2_MPEG_VIDEO_HEVC_HIERARCHICAL_CODING_B``
> > +  - Use the B frame for hierarchical coding.
> > +* - ``V4L2_MPEG_VIDEO_HEVC_HIERARCHICAL_CODING_P``
> > +  - Use the P frame for hierarchical coding.
> > +
> > +.. raw:: latex
> > +
> > +\end{adjustbox}
> > +
> > +
> > +``V4L2_CID_MPEG_VIDEO_HEVC_HIER_CODING_LAYER (integer)``
> > +Selects the hierarchical coding layer. In normal encoding
> > +(non-hierarchical coding), it should be zero. Possible values are 0 ~ 6.
> > +0 indicates HIERARCHICAL CODING LAYER 0, 1 indicates HIERARCHICAL 
> > CODING
> > +LAYER 1 and so on.
> > +
> > +``V4L2_CID_MPEG_VIDEO_HEVC_HIER_CODING_LAYER_QP (integer)``
> > +Indicates the hierarchical coding layer quantization parameter.
> > +For HEVC it can have a value of 0-51. Hence in the control value passed
> > +the LSB 16 bits will indicate the quantization parameter. The MSB 16 
> > bit
> > +will pass the layer(0-6) it is meant for.
> 
> This is ugly. Why not make this an array control? This really is an array of
> 7 values, right? An alternative is to split this in 7 controls just as you did
> with V4L2_CID_MPEG_VIDEO_HEVC_HIER_CODING_L?_BR.
> 
> The way it is now doesn't work either since 
> G_CTRL(V4L2_CID_MPEG_VIDEO_HEVC_HIER_CODING_LAYER_QP)
> would just return the QP for whatever was the last layer you set it for and 
> you can't
> query it for another layer.
> 
Ok I will add this as an array control.

> > +
> > +.. _v4l2-hevc-profile:
> > +
> > +``V4L2_CID_MPEG_VIDEO_HEVC_PROFILE``
> > +(enum)
> > +
> > +enum v4l2_mpeg_video_hevc_profile -
> > +Select the desired profile for HEVC encoder.
> > +
> > +.. raw:: latex
> > +
> > +\begin{adjustbox}{width=\columnwidth}
> > +
> > +.. tabularcolumns:: |p{11.0cm}|p{10.0cm}|
> > +
> > +.. flat-table::
> > +:header-rows:  0
> > +:stub-columns: 0
> > +
> > +* - ``V4L2_MPEG_VIDEO_HEVC_PROFILE_MAIN``
> > +  - Main profile.
> > +* - ``V4L2_MPEG_VIDEO_HEVC_PROFILE_MAIN_STILL_PICTURE``
> > +  - Main still picture profile.
> > +
> > +.. raw:: latex
> > +

[PATCH 1/1] usb: xhci: Handle USB transaction error on address command

2017-07-24 Thread Lu Baolu
Xhci driver handles USB transaction errors on transfer events,
but transaction errors are possible on address device command
completion events as well.

The xHCI specification (section 4.6.5) says: A USB Transaction
Error Completion Code for an Address Device Command may be due
to a Stall response from a device. Software should issue a Disable
Slot Command for the Device Slot then an Enable Slot Command to
recover from this error.

This patch handles USB transaction errors on address command
completion events. The related discussion threads can be found
through below links.

http://marc.info/?l=linux-usb&m=149362010728921&w=2
http://marc.info/?l=linux-usb&m=149252752825755&w=2

Suggested-by: Mathias Nyman 
Signed-off-by: Lu Baolu 
---
 drivers/usb/host/xhci.c | 6 ++
 1 file changed, 6 insertions(+)

diff --git a/drivers/usb/host/xhci.c b/drivers/usb/host/xhci.c
index b2ff1ff..9cc56cd 100644
--- a/drivers/usb/host/xhci.c
+++ b/drivers/usb/host/xhci.c
@@ -3836,6 +3836,12 @@ static int xhci_setup_device(struct usb_hcd *hcd, struct 
usb_device *udev,
ret = -EINVAL;
break;
case COMP_USB_TRANSACTION_ERROR:
+   xhci_free_virt_device(xhci, udev->slot_id);
+   ret = xhci_disable_slot(xhci, command, udev->slot_id);
+   udev->slot_id = 0;
+   if (!ret)
+   xhci_alloc_dev(hcd, udev);
+
dev_warn(>dev, "Device not responding to setup %s.\n", 
act);
ret = -EPROTO;
break;
-- 
2.7.4



Re: [Patch v5 12/12] Documention: v4l: Documentation for HEVC CIDs

2017-07-24 Thread Smitha T Murthy
On Thu, 2017-07-20 at 16:50 +0200, Hans Verkuil wrote:
> On 19/06/17 07:10, Smitha T Murthy wrote:
> > Added V4l2 controls for HEVC encoder
> > 
> > Signed-off-by: Smitha T Murthy 
> > ---
> >  Documentation/media/uapi/v4l/extended-controls.rst | 364 
> > +
> >  1 file changed, 364 insertions(+)
> > 
> > diff --git a/Documentation/media/uapi/v4l/extended-controls.rst 
> > b/Documentation/media/uapi/v4l/extended-controls.rst
> > index abb1057..7767c70 100644
> > --- a/Documentation/media/uapi/v4l/extended-controls.rst
> > +++ b/Documentation/media/uapi/v4l/extended-controls.rst
> > @@ -1960,6 +1960,370 @@ enum v4l2_vp8_golden_frame_sel -
> >  1, 2 and 3 corresponding to encoder profiles 0, 1, 2 and 3.
> >  
> >  
> > +High Efficiency Video Coding (HEVC/H.265) Control Reference
> > +---
> > +
> > +The HEVC/H.265 controls include controls for encoding parameters of 
> > HEVC/H.265
> > +video codec.
> > +
> > +
> > +.. _hevc-control-id:
> > +
> > +HEVC/H.265 Control IDs
> > +^^
> > +
> > +``V4L2_CID_MPEG_VIDEO_HEVC_MIN_QP (integer)``
> > +Minimum quantization parameter for HEVC.
> > +
> > +``V4L2_CID_MPEG_VIDEO_HEVC_MAX_QP (integer)``
> > +Maximum quantization parameter for HEVC.
> 
> It's a bit ambiguous. Are these supposed to be read-only parameters?
> Normally min-max is already implied in the control range, so this is a
> bit odd. Perhaps it is clear for people who know HEVC, but I'm not
> quite sure what to make of it.
> 
These controls are used to set the QP bound for encoding.
This control is present for all other codecs as well.

> > +
> > +``V4L2_CID_MPEG_VIDEO_HEVC_I_FRAME_QP (integer)``
> > +Quantization parameter for an I frame for HEVC.
> > +
> > +``V4L2_CID_MPEG_VIDEO_HEVC_P_FRAME_QP (integer)``
> > +Quantization parameter for a P frame for HEVC.
> > +
> > +``V4L2_CID_MPEG_VIDEO_HEVC_B_FRAME_QP (integer)``
> > +Quantization parameter for a B frame for HEVC.
> > +
> > +``V4L2_CID_MPEG_VIDEO_HEVC_HIER_QP (boolean)``
> > +HIERARCHICAL_QP allows host to specify the quantization parameter 
> > values
> > +for each temporal layer through HIERARCHICAL_QP_LAYER. This is valid 
> > only
> > +if HIERARCHICAL_CODING_LAYER is greater than 1. Setting the control 
> > value
> > +to 1 enables setting of the QP values for the layers.
> > +
> > +.. _v4l2-hevc-hier-coding-type:
> > +
> > +``V4L2_CID_MPEG_VIDEO_HEVC_HIER_CODING_TYPE``
> > +(enum)
> > +
> > +enum v4l2_mpeg_video_hevc_hier_coding_type -
> > +Selects the hierarchical coding type for encoding. Possible values are:
> > +
> > +.. raw:: latex
> > +
> > +\begin{adjustbox}{width=\columnwidth}
> > +
> > +.. tabularcolumns:: |p{11.0cm}|p{10.0cm}|
> > +
> > +.. flat-table::
> > +:header-rows:  0
> > +:stub-columns: 0
> > +
> > +* - ``V4L2_MPEG_VIDEO_HEVC_HIERARCHICAL_CODING_B``
> > +  - Use the B frame for hierarchical coding.
> > +* - ``V4L2_MPEG_VIDEO_HEVC_HIERARCHICAL_CODING_P``
> > +  - Use the P frame for hierarchical coding.
> > +
> > +.. raw:: latex
> > +
> > +\end{adjustbox}
> > +
> > +
> > +``V4L2_CID_MPEG_VIDEO_HEVC_HIER_CODING_LAYER (integer)``
> > +Selects the hierarchical coding layer. In normal encoding
> > +(non-hierarchical coding), it should be zero. Possible values are 0 ~ 6.
> > +0 indicates HIERARCHICAL CODING LAYER 0, 1 indicates HIERARCHICAL 
> > CODING
> > +LAYER 1 and so on.
> > +
> > +``V4L2_CID_MPEG_VIDEO_HEVC_HIER_CODING_LAYER_QP (integer)``
> > +Indicates the hierarchical coding layer quantization parameter.
> > +For HEVC it can have a value of 0-51. Hence in the control value passed
> > +the LSB 16 bits will indicate the quantization parameter. The MSB 16 
> > bit
> > +will pass the layer(0-6) it is meant for.
> 
> This is ugly. Why not make this an array control? This really is an array of
> 7 values, right? An alternative is to split this in 7 controls just as you did
> with V4L2_CID_MPEG_VIDEO_HEVC_HIER_CODING_L?_BR.
> 
> The way it is now doesn't work either since 
> G_CTRL(V4L2_CID_MPEG_VIDEO_HEVC_HIER_CODING_LAYER_QP)
> would just return the QP for whatever was the last layer you set it for and 
> you can't
> query it for another layer.
> 
Ok I will add this as an array control.

> > +
> > +.. _v4l2-hevc-profile:
> > +
> > +``V4L2_CID_MPEG_VIDEO_HEVC_PROFILE``
> > +(enum)
> > +
> > +enum v4l2_mpeg_video_hevc_profile -
> > +Select the desired profile for HEVC encoder.
> > +
> > +.. raw:: latex
> > +
> > +\begin{adjustbox}{width=\columnwidth}
> > +
> > +.. tabularcolumns:: |p{11.0cm}|p{10.0cm}|
> > +
> > +.. flat-table::
> > +:header-rows:  0
> > +:stub-columns: 0
> > +
> > +* - ``V4L2_MPEG_VIDEO_HEVC_PROFILE_MAIN``
> > +  - Main profile.
> > +* - ``V4L2_MPEG_VIDEO_HEVC_PROFILE_MAIN_STILL_PICTURE``
> > +  - Main still picture profile.
> > +
> > +.. raw:: latex
> > +
> > +

[PATCH 1/1] usb: xhci: Handle USB transaction error on address command

2017-07-24 Thread Lu Baolu
Xhci driver handles USB transaction errors on transfer events,
but transaction errors are possible on address device command
completion events as well.

The xHCI specification (section 4.6.5) says: A USB Transaction
Error Completion Code for an Address Device Command may be due
to a Stall response from a device. Software should issue a Disable
Slot Command for the Device Slot then an Enable Slot Command to
recover from this error.

This patch handles USB transaction errors on address command
completion events. The related discussion threads can be found
through below links.

http://marc.info/?l=linux-usb&m=149362010728921&w=2
http://marc.info/?l=linux-usb&m=149252752825755&w=2

Suggested-by: Mathias Nyman 
Signed-off-by: Lu Baolu 
---
 drivers/usb/host/xhci.c | 6 ++
 1 file changed, 6 insertions(+)

diff --git a/drivers/usb/host/xhci.c b/drivers/usb/host/xhci.c
index b2ff1ff..9cc56cd 100644
--- a/drivers/usb/host/xhci.c
+++ b/drivers/usb/host/xhci.c
@@ -3836,6 +3836,12 @@ static int xhci_setup_device(struct usb_hcd *hcd, struct 
usb_device *udev,
ret = -EINVAL;
break;
case COMP_USB_TRANSACTION_ERROR:
+   xhci_free_virt_device(xhci, udev->slot_id);
+   ret = xhci_disable_slot(xhci, command, udev->slot_id);
+   udev->slot_id = 0;
+   if (!ret)
+   xhci_alloc_dev(hcd, udev);
+
dev_warn(>dev, "Device not responding to setup %s.\n", 
act);
ret = -EPROTO;
break;
-- 
2.7.4



[PATCH v4] acpi/iort: numa: Add numa node mapping for smmuv3 devices

2017-07-24 Thread Ganapatrao Kulkarni
The ARM IORT specification (rev. C) has added a provision to define the
proximity domain in the SMMUv3 IORT table. Add the required code to parse the
proximity domain and set the numa_node of SMMUv3 platform devices.

Add code to parse proximity domain in SMMUv3 IORT table to
set numa node mapping for smmuv3 devices.

Signed-off-by: Ganapatrao Kulkarni 
---

This patch has dependency on header file patch [1], which is
already merged to linux-pm.

[1] 
https://git.kernel.org/pub/scm/linux/kernel/git/rafael/linux-pm.git/commit/?h=linux-next&id=c944230064eb65e4fa018d86779b4fd200b1d7e7

v4:
  - Fix compilation issue in !CONFIG_NUMA

v3:
  - Addressed Lorenzo Pieralisi comment.

v2:
  - Changed as per Lorenzo Pieralisi and Hanjun Guo suggestions.

v1:
  - Initial patch

 drivers/acpi/arm64/iort.c | 33 +++--
 1 file changed, 31 insertions(+), 2 deletions(-)

diff --git a/drivers/acpi/arm64/iort.c b/drivers/acpi/arm64/iort.c
index a3215ee..c5c82c3 100644
--- a/drivers/acpi/arm64/iort.c
+++ b/drivers/acpi/arm64/iort.c
@@ -908,6 +908,28 @@ static bool __init arm_smmu_v3_is_coherent(struct 
acpi_iort_node *node)
return smmu->flags & ACPI_IORT_SMMU_V3_COHACC_OVERRIDE;
 }
 
+#if defined(CONFIG_ACPI_NUMA) && (ACPI_CA_VERSION >= 0x20170629)
+/*
+ * set numa proximity domain for smmuv3 device
+ */
+static void  __init arm_smmu_v3_set_proximity(struct acpi_iort_node *node,
+   struct device *dev)
+{
+   struct acpi_iort_smmu_v3 *smmu;
+
+   smmu = (struct acpi_iort_smmu_v3 *)node->node_data;
+   if (smmu->flags & ACPI_IORT_SMMU_V3_PXM_VALID) {
+   set_dev_node(dev, acpi_map_pxm_to_node(smmu->pxm));
+   pr_info("SMMUV3[%llx] Mapped to Proximity domain %d\n",
+   smmu->base_address,
+   smmu->pxm);
+   }
+}
+#else
+static void  __init arm_smmu_v3_set_proximity(struct acpi_iort_node *node,
+   struct device *dev) { }
+#endif
+
 static int __init arm_smmu_count_resources(struct acpi_iort_node *node)
 {
struct acpi_iort_smmu *smmu;
@@ -977,20 +999,24 @@ struct iort_iommu_config {
int (*iommu_count_resources)(struct acpi_iort_node *node);
void (*iommu_init_resources)(struct resource *res,
 struct acpi_iort_node *node);
+   void (*iommu_set_proximity)(struct acpi_iort_node *node,
+struct device *dev);
 };
 
 static const struct iort_iommu_config iort_arm_smmu_v3_cfg __initconst = {
.name = "arm-smmu-v3",
.iommu_is_coherent = arm_smmu_v3_is_coherent,
.iommu_count_resources = arm_smmu_v3_count_resources,
-   .iommu_init_resources = arm_smmu_v3_init_resources
+   .iommu_init_resources = arm_smmu_v3_init_resources,
+   .iommu_set_proximity = arm_smmu_v3_set_proximity
 };
 
 static const struct iort_iommu_config iort_arm_smmu_cfg __initconst = {
.name = "arm-smmu",
.iommu_is_coherent = arm_smmu_is_coherent,
.iommu_count_resources = arm_smmu_count_resources,
-   .iommu_init_resources = arm_smmu_init_resources
+   .iommu_init_resources = arm_smmu_init_resources,
+   .iommu_set_proximity = NULL
 };
 
 static __init
@@ -1028,6 +1054,9 @@ static int __init iort_add_smmu_platform_device(struct 
acpi_iort_node *node)
if (!pdev)
return -ENOMEM;
 
+   if (ops->iommu_set_proximity)
+   ops->iommu_set_proximity(node, >dev);
+
count = ops->iommu_count_resources(node);
 
r = kcalloc(count, sizeof(*r), GFP_KERNEL);
-- 
2.9.4



[PATCH v4] acpi/iort: numa: Add numa node mapping for smmuv3 devices

2017-07-24 Thread Ganapatrao Kulkarni
ARM IORT specification (rev. C) has added a provision to define the proximity
domain in the SMMUv3 IORT table. Add the required code to parse the proximity
domain and set the numa_node of smmuv3 platform devices.

Add code to parse proximity domain in SMMUv3 IORT table to
set numa node mapping for smmuv3 devices.

Signed-off-by: Ganapatrao Kulkarni 
---

This patch has dependency on header file patch [1], which is
already merged to linux-pm.

[1] 
https://git.kernel.org/pub/scm/linux/kernel/git/rafael/linux-pm.git/commit/?h=linux-next&id=c944230064eb65e4fa018d86779b4fd200b1d7e7

v4:
  - Fix compilation issue in !CONFIG_NUMA

v3:
  - Addressed Lorenzo Pieralisi comment.

v2:
  - Changed as per Lorenzo Pieralisi and Hanjun Guo suggestions.

v1:
  - Initial patch

 drivers/acpi/arm64/iort.c | 33 +++--
 1 file changed, 31 insertions(+), 2 deletions(-)

diff --git a/drivers/acpi/arm64/iort.c b/drivers/acpi/arm64/iort.c
index a3215ee..c5c82c3 100644
--- a/drivers/acpi/arm64/iort.c
+++ b/drivers/acpi/arm64/iort.c
@@ -908,6 +908,28 @@ static bool __init arm_smmu_v3_is_coherent(struct 
acpi_iort_node *node)
return smmu->flags & ACPI_IORT_SMMU_V3_COHACC_OVERRIDE;
 }
 
+#if defined(CONFIG_ACPI_NUMA) && (ACPI_CA_VERSION >= 0x20170629)
+/*
+ * set numa proximity domain for smmuv3 device
+ */
+static void  __init arm_smmu_v3_set_proximity(struct acpi_iort_node *node,
+   struct device *dev)
+{
+   struct acpi_iort_smmu_v3 *smmu;
+
+   smmu = (struct acpi_iort_smmu_v3 *)node->node_data;
+   if (smmu->flags & ACPI_IORT_SMMU_V3_PXM_VALID) {
+   set_dev_node(dev, acpi_map_pxm_to_node(smmu->pxm));
+   pr_info("SMMUV3[%llx] Mapped to Proximity domain %d\n",
+   smmu->base_address,
+   smmu->pxm);
+   }
+}
+#else
+static void  __init arm_smmu_v3_set_proximity(struct acpi_iort_node *node,
+   struct device *dev) { }
+#endif
+
 static int __init arm_smmu_count_resources(struct acpi_iort_node *node)
 {
struct acpi_iort_smmu *smmu;
@@ -977,20 +999,24 @@ struct iort_iommu_config {
int (*iommu_count_resources)(struct acpi_iort_node *node);
void (*iommu_init_resources)(struct resource *res,
 struct acpi_iort_node *node);
+   void (*iommu_set_proximity)(struct acpi_iort_node *node,
+struct device *dev);
 };
 
 static const struct iort_iommu_config iort_arm_smmu_v3_cfg __initconst = {
.name = "arm-smmu-v3",
.iommu_is_coherent = arm_smmu_v3_is_coherent,
.iommu_count_resources = arm_smmu_v3_count_resources,
-   .iommu_init_resources = arm_smmu_v3_init_resources
+   .iommu_init_resources = arm_smmu_v3_init_resources,
+   .iommu_set_proximity = arm_smmu_v3_set_proximity
 };
 
 static const struct iort_iommu_config iort_arm_smmu_cfg __initconst = {
.name = "arm-smmu",
.iommu_is_coherent = arm_smmu_is_coherent,
.iommu_count_resources = arm_smmu_count_resources,
-   .iommu_init_resources = arm_smmu_init_resources
+   .iommu_init_resources = arm_smmu_init_resources,
+   .iommu_set_proximity = NULL
 };
 
 static __init
@@ -1028,6 +1054,9 @@ static int __init iort_add_smmu_platform_device(struct 
acpi_iort_node *node)
if (!pdev)
return -ENOMEM;
 
+   if (ops->iommu_set_proximity)
+   ops->iommu_set_proximity(node, >dev);
+
count = ops->iommu_count_resources(node);
 
r = kcalloc(count, sizeof(*r), GFP_KERNEL);
-- 
2.9.4



linux-next: Tree for Jul 25

2017-07-24 Thread Stephen Rothwell
Hi all,

Changes since 20170724:

New tree: wberr

Non-merge commits (relative to Linus' tree): 2223
 2355 files changed, 84862 insertions(+), 43462 deletions(-)



I have created today's linux-next tree at
git://git.kernel.org/pub/scm/linux/kernel/git/next/linux-next.git
(patches at http://www.kernel.org/pub/linux/kernel/next/ ).  If you
are tracking the linux-next tree using git, you should not use "git pull"
to do so as that will try to merge the new linux-next release with the
old one.  You should use "git fetch" and checkout or reset to the new
master.

You can see which trees have been included by looking in the Next/Trees
file in the source.  There are also quilt-import.log and merge.log
files in the Next directory.  Between each merge, the tree was built
with a ppc64_defconfig for powerpc and an allmodconfig (with
CONFIG_BUILD_DOCSRC=n) for x86_64, a multi_v7_defconfig for arm and a
native build of tools/perf. After the final fixups (if any), I do an
x86_64 modules_install followed by builds for x86_64 allnoconfig,
powerpc allnoconfig (32 and 64 bit), ppc44x_defconfig, allyesconfig
and pseries_le_defconfig and i386, sparc and sparc64 defconfig. And
finally, a simple boot test of the powerpc pseries_le_defconfig kernel
in qemu.

Below is a summary of the state of the merge.

I am currently merging 267 trees (counting Linus' and 41 trees of bug
fix patches pending for the current merge release).

Stats about the size of the tree over time can be seen at
http://neuling.org/linux-next-size.html .

Status of my local build tests will be at
http://kisskb.ellerman.id.au/linux-next .  If maintainers want to give
advice about cross compilers/configs that work, we are always open to add
more builds.

Thanks to Randy Dunlap for doing many randconfig builds.  And to Paul
Gortmaker for triage and bug fixes.

-- 
Cheers,
Stephen Rothwell

$ git checkout master
$ git reset --hard stable
Merging origin/master (520eccdfe187 Linux 4.13-rc2)
Merging fixes/master (b4b8cbf679c4 Cavium CNN55XX: fix broken default Kconfig 
entry)
Merging kbuild-current/fixes (ad8181060788 kconfig: fix sparse warnings in 
nconfig)
Merging arc-current/for-curr (37f1db0e85ff ARC: [plat-axs10x]: prepare dts 
files for enabling PAE40 on axs103)
Merging arm-current/fixes (9e25ebfe56ec ARM: 8685/1: ensure memblock-limit is 
pmd-aligned)
Merging m68k-current/for-linus (204a2be30a7a m68k: Remove ptrace_signal_deliver)
Merging metag-fixes/fixes (b884a190afce metag/usercopy: Add missing fixups)
Merging powerpc-fixes/fixes (029d9252b116 powerpc/mm: Mark __init memory 
no-execute when STRICT_KERNEL_RWX=y)
Merging sparc/master (8cd3ec51c0c3 sbus: Convert to using %pOF instead of 
full_name)
Merging fscrypt-current/for-stable (42d97eb0ade3 fscrypt: fix renaming and 
linking special files)
Merging net/master (69ec932e364b openvswitch: fix potential out of bound access 
in parse_ct)
Merging ipsec/master (e6194923237f esp: Fix memleaks on error paths.)
Merging netfilter/master (f7fb77fc1235 netfilter: nft_compat: check extension 
hook mask only if set)
Merging ipvs/master (3c5ab3f395d6 ipvs: SNAT packet replies only for NATed 
connections)
Merging wireless-drivers/master (d755cbc26e82 Merge tag 
'iwlwifi-for-kalle-2017-07-21' of 
git://git.kernel.org/pub/scm/linux/kernel/git/iwlwifi/iwlwifi-fixes)
Merging mac80211/master (d7f13f745036 cfg80211: Validate frequencies nested in 
NL80211_ATTR_SCAN_FREQUENCIES)
Merging sound-current/for-linus (e674ac9a4705 ALSA: hda/realtek - No loopback 
on ALC225/ALC295 codec)
Merging pci-current/for-linus (34d5ac2af644 PCI: rockchip: Check for 
pci_scan_root_bus_bridge() failure correctly)
Merging driver-core.current/driver-core-linus (5771a8c08880 Linux v4.13-rc1)
Merging tty.current/tty-linus (520eccdfe187 Linux 4.13-rc2)
Merging usb.current/usb-linus (520eccdfe187 Linux 4.13-rc2)
Merging usb-gadget-fixes/fixes (520eccdfe187 Linux 4.13-rc2)
Merging usb-serial-fixes/usb-linus (9585e340db9f USB: serial: cp210x: add 
support for Qivicon USB ZigBee dongle)
Merging usb-chipidea-fixes/ci-for-usb-stable (cbb22ebcfb99 usb: chipidea: core: 
check before accessing ci_role in ci_role_show)
Merging phy/fixes (5771a8c08880 Linux v4.13-rc1)
Merging staging.current/staging-linus (055655a9f0fe Merge tag 
'iio-fixes-for-4.13a' of 
git://git.kernel.org/pub/scm/linux/kernel/git/jic23/iio into staging-linus)
Merging char-misc.current/char-misc-linus (520eccdfe187 Linux 4.13-rc2)
Merging input-current/for-linus (293b915fd9be Input: trackpoint - assume 3 
buttons when buttons detection fails)
Merging crypto-current/master (41cdf7a45389 crypto: authencesn - Fix 
digest_null crash)
Merging ide/master (921edf312a6a ide: avoid warning for timings calculation)
Merging vfio-fixes/for-linus (39da7c509acf Linux 4.11-rc6)
Merging kselftest-fixes/fixes (5771a8c08880 Linux v4.13-rc1)
Merging backlight-fixes/for-backlight-fixes (68feaca0b13e backlight: pwm: 

linux-next: Tree for Jul 25

2017-07-24 Thread Stephen Rothwell
Hi all,

Changes since 20170724:

New tree: wberr

Non-merge commits (relative to Linus' tree): 2223
 2355 files changed, 84862 insertions(+), 43462 deletions(-)



I have created today's linux-next tree at
git://git.kernel.org/pub/scm/linux/kernel/git/next/linux-next.git
(patches at http://www.kernel.org/pub/linux/kernel/next/ ).  If you
are tracking the linux-next tree using git, you should not use "git pull"
to do so as that will try to merge the new linux-next release with the
old one.  You should use "git fetch" and checkout or reset to the new
master.

You can see which trees have been included by looking in the Next/Trees
file in the source.  There are also quilt-import.log and merge.log
files in the Next directory.  Between each merge, the tree was built
with a ppc64_defconfig for powerpc and an allmodconfig (with
CONFIG_BUILD_DOCSRC=n) for x86_64, a multi_v7_defconfig for arm and a
native build of tools/perf. After the final fixups (if any), I do an
x86_64 modules_install followed by builds for x86_64 allnoconfig,
powerpc allnoconfig (32 and 64 bit), ppc44x_defconfig, allyesconfig
and pseries_le_defconfig and i386, sparc and sparc64 defconfig. And
finally, a simple boot test of the powerpc pseries_le_defconfig kernel
in qemu.

Below is a summary of the state of the merge.

I am currently merging 267 trees (counting Linus' and 41 trees of bug
fix patches pending for the current merge release).

Stats about the size of the tree over time can be seen at
http://neuling.org/linux-next-size.html .

Status of my local build tests will be at
http://kisskb.ellerman.id.au/linux-next .  If maintainers want to give
advice about cross compilers/configs that work, we are always open to add
more builds.

Thanks to Randy Dunlap for doing many randconfig builds.  And to Paul
Gortmaker for triage and bug fixes.

-- 
Cheers,
Stephen Rothwell

$ git checkout master
$ git reset --hard stable
Merging origin/master (520eccdfe187 Linux 4.13-rc2)
Merging fixes/master (b4b8cbf679c4 Cavium CNN55XX: fix broken default Kconfig 
entry)
Merging kbuild-current/fixes (ad8181060788 kconfig: fix sparse warnings in 
nconfig)
Merging arc-current/for-curr (37f1db0e85ff ARC: [plat-axs10x]: prepare dts 
files for enabling PAE40 on axs103)
Merging arm-current/fixes (9e25ebfe56ec ARM: 8685/1: ensure memblock-limit is 
pmd-aligned)
Merging m68k-current/for-linus (204a2be30a7a m68k: Remove ptrace_signal_deliver)
Merging metag-fixes/fixes (b884a190afce metag/usercopy: Add missing fixups)
Merging powerpc-fixes/fixes (029d9252b116 powerpc/mm: Mark __init memory 
no-execute when STRICT_KERNEL_RWX=y)
Merging sparc/master (8cd3ec51c0c3 sbus: Convert to using %pOF instead of 
full_name)
Merging fscrypt-current/for-stable (42d97eb0ade3 fscrypt: fix renaming and 
linking special files)
Merging net/master (69ec932e364b openvswitch: fix potential out of bound access 
in parse_ct)
Merging ipsec/master (e6194923237f esp: Fix memleaks on error paths.)
Merging netfilter/master (f7fb77fc1235 netfilter: nft_compat: check extension 
hook mask only if set)
Merging ipvs/master (3c5ab3f395d6 ipvs: SNAT packet replies only for NATed 
connections)
Merging wireless-drivers/master (d755cbc26e82 Merge tag 
'iwlwifi-for-kalle-2017-07-21' of 
git://git.kernel.org/pub/scm/linux/kernel/git/iwlwifi/iwlwifi-fixes)
Merging mac80211/master (d7f13f745036 cfg80211: Validate frequencies nested in 
NL80211_ATTR_SCAN_FREQUENCIES)
Merging sound-current/for-linus (e674ac9a4705 ALSA: hda/realtek - No loopback 
on ALC225/ALC295 codec)
Merging pci-current/for-linus (34d5ac2af644 PCI: rockchip: Check for 
pci_scan_root_bus_bridge() failure correctly)
Merging driver-core.current/driver-core-linus (5771a8c08880 Linux v4.13-rc1)
Merging tty.current/tty-linus (520eccdfe187 Linux 4.13-rc2)
Merging usb.current/usb-linus (520eccdfe187 Linux 4.13-rc2)
Merging usb-gadget-fixes/fixes (520eccdfe187 Linux 4.13-rc2)
Merging usb-serial-fixes/usb-linus (9585e340db9f USB: serial: cp210x: add 
support for Qivicon USB ZigBee dongle)
Merging usb-chipidea-fixes/ci-for-usb-stable (cbb22ebcfb99 usb: chipidea: core: 
check before accessing ci_role in ci_role_show)
Merging phy/fixes (5771a8c08880 Linux v4.13-rc1)
Merging staging.current/staging-linus (055655a9f0fe Merge tag 
'iio-fixes-for-4.13a' of 
git://git.kernel.org/pub/scm/linux/kernel/git/jic23/iio into staging-linus)
Merging char-misc.current/char-misc-linus (520eccdfe187 Linux 4.13-rc2)
Merging input-current/for-linus (293b915fd9be Input: trackpoint - assume 3 
buttons when buttons detection fails)
Merging crypto-current/master (41cdf7a45389 crypto: authencesn - Fix 
digest_null crash)
Merging ide/master (921edf312a6a ide: avoid warning for timings calculation)
Merging vfio-fixes/for-linus (39da7c509acf Linux 4.11-rc6)
Merging kselftest-fixes/fixes (5771a8c08880 Linux v4.13-rc1)
Merging backlight-fixes/for-backlight-fixes (68feaca0b13e backlight: pwm: 

ERROR: "gen_pool_virt_to_phys" [drivers/staging/vboxvideo/vboxvideo.ko] undefined!

2017-07-24 Thread kbuild test robot
tree:   https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git 
master
head:   520eccdfe187591a51ea9ab4c1a024ae4d0f68d9
commit: dd55d44f408419278c00887bfcb2261d0caae350 staging: vboxvideo: Add 
vboxvideo to drivers/staging
date:   8 days ago
config: i386-randconfig-x071-07241824 (attached as .config)
compiler: gcc-6 (Debian 6.2.0-3) 6.2.0 20160901
reproduce:
git checkout dd55d44f408419278c00887bfcb2261d0caae350
# save the attached .config to linux build tree
make ARCH=i386 

All errors (new ones prefixed by >>):

>> ERROR: "gen_pool_virt_to_phys" [drivers/staging/vboxvideo/vboxvideo.ko] 
>> undefined!
>> ERROR: "gen_pool_add_virt" [drivers/staging/vboxvideo/vboxvideo.ko] 
>> undefined!
>> ERROR: "gen_pool_destroy" [drivers/staging/vboxvideo/vboxvideo.ko] undefined!
>> ERROR: "gen_pool_dma_alloc" [drivers/staging/vboxvideo/vboxvideo.ko] 
>> undefined!
>> ERROR: "gen_pool_create" [drivers/staging/vboxvideo/vboxvideo.ko] undefined!
>> ERROR: "gen_pool_free" [drivers/staging/vboxvideo/vboxvideo.ko] undefined!

---
0-DAY kernel test infrastructureOpen Source Technology Center
https://lists.01.org/pipermail/kbuild-all   Intel Corporation


.config.gz
Description: application/gzip


Re: bcache with existing ext4 filesystem

2017-07-24 Thread Theodore Ts'o
On Mon, Jul 24, 2017 at 10:04:51PM +0200, Pavel Machek wrote:
> Question for you was... Is the first 1KiB of each ext4 filesystem still
> free and "reserved for a bootloader"?

Yes.

> If I needed more for bcache superblock (8KiB, IIRC), would that be
> easy to accomplish on existing filesystem?

Huh?  Why would the bcache superblock matter when you're talking about
the ext4 layout?  The bcache superblock will be on the bcache
device/partition, and the ext4 superblock will be on the ext4
device/partition.

- Ted


ERROR: "gen_pool_virt_to_phys" [drivers/staging/vboxvideo/vboxvideo.ko] undefined!

2017-07-24 Thread kbuild test robot
tree:   https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git 
master
head:   520eccdfe187591a51ea9ab4c1a024ae4d0f68d9
commit: dd55d44f408419278c00887bfcb2261d0caae350 staging: vboxvideo: Add 
vboxvideo to drivers/staging
date:   8 days ago
config: i386-randconfig-x071-07241824 (attached as .config)
compiler: gcc-6 (Debian 6.2.0-3) 6.2.0 20160901
reproduce:
git checkout dd55d44f408419278c00887bfcb2261d0caae350
# save the attached .config to linux build tree
make ARCH=i386 

All errors (new ones prefixed by >>):

>> ERROR: "gen_pool_virt_to_phys" [drivers/staging/vboxvideo/vboxvideo.ko] 
>> undefined!
>> ERROR: "gen_pool_add_virt" [drivers/staging/vboxvideo/vboxvideo.ko] 
>> undefined!
>> ERROR: "gen_pool_destroy" [drivers/staging/vboxvideo/vboxvideo.ko] undefined!
>> ERROR: "gen_pool_dma_alloc" [drivers/staging/vboxvideo/vboxvideo.ko] 
>> undefined!
>> ERROR: "gen_pool_create" [drivers/staging/vboxvideo/vboxvideo.ko] undefined!
>> ERROR: "gen_pool_free" [drivers/staging/vboxvideo/vboxvideo.ko] undefined!

---
0-DAY kernel test infrastructureOpen Source Technology Center
https://lists.01.org/pipermail/kbuild-all   Intel Corporation


.config.gz
Description: application/gzip


Re: bcache with existing ext4 filesystem

2017-07-24 Thread Theodore Ts'o
On Mon, Jul 24, 2017 at 10:04:51PM +0200, Pavel Machek wrote:
> Question for you was... Is the first 1KiB of each ext4 filesystem still
> free and "reserved for a bootloader"?

Yes.

> If I needed more for bcache superblock (8KiB, IIRC), would that be
> easy to accomplish on existing filesystem?

Huh?  Why would the bcache superblock matter when you're talking about
the ext4 layout?  The bcache superblock will be on the bcache
device/partition, and the ext4 superblock will be on the ext4
device/partition.

- Ted


[RESEND PATCH 0/2] userfaultfd: Add feature to request for a signal delivery

2017-07-24 Thread Prakash Sangappa
Hi Andrea, Mike,

Resending - fixed email address.

Here is the patch set for the proposed userfaultfd UFFD_FEATURE_SIGBUS
feature, including tests in selftest/vm/userfaultfd.c

Please review.

See following for previous discussion.

http://www.spinics.net/lists/linux-mm/msg129224.html
http://www.spinics.net/lists/linux-mm/msg130678.html


Thanks,

Prakash Sangappa (2):
  userfaultfd: Add feature to request for a signal delivery
  userfaultfd: selftest: Add tests for UFFD_FREATURE_SIGBUS

 fs/userfaultfd.c |3 +
 include/uapi/linux/userfaultfd.h |   10 ++-
 tools/testing/selftests/vm/userfaultfd.c |  121 +-
 3 files changed, 130 insertions(+), 4 deletions(-)



[RESEND PATCH 1/2] userfaultfd: Add feature to request for a signal delivery

2017-07-24 Thread Prakash Sangappa
In some cases, userfaultfd mechanism should just deliver a SIGBUS signal
to the faulting process, instead of the page-fault event. Dealing with
page-fault event using a monitor thread can be an overhead in these
cases. For example applications like the database could use the signaling
mechanism for robustness purpose.

Database uses hugetlbfs for performance reason. Files on hugetlbfs
filesystem are created and huge pages allocated using fallocate() API.
Pages are deallocated/freed using fallocate() hole punching support.
These files are mmapped and accessed by many processes as shared memory.
The database keeps track of which offsets in the hugetlbfs file have
pages allocated.

Any access to a mapped address over holes in the file, which can occur due
to bugs in the application, is considered invalid, and the process is expected
to simply receive a SIGBUS.  However, currently when a hole in the file is
accessed via the mapped address, kernel/mm attempts to automatically
allocate a page at page fault time, resulting in implicitly filling the
hole in the file. This may not be the desired behavior for applications
like the database that want to explicitly manage page allocations of
hugetlbfs files.

Using userfaultfd mechanism with this support to get a signal, database
application can prevent pages from being allocated implicitly when
processes access mapped address over holes in the file.

This patch adds the UFFD_FEATURE_SIGBUS feature to the userfaultfd mechanism
to request a SIGBUS signal.

See following for previous discussion about the database requirement
leading to this proposal as suggested by Andrea.

http://www.spinics.net/lists/linux-mm/msg129224.html

Signed-off-by: Prakash Sangappa 
---
 fs/userfaultfd.c |3 +++
 include/uapi/linux/userfaultfd.h |   10 +-
 2 files changed, 12 insertions(+), 1 deletions(-)

diff --git a/fs/userfaultfd.c b/fs/userfaultfd.c
index 1d622f2..0bbe7df 100644
--- a/fs/userfaultfd.c
+++ b/fs/userfaultfd.c
@@ -371,6 +371,9 @@ int handle_userfault(struct vm_fault *vmf, unsigned long 
reason)
VM_BUG_ON(reason & ~(VM_UFFD_MISSING|VM_UFFD_WP));
VM_BUG_ON(!(reason & VM_UFFD_MISSING) ^ !!(reason & VM_UFFD_WP));
 
+   if (ctx->features & UFFD_FEATURE_SIGBUS)
+   goto out;
+
/*
 * If it's already released don't get it. This avoids to loop
 * in __get_user_pages if userfaultfd_release waits on the
diff --git a/include/uapi/linux/userfaultfd.h b/include/uapi/linux/userfaultfd.h
index 3b05953..d39d5db 100644
--- a/include/uapi/linux/userfaultfd.h
+++ b/include/uapi/linux/userfaultfd.h
@@ -23,7 +23,8 @@
   UFFD_FEATURE_EVENT_REMOVE |  \
   UFFD_FEATURE_EVENT_UNMAP |   \
   UFFD_FEATURE_MISSING_HUGETLBFS | \
-  UFFD_FEATURE_MISSING_SHMEM)
+  UFFD_FEATURE_MISSING_SHMEM | \
+  UFFD_FEATURE_SIGBUS)
 #define UFFD_API_IOCTLS\
((__u64)1 << _UFFDIO_REGISTER | \
 (__u64)1 << _UFFDIO_UNREGISTER |   \
@@ -153,6 +154,12 @@ struct uffdio_api {
 * UFFD_FEATURE_MISSING_SHMEM works the same as
 * UFFD_FEATURE_MISSING_HUGETLBFS, but it applies to shmem
 * (i.e. tmpfs and other shmem based APIs).
+*
+* UFFD_FEATURE_SIGBUS feature means no page-fault
+* (UFFD_EVENT_PAGEFAULT) event will be delivered, instead
+* a SIGBUS signal will be sent to the faulting process.
+* The application process can enable this behavior by adding
+* it to uffdio_api.features.
 */
 #define UFFD_FEATURE_PAGEFAULT_FLAG_WP (1<<0)
 #define UFFD_FEATURE_EVENT_FORK(1<<1)
@@ -161,6 +168,7 @@ struct uffdio_api {
 #define UFFD_FEATURE_MISSING_HUGETLBFS (1<<4)
 #define UFFD_FEATURE_MISSING_SHMEM (1<<5)
 #define UFFD_FEATURE_EVENT_UNMAP   (1<<6)
+#define UFFD_FEATURE_SIGBUS(1<<7)
__u64 features;
 
__u64 ioctls;
-- 
1.7.1



[RESEND PATCH 0/2] userfaultfd: Add feature to request for a signal delivery

2017-07-24 Thread Prakash Sangappa
Hi Andrea, Mike,

Resending - fixed email address.

Here is the patch set for the proposed userfaultfd UFFD_FEATURE_SIGBUS
feature, including tests in selftest/vm/userfaultfd.c

Please review.

See following for previous discussion.

http://www.spinics.net/lists/linux-mm/msg129224.html
http://www.spinics.net/lists/linux-mm/msg130678.html


Thanks,

Prakash Sangappa (2):
  userfaultfd: Add feature to request for a signal delivery
  userfaultfd: selftest: Add tests for UFFD_FREATURE_SIGBUS

 fs/userfaultfd.c |3 +
 include/uapi/linux/userfaultfd.h |   10 ++-
 tools/testing/selftests/vm/userfaultfd.c |  121 +-
 3 files changed, 130 insertions(+), 4 deletions(-)



[RESEND PATCH 1/2] userfaultfd: Add feature to request for a signal delivery

2017-07-24 Thread Prakash Sangappa
In some cases, userfaultfd mechanism should just deliver a SIGBUS signal
to the faulting process, instead of the page-fault event. Dealing with
page-fault event using a monitor thread can be an overhead in these
cases. For example applications like the database could use the signaling
mechanism for robustness purpose.

Database uses hugetlbfs for performance reason. Files on hugetlbfs
filesystem are created and huge pages allocated using fallocate() API.
Pages are deallocated/freed using fallocate() hole punching support.
These files are mmapped and accessed by many processes as shared memory.
The database keeps track of which offsets in the hugetlbfs file have
pages allocated.

Any access to a mapped address over holes in the file, which can occur due
to bugs in the application, is considered invalid, and the process is expected
to simply receive a SIGBUS.  However, currently when a hole in the file is
accessed via the mapped address, kernel/mm attempts to automatically
allocate a page at page fault time, resulting in implicitly filling the
hole in the file. This may not be the desired behavior for applications
like the database that want to explicitly manage page allocations of
hugetlbfs files.

Using userfaultfd mechanism with this support to get a signal, database
application can prevent pages from being allocated implicitly when
processes access mapped address over holes in the file.

This patch adds the UFFD_FEATURE_SIGBUS feature to the userfaultfd mechanism
to request a SIGBUS signal.

See following for previous discussion about the database requirement
leading to this proposal as suggested by Andrea.

http://www.spinics.net/lists/linux-mm/msg129224.html

Signed-off-by: Prakash Sangappa 
---
 fs/userfaultfd.c |3 +++
 include/uapi/linux/userfaultfd.h |   10 +-
 2 files changed, 12 insertions(+), 1 deletions(-)

diff --git a/fs/userfaultfd.c b/fs/userfaultfd.c
index 1d622f2..0bbe7df 100644
--- a/fs/userfaultfd.c
+++ b/fs/userfaultfd.c
@@ -371,6 +371,9 @@ int handle_userfault(struct vm_fault *vmf, unsigned long 
reason)
VM_BUG_ON(reason & ~(VM_UFFD_MISSING|VM_UFFD_WP));
VM_BUG_ON(!(reason & VM_UFFD_MISSING) ^ !!(reason & VM_UFFD_WP));
 
+   if (ctx->features & UFFD_FEATURE_SIGBUS)
+   goto out;
+
/*
 * If it's already released don't get it. This avoids to loop
 * in __get_user_pages if userfaultfd_release waits on the
diff --git a/include/uapi/linux/userfaultfd.h b/include/uapi/linux/userfaultfd.h
index 3b05953..d39d5db 100644
--- a/include/uapi/linux/userfaultfd.h
+++ b/include/uapi/linux/userfaultfd.h
@@ -23,7 +23,8 @@
   UFFD_FEATURE_EVENT_REMOVE |  \
   UFFD_FEATURE_EVENT_UNMAP |   \
   UFFD_FEATURE_MISSING_HUGETLBFS | \
-  UFFD_FEATURE_MISSING_SHMEM)
+  UFFD_FEATURE_MISSING_SHMEM | \
+  UFFD_FEATURE_SIGBUS)
 #define UFFD_API_IOCTLS\
((__u64)1 << _UFFDIO_REGISTER | \
 (__u64)1 << _UFFDIO_UNREGISTER |   \
@@ -153,6 +154,12 @@ struct uffdio_api {
 * UFFD_FEATURE_MISSING_SHMEM works the same as
 * UFFD_FEATURE_MISSING_HUGETLBFS, but it applies to shmem
 * (i.e. tmpfs and other shmem based APIs).
+*
+* UFFD_FEATURE_SIGBUS feature means no page-fault
+* (UFFD_EVENT_PAGEFAULT) event will be delivered, instead
+* a SIGBUS signal will be sent to the faulting process.
+* The application process can enable this behavior by adding
+* it to uffdio_api.features.
 */
 #define UFFD_FEATURE_PAGEFAULT_FLAG_WP (1<<0)
 #define UFFD_FEATURE_EVENT_FORK(1<<1)
@@ -161,6 +168,7 @@ struct uffdio_api {
 #define UFFD_FEATURE_MISSING_HUGETLBFS (1<<4)
 #define UFFD_FEATURE_MISSING_SHMEM (1<<5)
 #define UFFD_FEATURE_EVENT_UNMAP   (1<<6)
+#define UFFD_FEATURE_SIGBUS(1<<7)
__u64 features;
 
__u64 ioctls;
-- 
1.7.1



Re: [PATCH] powerpc/pseries: Fix of_node_put() underflow during pseries remove

2017-07-24 Thread Michael Ellerman
Tyrel Datwyler  writes:

> On 07/24/2017 03:42 AM, Michael Ellerman wrote:
>> Laurent Vivier  writes:
>> 
>>> As for commit 68baf692c435 ("powerpc/pseries: Fix of_node_put()
>>> underflow during DLPAR remove"), the call to of_node_put()
>>> must be removed from pSeries_reconfig_remove_node().
>>>
>>> dlpar_detach_node() and pSeries_reconfig_remove_node() call
>>> of_detach_node(), and thus the node should not be released
>>> in this case too.
>>>
>>> Signed-off-by: Laurent Vivier 
>>> ---
>>>  arch/powerpc/platforms/pseries/reconfig.c | 1 -
>>>  1 file changed, 1 deletion(-)
>> 
>> Thanks. I'll spare you the swearing about why we have the same bug in
>> two places.
>
> That's probably my bad. I must have failed to test with older powerpc-util 
> tooling where
> drmgr uses the /proc/ofdt interface for device tree modification.

OK. Really we should have automated tests of the various cases, I've
just never had time to write any.

Mainly the thing that bugs me is that we still have the two separate
paths. Or if we must maintain both they could at least share more code,
the two functions do basically the same thing AFAICS.

cheers


Re: [PATCH] powerpc/pseries: Fix of_node_put() underflow during pseries remove

2017-07-24 Thread Michael Ellerman
Tyrel Datwyler  writes:

> On 07/24/2017 03:42 AM, Michael Ellerman wrote:
>> Laurent Vivier  writes:
>> 
>>> As for commit 68baf692c435 ("powerpc/pseries: Fix of_node_put()
>>> underflow during DLPAR remove"), the call to of_node_put()
>>> must be removed from pSeries_reconfig_remove_node().
>>>
>>> dlpar_detach_node() and pSeries_reconfig_remove_node() call
>>> of_detach_node(), and thus the node should not be released
>>> in this case too.
>>>
>>> Signed-off-by: Laurent Vivier 
>>> ---
>>>  arch/powerpc/platforms/pseries/reconfig.c | 1 -
>>>  1 file changed, 1 deletion(-)
>> 
>> Thanks. I'll spare you the swearing about why we have the same bug in
>> two places.
>
> That's probably my bad. I must have failed to test with older powerpc-util 
> tooling where
> drmgr uses the /proc/ofdt interface for device tree modification.

OK. Really we should have automated tests of the various cases, I've
just never had time to write any.

Mainly the thing that bugs me is that we still have the two separate
paths. Or if we must maintain both they could at least share more code,
the two functions do basically the same thing AFAICS.

cheers


[RESEND PATCH 2/2] userfaultfd: selftest: Add tests for UFFD_FREATURE_SIGBUS

2017-07-24 Thread Prakash Sangappa
Signed-off-by: Prakash Sangappa 
---
 tools/testing/selftests/vm/userfaultfd.c |  121 +-
 1 files changed, 118 insertions(+), 3 deletions(-)

diff --git a/tools/testing/selftests/vm/userfaultfd.c 
b/tools/testing/selftests/vm/userfaultfd.c
index 1eae79a..6a43e84 100644
--- a/tools/testing/selftests/vm/userfaultfd.c
+++ b/tools/testing/selftests/vm/userfaultfd.c
@@ -66,6 +66,7 @@
 #include 
 #include 
 #include 
+#include 
 
 #ifdef __NR_userfaultfd
 
@@ -408,6 +409,7 @@ static int copy_page(int ufd, unsigned long offset)
userfaults++;
break;
case UFFD_EVENT_FORK:
+   close(uffd);
uffd = msg.arg.fork.ufd;
pollfd[0].fd = uffd;
break;
@@ -572,6 +574,17 @@ static int userfaultfd_open(int features)
return 0;
 }
 
+sigjmp_buf jbuf, *sigbuf;
+
+static void sighndl(int sig, siginfo_t *siginfo, void *ptr)
+{
+if (sig == SIGBUS) {
+if (sigbuf)
+ siglongjmp(*sigbuf, 1);
+abort();
+}
+}
+
 /*
  * For non-cooperative userfaultfd test we fork() a process that will
  * generate pagefaults, will mremap the area monitored by the
@@ -585,19 +598,54 @@ static int userfaultfd_open(int features)
  * The release of the pages currently generates event for shmem and
  * anonymous memory (UFFD_EVENT_REMOVE), hence it is not checked
  * for hugetlb.
+ * For signal test(UFFD_FEATURE_SIGBUS), primarily test signal
+ * delivery and ensure no userfault events are generated.
  */
-static int faulting_process(void)
+static int faulting_process(int signal_test)
 {
unsigned long nr;
unsigned long long count;
unsigned long split_nr_pages;
+   unsigned long lastnr;
+   struct sigaction act;
+   unsigned long signalled=0, sig_repeats = 0;
 
if (test_type != TEST_HUGETLB)
split_nr_pages = (nr_pages + 1) / 2;
else
split_nr_pages = nr_pages;
 
+   if (signal_test) {
+   sigbuf = 
+   memset (, 0, sizeof(act));
+   act.sa_sigaction = sighndl;
+   act.sa_flags = SA_SIGINFO;
+   if (sigaction(SIGBUS, , 0)) {
+   perror("sigaction");
+   return 1;
+   }
+   lastnr = (unsigned long)-1;
+   }
+
for (nr = 0; nr < split_nr_pages; nr++) {
+   if (signal_test) {
+   if (sigsetjmp(*sigbuf, 1) != 0) {
+   if (nr == lastnr) {
+   sig_repeats++;
+   continue;
+   }
+
+   lastnr = nr;
+   if (signal_test == 1) {
+   if (copy_page(uffd, nr * page_size))
+   signalled++;
+   } else {
+   signalled++;
+   continue;
+   }
+   }
+   }
+
count = *area_count(area_dst, nr);
if (count != count_verify[nr]) {
fprintf(stderr,
@@ -607,6 +655,8 @@ static int faulting_process(void)
}
}
 
+   if (signal_test)
+   return signalled != split_nr_pages || sig_repeats != 0;
if (test_type == TEST_HUGETLB)
return 0;
 
@@ -761,7 +811,7 @@ static int userfaultfd_events_test(void)
perror("fork"), exit(1);
 
if (!pid)
-   return faulting_process();
+   return faulting_process(0);
 
waitpid(pid, , 0);
if (err)
@@ -778,6 +828,70 @@ static int userfaultfd_events_test(void)
return userfaults != nr_pages;
 }
 
+static int userfaultfd_sig_test(void)
+{
+   struct uffdio_register uffdio_register;
+   unsigned long expected_ioctls;
+   unsigned long userfaults;
+   pthread_t uffd_mon;
+   int err, features;
+   pid_t pid;
+   char c;
+
+   printf("testing signal delivery: ");
+   fflush(stdout);
+
+   if (uffd_test_ops->release_pages(area_dst))
+   return 1;
+
+   features = UFFD_FEATURE_EVENT_FORK|UFFD_FEATURE_SIGBUS;
+   if (userfaultfd_open(features) < 0)
+   return 1;
+   fcntl(uffd, F_SETFL, uffd_flags | O_NONBLOCK);
+
+   uffdio_register.range.start = (unsigned long) area_dst;
+   uffdio_register.range.len = nr_pages * page_size;
+   uffdio_register.mode = UFFDIO_REGISTER_MODE_MISSING;
+   if (ioctl(uffd, UFFDIO_REGISTER, _register))
+   fprintf(stderr, "register failure\n"), exit(1);
+
+   expected_ioctls = 

[RESEND PATCH 2/2] userfaultfd: selftest: Add tests for UFFD_FEATURE_SIGBUS

2017-07-24 Thread Prakash Sangappa
Signed-off-by: Prakash Sangappa 
---
 tools/testing/selftests/vm/userfaultfd.c |  121 +-
 1 files changed, 118 insertions(+), 3 deletions(-)

diff --git a/tools/testing/selftests/vm/userfaultfd.c 
b/tools/testing/selftests/vm/userfaultfd.c
index 1eae79a..6a43e84 100644
--- a/tools/testing/selftests/vm/userfaultfd.c
+++ b/tools/testing/selftests/vm/userfaultfd.c
@@ -66,6 +66,7 @@
 #include 
 #include 
 #include 
+#include 
 
 #ifdef __NR_userfaultfd
 
@@ -408,6 +409,7 @@ static int copy_page(int ufd, unsigned long offset)
userfaults++;
break;
case UFFD_EVENT_FORK:
+   close(uffd);
uffd = msg.arg.fork.ufd;
pollfd[0].fd = uffd;
break;
@@ -572,6 +574,17 @@ static int userfaultfd_open(int features)
return 0;
 }
 
+sigjmp_buf jbuf, *sigbuf;
+
+static void sighndl(int sig, siginfo_t *siginfo, void *ptr)
+{
+if (sig == SIGBUS) {
+if (sigbuf)
+ siglongjmp(*sigbuf, 1);
+abort();
+}
+}
+
 /*
  * For non-cooperative userfaultfd test we fork() a process that will
  * generate pagefaults, will mremap the area monitored by the
@@ -585,19 +598,54 @@ static int userfaultfd_open(int features)
  * The release of the pages currently generates event for shmem and
  * anonymous memory (UFFD_EVENT_REMOVE), hence it is not checked
  * for hugetlb.
+ * For signal test(UFFD_FEATURE_SIGBUS), primarily test signal
+ * delivery and ensure no userfault events are generated.
  */
-static int faulting_process(void)
+static int faulting_process(int signal_test)
 {
unsigned long nr;
unsigned long long count;
unsigned long split_nr_pages;
+   unsigned long lastnr;
+   struct sigaction act;
+   unsigned long signalled=0, sig_repeats = 0;
 
if (test_type != TEST_HUGETLB)
split_nr_pages = (nr_pages + 1) / 2;
else
split_nr_pages = nr_pages;
 
+   if (signal_test) {
+   sigbuf = 
+   memset (, 0, sizeof(act));
+   act.sa_sigaction = sighndl;
+   act.sa_flags = SA_SIGINFO;
+   if (sigaction(SIGBUS, , 0)) {
+   perror("sigaction");
+   return 1;
+   }
+   lastnr = (unsigned long)-1;
+   }
+
for (nr = 0; nr < split_nr_pages; nr++) {
+   if (signal_test) {
+   if (sigsetjmp(*sigbuf, 1) != 0) {
+   if (nr == lastnr) {
+   sig_repeats++;
+   continue;
+   }
+
+   lastnr = nr;
+   if (signal_test == 1) {
+   if (copy_page(uffd, nr * page_size))
+   signalled++;
+   } else {
+   signalled++;
+   continue;
+   }
+   }
+   }
+
count = *area_count(area_dst, nr);
if (count != count_verify[nr]) {
fprintf(stderr,
@@ -607,6 +655,8 @@ static int faulting_process(void)
}
}
 
+   if (signal_test)
+   return signalled != split_nr_pages || sig_repeats != 0;
if (test_type == TEST_HUGETLB)
return 0;
 
@@ -761,7 +811,7 @@ static int userfaultfd_events_test(void)
perror("fork"), exit(1);
 
if (!pid)
-   return faulting_process();
+   return faulting_process(0);
 
waitpid(pid, , 0);
if (err)
@@ -778,6 +828,70 @@ static int userfaultfd_events_test(void)
return userfaults != nr_pages;
 }
 
+static int userfaultfd_sig_test(void)
+{
+   struct uffdio_register uffdio_register;
+   unsigned long expected_ioctls;
+   unsigned long userfaults;
+   pthread_t uffd_mon;
+   int err, features;
+   pid_t pid;
+   char c;
+
+   printf("testing signal delivery: ");
+   fflush(stdout);
+
+   if (uffd_test_ops->release_pages(area_dst))
+   return 1;
+
+   features = UFFD_FEATURE_EVENT_FORK|UFFD_FEATURE_SIGBUS;
+   if (userfaultfd_open(features) < 0)
+   return 1;
+   fcntl(uffd, F_SETFL, uffd_flags | O_NONBLOCK);
+
+   uffdio_register.range.start = (unsigned long) area_dst;
+   uffdio_register.range.len = nr_pages * page_size;
+   uffdio_register.mode = UFFDIO_REGISTER_MODE_MISSING;
+   if (ioctl(uffd, UFFDIO_REGISTER, _register))
+   fprintf(stderr, "register failure\n"), exit(1);
+
+   expected_ioctls = uffd_test_ops->expected_ioctls;
+   if 

Re: [PATCH v5 2/2] x86/mm: Improve TLB flush documentation

2017-07-24 Thread Nadav Amit
Andy Lutomirski  wrote:

> Improve comments as requested by PeterZ and also add some
> documentation at the top of the file.
> 
> Signed-off-by: Andy Lutomirski 
> ---
> arch/x86/mm/tlb.c | 43 +--
> 1 file changed, 33 insertions(+), 10 deletions(-)
> 
> diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c
> index ce104b962a17..d4ee781ca656 100644
> --- a/arch/x86/mm/tlb.c
> +++ b/arch/x86/mm/tlb.c
> @@ -15,17 +15,24 @@
> #include 
> 
> /*
> - *   TLB flushing, formerly SMP-only
> - *   c/o Linus Torvalds.
> + * The code in this file handles mm switches and TLB flushes.
>  *
> - *   These mean you can really definitely utterly forget about
> - *   writing to user space from interrupts. (Its not allowed anyway).
> + * An mm's TLB state is logically represented by a totally ordered sequence
> + * of TLB flushes.  Each flush increments the mm's tlb_gen.
>  *
> - *   Optimizations Manfred Spraul 
> + * Each CPU that might have an mm in its TLB (and that might ever use
> + * those TLB entries) will have an entry for it in its cpu_tlbstate.ctxs
> + * array.  The kernel maintains the following invariant: for each CPU and
> + * for each mm in its cpu_tlbstate.ctxs array, the CPU has performed all
> + * flushes in that mms history up to the tlb_gen in cpu_tlbstate.ctxs
> + * or the CPU has performed an equivalent set of flushes.
>  *
> - *   More scalable flush, from Andi Kleen
> - *
> - *   Implement flush IPI by CALL_FUNCTION_VECTOR, Alex Shi
> + * For this purpose, an equivalent set is a set that is at least as strong.
> + * So, for example, if the flush history is a full flush at time 1,
> + * a full flush after time 1 is sufficient, but a full flush before time 1
> + * is not.  Similarly, any number of flushes can be replaced by a single
> + * full flush so long as that replacement flush is after all the flushes
> + * that it's replacing.
>  */
> 
> atomic64_t last_mm_ctx_id = ATOMIC64_INIT(1);
> @@ -138,7 +145,16 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct 
> mm_struct *next,
>   return;
>   }
> 
> - /* Resume remote flushes and then read tlb_gen. */
> + /*
> +  * Resume remote flushes and then read tlb_gen.  The
> +  * implied barrier in atomic64_read() synchronizes
> +  * with inc_mm_tlb_gen() like this:

You mean the implied memory barrier in cpumask_set_cpu(), no?



Re: [PATCH v5 2/2] x86/mm: Improve TLB flush documentation

2017-07-24 Thread Nadav Amit
Andy Lutomirski  wrote:

> Improve comments as requested by PeterZ and also add some
> documentation at the top of the file.
> 
> Signed-off-by: Andy Lutomirski 
> ---
> arch/x86/mm/tlb.c | 43 +--
> 1 file changed, 33 insertions(+), 10 deletions(-)
> 
> diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c
> index ce104b962a17..d4ee781ca656 100644
> --- a/arch/x86/mm/tlb.c
> +++ b/arch/x86/mm/tlb.c
> @@ -15,17 +15,24 @@
> #include 
> 
> /*
> - *   TLB flushing, formerly SMP-only
> - *   c/o Linus Torvalds.
> + * The code in this file handles mm switches and TLB flushes.
>  *
> - *   These mean you can really definitely utterly forget about
> - *   writing to user space from interrupts. (Its not allowed anyway).
> + * An mm's TLB state is logically represented by a totally ordered sequence
> + * of TLB flushes.  Each flush increments the mm's tlb_gen.
>  *
> - *   Optimizations Manfred Spraul 
> + * Each CPU that might have an mm in its TLB (and that might ever use
> + * those TLB entries) will have an entry for it in its cpu_tlbstate.ctxs
> + * array.  The kernel maintains the following invariant: for each CPU and
> + * for each mm in its cpu_tlbstate.ctxs array, the CPU has performed all
> + * flushes in that mms history up to the tlb_gen in cpu_tlbstate.ctxs
> + * or the CPU has performed an equivalent set of flushes.
>  *
> - *   More scalable flush, from Andi Kleen
> - *
> - *   Implement flush IPI by CALL_FUNCTION_VECTOR, Alex Shi
> + * For this purpose, an equivalent set is a set that is at least as strong.
> + * So, for example, if the flush history is a full flush at time 1,
> + * a full flush after time 1 is sufficient, but a full flush before time 1
> + * is not.  Similarly, any number of flushes can be replaced by a single
> + * full flush so long as that replacement flush is after all the flushes
> + * that it's replacing.
>  */
> 
> atomic64_t last_mm_ctx_id = ATOMIC64_INIT(1);
> @@ -138,7 +145,16 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct 
> mm_struct *next,
>   return;
>   }
> 
> - /* Resume remote flushes and then read tlb_gen. */
> + /*
> +  * Resume remote flushes and then read tlb_gen.  The
> +  * implied barrier in atomic64_read() synchronizes
> +  * with inc_mm_tlb_gen() like this:

You mean the implied memory barrier in cpumask_set_cpu(), no?



[PATCH v5 2/2] x86/mm: Improve TLB flush documentation

2017-07-24 Thread Andy Lutomirski
Improve comments as requested by PeterZ and also add some
documentation at the top of the file.

Signed-off-by: Andy Lutomirski 
---
 arch/x86/mm/tlb.c | 43 +--
 1 file changed, 33 insertions(+), 10 deletions(-)

diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c
index ce104b962a17..d4ee781ca656 100644
--- a/arch/x86/mm/tlb.c
+++ b/arch/x86/mm/tlb.c
@@ -15,17 +15,24 @@
 #include 
 
 /*
- * TLB flushing, formerly SMP-only
- * c/o Linus Torvalds.
+ * The code in this file handles mm switches and TLB flushes.
  *
- * These mean you can really definitely utterly forget about
- * writing to user space from interrupts. (Its not allowed anyway).
+ * An mm's TLB state is logically represented by a totally ordered sequence
+ * of TLB flushes.  Each flush increments the mm's tlb_gen.
  *
- * Optimizations Manfred Spraul 
+ * Each CPU that might have an mm in its TLB (and that might ever use
+ * those TLB entries) will have an entry for it in its cpu_tlbstate.ctxs
+ * array.  The kernel maintains the following invariant: for each CPU and
+ * for each mm in its cpu_tlbstate.ctxs array, the CPU has performed all
+ * flushes in that mms history up to the tlb_gen in cpu_tlbstate.ctxs
+ * or the CPU has performed an equivalent set of flushes.
  *
- * More scalable flush, from Andi Kleen
- *
- * Implement flush IPI by CALL_FUNCTION_VECTOR, Alex Shi
+ * For this purpose, an equivalent set is a set that is at least as strong.
+ * So, for example, if the flush history is a full flush at time 1,
+ * a full flush after time 1 is sufficient, but a full flush before time 1
+ * is not.  Similarly, any number of flushes can be replaced by a single
+ * full flush so long as that replacement flush is after all the flushes
+ * that it's replacing.
  */
 
 atomic64_t last_mm_ctx_id = ATOMIC64_INIT(1);
@@ -138,7 +145,16 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct 
mm_struct *next,
return;
}
 
-   /* Resume remote flushes and then read tlb_gen. */
+   /*
+* Resume remote flushes and then read tlb_gen.  The
+* implied barrier in atomic64_read() synchronizes
+* with inc_mm_tlb_gen() like this:
+*
+* switch_mm_irqs_off():flush request:
+*  cpumask_set_cpu(...);inc_mm_tlb_gen();
+*  MB   MB
+*  atomic64_read(.tlb_gen); flush_tlb_others(mm_cpumask());
+*/
cpumask_set_cpu(cpu, mm_cpumask(next));
next_tlb_gen = atomic64_read(>context.tlb_gen);
 
@@ -186,7 +202,14 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct 
mm_struct *next,
VM_WARN_ON_ONCE(cpumask_test_cpu(cpu, mm_cpumask(next)));
 
/*
-* Start remote flushes and then read tlb_gen.
+* Start remote flushes and then read tlb_gen.  As
+* above, the implied barrier in atomic64_read()
+* synchronizes with inc_mm_tlb_gen() like this:
+*
+* switch_mm_irqs_off():flush request:
+*  cpumask_set_cpu(...);inc_mm_tlb_gen();
+*  MB   MB
+*  atomic64_read(.tlb_gen); flush_tlb_others(mm_cpumask());
 */
cpumask_set_cpu(cpu, mm_cpumask(next));
next_tlb_gen = atomic64_read(>context.tlb_gen);
-- 
2.9.4



[PATCH v5 2/2] x86/mm: Improve TLB flush documentation

2017-07-24 Thread Andy Lutomirski
Improve comments as requested by PeterZ and also add some
documentation at the top of the file.

Signed-off-by: Andy Lutomirski 
---
 arch/x86/mm/tlb.c | 43 +--
 1 file changed, 33 insertions(+), 10 deletions(-)

diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c
index ce104b962a17..d4ee781ca656 100644
--- a/arch/x86/mm/tlb.c
+++ b/arch/x86/mm/tlb.c
@@ -15,17 +15,24 @@
 #include 
 
 /*
- * TLB flushing, formerly SMP-only
- * c/o Linus Torvalds.
+ * The code in this file handles mm switches and TLB flushes.
  *
- * These mean you can really definitely utterly forget about
- * writing to user space from interrupts. (Its not allowed anyway).
+ * An mm's TLB state is logically represented by a totally ordered sequence
+ * of TLB flushes.  Each flush increments the mm's tlb_gen.
  *
- * Optimizations Manfred Spraul 
+ * Each CPU that might have an mm in its TLB (and that might ever use
+ * those TLB entries) will have an entry for it in its cpu_tlbstate.ctxs
+ * array.  The kernel maintains the following invariant: for each CPU and
+ * for each mm in its cpu_tlbstate.ctxs array, the CPU has performed all
+ * flushes in that mms history up to the tlb_gen in cpu_tlbstate.ctxs
+ * or the CPU has performed an equivalent set of flushes.
  *
- * More scalable flush, from Andi Kleen
- *
- * Implement flush IPI by CALL_FUNCTION_VECTOR, Alex Shi
+ * For this purpose, an equivalent set is a set that is at least as strong.
+ * So, for example, if the flush history is a full flush at time 1,
+ * a full flush after time 1 is sufficient, but a full flush before time 1
+ * is not.  Similarly, any number of flushes can be replaced by a single
+ * full flush so long as that replacement flush is after all the flushes
+ * that it's replacing.
  */
 
 atomic64_t last_mm_ctx_id = ATOMIC64_INIT(1);
@@ -138,7 +145,16 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct 
mm_struct *next,
return;
}
 
-   /* Resume remote flushes and then read tlb_gen. */
+   /*
+* Resume remote flushes and then read tlb_gen.  The
+* implied barrier in atomic64_read() synchronizes
+* with inc_mm_tlb_gen() like this:
+*
+* switch_mm_irqs_off():flush request:
+*  cpumask_set_cpu(...);inc_mm_tlb_gen();
+*  MB   MB
+*  atomic64_read(.tlb_gen); flush_tlb_others(mm_cpumask());
+*/
cpumask_set_cpu(cpu, mm_cpumask(next));
next_tlb_gen = atomic64_read(>context.tlb_gen);
 
@@ -186,7 +202,14 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct 
mm_struct *next,
VM_WARN_ON_ONCE(cpumask_test_cpu(cpu, mm_cpumask(next)));
 
/*
-* Start remote flushes and then read tlb_gen.
+* Start remote flushes and then read tlb_gen.  As
+* above, the implied barrier in atomic64_read()
+* synchronizes with inc_mm_tlb_gen() like this:
+*
+* switch_mm_irqs_off():flush request:
+*  cpumask_set_cpu(...);inc_mm_tlb_gen();
+*  MB   MB
+*  atomic64_read(.tlb_gen); flush_tlb_others(mm_cpumask());
 */
cpumask_set_cpu(cpu, mm_cpumask(next));
next_tlb_gen = atomic64_read(>context.tlb_gen);
-- 
2.9.4



[PATCH v5 1/2] x86/mm: Try to preserve old TLB entries using PCID

2017-07-24 Thread Andy Lutomirski
PCID is a "process context ID" -- it's what other architectures call
an address space ID.  Every non-global TLB entry is tagged with a
PCID, only TLB entries that match the currently selected PCID are
used, and we can switch PGDs without flushing the TLB.  x86's
PCID is 12 bits.

This is an unorthodox approach to using PCID.  x86's PCID is far too
short to uniquely identify a process, and we can't even really
uniquely identify a running process because there are monster
systems with over 4096 CPUs.  To make matters worse, past attempts
to use all 12 PCID bits have resulted in slowdowns instead of
speedups.

This patch uses PCID differently.  We use a PCID to identify a
recently-used mm on a per-cpu basis.  An mm has no fixed PCID
binding at all; instead, we give it a fresh PCID each time it's
loaded except in cases where we want to preserve the TLB, in which
case we reuse a recent value.

Here are some benchmark results, done on a Skylake laptop at 2.3 GHz
(turbo off, intel_pstate requesting max performance) under KVM with
the guest using idle=poll (to avoid artifacts when bouncing between
CPUs).  I haven't done any real statistics here -- I just ran them
in a loop and picked the fastest results that didn't look like
outliers.  Unpatched means commit a4eb8b993554, so all the
bookkeeping overhead is gone.

ping-pong between two mms on the same CPU using eventfd:
  patched: 1.22µs
  patched, nopcid: 1.33µs
  unpatched:   1.34µs

Same ping-pong, but now touch 512 pages (all zero-page to minimize
cache misses) each iteration.  dTLB misses are measured by
dtlb_load_misses.miss_causes_a_walk:
  patched: 1.8µs, 11M dTLB misses
  patched, nopcid: 6.2µs, 207M dTLB misses
  unpatched:   6.1µs, 190M dTLB misses

Reviewed-by: Nadav Amit 
Signed-off-by: Andy Lutomirski 
---
 arch/x86/include/asm/mmu_context.h |  3 ++
 arch/x86/include/asm/processor-flags.h |  2 +
 arch/x86/include/asm/tlbflush.h| 18 +++-
 arch/x86/mm/init.c |  1 +
 arch/x86/mm/tlb.c  | 80 +++---
 5 files changed, 86 insertions(+), 18 deletions(-)

diff --git a/arch/x86/include/asm/mmu_context.h 
b/arch/x86/include/asm/mmu_context.h
index 85f6b5575aad..14b3cdccf4f9 100644
--- a/arch/x86/include/asm/mmu_context.h
+++ b/arch/x86/include/asm/mmu_context.h
@@ -300,6 +300,9 @@ static inline unsigned long __get_current_cr3_fast(void)
 {
unsigned long cr3 = __pa(this_cpu_read(cpu_tlbstate.loaded_mm)->pgd);
 
+   if (static_cpu_has(X86_FEATURE_PCID))
+   cr3 |= this_cpu_read(cpu_tlbstate.loaded_mm_asid);
+
/* For now, be very restrictive about when this can be called. */
VM_WARN_ON(in_nmi() || !in_atomic());
 
diff --git a/arch/x86/include/asm/processor-flags.h 
b/arch/x86/include/asm/processor-flags.h
index f5d3e50af98c..8a6d89fc9a79 100644
--- a/arch/x86/include/asm/processor-flags.h
+++ b/arch/x86/include/asm/processor-flags.h
@@ -36,6 +36,7 @@
 /* Mask off the address space ID and SME encryption bits. */
 #define CR3_ADDR_MASK __sme_clr(0x7000ull)
 #define CR3_PCID_MASK 0xFFFull
+#define CR3_NOFLUSH (1UL << 63)
 #else
 /*
  * CR3_ADDR_MASK needs at least bits 31:5 set on PAE systems, and we save
@@ -43,6 +44,7 @@
  */
 #define CR3_ADDR_MASK 0xull
 #define CR3_PCID_MASK 0ull
+#define CR3_NOFLUSH 0
 #endif
 
 #endif /* _ASM_X86_PROCESSOR_FLAGS_H */
diff --git a/arch/x86/include/asm/tlbflush.h b/arch/x86/include/asm/tlbflush.h
index 6397275008db..d23e61dc0640 100644
--- a/arch/x86/include/asm/tlbflush.h
+++ b/arch/x86/include/asm/tlbflush.h
@@ -82,6 +82,12 @@ static inline u64 inc_mm_tlb_gen(struct mm_struct *mm)
 #define __flush_tlb_single(addr) __native_flush_tlb_single(addr)
 #endif
 
+/*
+ * 6 because 6 should be plenty and struct tlb_state will fit in
+ * two cache lines.
+ */
+#define TLB_NR_DYN_ASIDS 6
+
 struct tlb_context {
u64 ctx_id;
u64 tlb_gen;
@@ -95,6 +101,8 @@ struct tlb_state {
 * mode even if we've already switched back to swapper_pg_dir.
 */
struct mm_struct *loaded_mm;
+   u16 loaded_mm_asid;
+   u16 next_asid;
 
/*
 * Access to this CR4 shadow and to H/W CR4 is protected by
@@ -104,7 +112,8 @@ struct tlb_state {
 
/*
 * This is a list of all contexts that might exist in the TLB.
-* Since we don't yet use PCID, there is only one context.
+* There is one per ASID that we use, and the ASID (what the
+* CPU calls PCID) is the index into ctxts.
 *
 * For each context, ctx_id indicates which mm the TLB's user
 * entries came from.  As an invariant, the TLB will never
@@ -114,8 +123,13 @@ struct tlb_state {
 * To be clear, this means that it's legal for the TLB code to
 * flush the TLB without updating tlb_gen.  This can happen
 * (for now, at least) due to paravirt remote flushes.
+*

[PATCH v5 1/2] x86/mm: Try to preserve old TLB entries using PCID

2017-07-24 Thread Andy Lutomirski
PCID is a "process context ID" -- it's what other architectures call
an address space ID.  Every non-global TLB entry is tagged with a
PCID, only TLB entries that match the currently selected PCID are
used, and we can switch PGDs without flushing the TLB.  x86's
PCID is 12 bits.

This is an unorthodox approach to using PCID.  x86's PCID is far too
short to uniquely identify a process, and we can't even really
uniquely identify a running process because there are monster
systems with over 4096 CPUs.  To make matters worse, past attempts
to use all 12 PCID bits have resulted in slowdowns instead of
speedups.

This patch uses PCID differently.  We use a PCID to identify a
recently-used mm on a per-cpu basis.  An mm has no fixed PCID
binding at all; instead, we give it a fresh PCID each time it's
loaded except in cases where we want to preserve the TLB, in which
case we reuse a recent value.

Here are some benchmark results, done on a Skylake laptop at 2.3 GHz
(turbo off, intel_pstate requesting max performance) under KVM with
the guest using idle=poll (to avoid artifacts when bouncing between
CPUs).  I haven't done any real statistics here -- I just ran them
in a loop and picked the fastest results that didn't look like
outliers.  Unpatched means commit a4eb8b993554, so all the
bookkeeping overhead is gone.

ping-pong between two mms on the same CPU using eventfd:
  patched: 1.22µs
  patched, nopcid: 1.33µs
  unpatched:   1.34µs

Same ping-pong, but now touch 512 pages (all zero-page to minimize
cache misses) each iteration.  dTLB misses are measured by
dtlb_load_misses.miss_causes_a_walk:
  patched: 1.8µs, 11M dTLB misses
  patched, nopcid: 6.2µs, 207M dTLB misses
  unpatched:   6.1µs, 190M dTLB misses

Reviewed-by: Nadav Amit 
Signed-off-by: Andy Lutomirski 
---
 arch/x86/include/asm/mmu_context.h |  3 ++
 arch/x86/include/asm/processor-flags.h |  2 +
 arch/x86/include/asm/tlbflush.h| 18 +++-
 arch/x86/mm/init.c |  1 +
 arch/x86/mm/tlb.c  | 80 +++---
 5 files changed, 86 insertions(+), 18 deletions(-)

diff --git a/arch/x86/include/asm/mmu_context.h 
b/arch/x86/include/asm/mmu_context.h
index 85f6b5575aad..14b3cdccf4f9 100644
--- a/arch/x86/include/asm/mmu_context.h
+++ b/arch/x86/include/asm/mmu_context.h
@@ -300,6 +300,9 @@ static inline unsigned long __get_current_cr3_fast(void)
 {
unsigned long cr3 = __pa(this_cpu_read(cpu_tlbstate.loaded_mm)->pgd);
 
+   if (static_cpu_has(X86_FEATURE_PCID))
+   cr3 |= this_cpu_read(cpu_tlbstate.loaded_mm_asid);
+
/* For now, be very restrictive about when this can be called. */
VM_WARN_ON(in_nmi() || !in_atomic());
 
diff --git a/arch/x86/include/asm/processor-flags.h 
b/arch/x86/include/asm/processor-flags.h
index f5d3e50af98c..8a6d89fc9a79 100644
--- a/arch/x86/include/asm/processor-flags.h
+++ b/arch/x86/include/asm/processor-flags.h
@@ -36,6 +36,7 @@
 /* Mask off the address space ID and SME encryption bits. */
 #define CR3_ADDR_MASK __sme_clr(0x7000ull)
 #define CR3_PCID_MASK 0xFFFull
+#define CR3_NOFLUSH (1UL << 63)
 #else
 /*
  * CR3_ADDR_MASK needs at least bits 31:5 set on PAE systems, and we save
@@ -43,6 +44,7 @@
  */
 #define CR3_ADDR_MASK 0xull
 #define CR3_PCID_MASK 0ull
+#define CR3_NOFLUSH 0
 #endif
 
 #endif /* _ASM_X86_PROCESSOR_FLAGS_H */
diff --git a/arch/x86/include/asm/tlbflush.h b/arch/x86/include/asm/tlbflush.h
index 6397275008db..d23e61dc0640 100644
--- a/arch/x86/include/asm/tlbflush.h
+++ b/arch/x86/include/asm/tlbflush.h
@@ -82,6 +82,12 @@ static inline u64 inc_mm_tlb_gen(struct mm_struct *mm)
 #define __flush_tlb_single(addr) __native_flush_tlb_single(addr)
 #endif
 
+/*
+ * 6 because 6 should be plenty and struct tlb_state will fit in
+ * two cache lines.
+ */
+#define TLB_NR_DYN_ASIDS 6
+
 struct tlb_context {
u64 ctx_id;
u64 tlb_gen;
@@ -95,6 +101,8 @@ struct tlb_state {
 * mode even if we've already switched back to swapper_pg_dir.
 */
struct mm_struct *loaded_mm;
+   u16 loaded_mm_asid;
+   u16 next_asid;
 
/*
 * Access to this CR4 shadow and to H/W CR4 is protected by
@@ -104,7 +112,8 @@ struct tlb_state {
 
/*
 * This is a list of all contexts that might exist in the TLB.
-* Since we don't yet use PCID, there is only one context.
+* There is one per ASID that we use, and the ASID (what the
+* CPU calls PCID) is the index into ctxts.
 *
 * For each context, ctx_id indicates which mm the TLB's user
 * entries came from.  As an invariant, the TLB will never
@@ -114,8 +123,13 @@ struct tlb_state {
 * To be clear, this means that it's legal for the TLB code to
 * flush the TLB without updating tlb_gen.  This can happen
 * (for now, at least) due to paravirt remote flushes.
+*
+* NB: context 0 is a bit 

[PATCH v5 0/2] x86/mm: PCID

2017-07-24 Thread Andy Lutomirski
Here's PCID v5.

Changes from v4:
 - Remove smp_mb__after_atomic() (Peterz)
 - Rebase, which involved tiny fixups due to SME
 - Add the doc patch, as promised

Andy Lutomirski (2):
  x86/mm: Try to preserve old TLB entries using PCID
  x86/mm: Improve TLB flush documentation

 arch/x86/include/asm/mmu_context.h |   3 +
 arch/x86/include/asm/processor-flags.h |   2 +
 arch/x86/include/asm/tlbflush.h|  18 -
 arch/x86/mm/init.c |   1 +
 arch/x86/mm/tlb.c  | 123 ++---
 5 files changed, 119 insertions(+), 28 deletions(-)

-- 
2.9.4



[PATCH v5 0/2] x86/mm: PCID

2017-07-24 Thread Andy Lutomirski
Here's PCID v5.

Changes from v4:
 - Remove smp_mb__after_atomic() (Peterz)
 - Rebase, which involved tiny fixups due to SME
 - Add the doc patch, as promised

Andy Lutomirski (2):
  x86/mm: Try to preserve old TLB entries using PCID
  x86/mm: Improve TLB flush documentation

 arch/x86/include/asm/mmu_context.h |   3 +
 arch/x86/include/asm/processor-flags.h |   2 +
 arch/x86/include/asm/tlbflush.h|  18 -
 arch/x86/mm/init.c |   1 +
 arch/x86/mm/tlb.c  | 123 ++---
 5 files changed, 119 insertions(+), 28 deletions(-)

-- 
2.9.4



Re: [PATCH 1/2] net: ethernet: ti: cpts: convert to use kthread_worker

2017-07-24 Thread Richard Cochran
On Mon, Jul 24, 2017 at 07:34:38PM -0500, Grygorii Strashko wrote:
> Below if pure TBD/RFC version of patch which add kthread worker to PTP core.
> I'm sending it to get you opinion about implementation in general, before 
> continue with more changes. Pls, take a look if you have time?
> - are you ok with names (API, callbacks, ptp structs members)?

The API and naming looks good to me.
 
> I can prepare, update and resend proper patches tom if feedback is positive.

Please do.

> I also can convert dp83640 driver to use new feature, but I can't test it.

No need for that.  It would be enough to have cpts as the first user
and example.
 
> + if (ptp->info->do_aux_work) {
> + struct sched_param param = {
> + .sched_priority = MAX_RT_PRIO - 1 };
> +
> + kthread_init_delayed_work(&ptp->aux_work, ptp_aux_kworker);
> + ptp->kworker = kthread_create_worker(0, info->name);
> + if (IS_ERR(ptp->kworker)) {
> + pr_err("failed to create ptp aux_worker task %ld\n",
> +PTR_ERR(ptp->kworker));
> + return ERR_CAST(ptp->kworker);
> + }
> + err = sched_setscheduler_nocheck(ptp->kworker->task,
> +  SCHED_FIFO, &param);

I think we should not hard code the scheduler and priority here but
rather leave it to the sysadmin to configure these using chrt(1).
After all, a normal work item has served just fine in many situations.

> + if (err)
> + pr_err("sched_setscheduler_nocheck err %d\n", err);
> + }
> +
>   err = ptp_populate_pin_groups(ptp);
>   if (err)
>   goto no_pin_groups;
> @@ -274,6 +305,9 @@ int ptp_clock_unregister(struct ptp_clock *ptp)
>   ptp->defunct = 1;
>   wake_up_interruptible(&ptp->tsev_wq);
>  
> + kthread_cancel_delayed_work_sync(&ptp->aux_work);
> + kthread_destroy_worker(ptp->kworker);

These can't be called unconditionally.

>   /* Release the clock's resources. */
>   if (ptp->pps_source)
>   pps_unregister_source(ptp->pps_source);

Thanks,
Richard


Re: [PATCH 1/2] net: ethernet: ti: cpts: convert to use kthread_worker

2017-07-24 Thread Richard Cochran
On Mon, Jul 24, 2017 at 07:34:38PM -0500, Grygorii Strashko wrote:
> Below if pure TBD/RFC version of patch which add kthread worker to PTP core.
> I'm sending it to get you opinion about implementation in general, before 
> continue with more changes. Pls, take a look if you have time?
> - are you ok with names (API, callbacks, ptp structs members)?

The API and naming looks good to me.
 
> I can prepare, update and resend proper patches tom if feedback is positive.

Please do.

> I also can convert dp83640 driver to use new feature, but I can't test it.

No need for that.  It would be enough to have cpts as the first user
and example.
 
> + if (ptp->info->do_aux_work) {
> + struct sched_param param = {
> + .sched_priority = MAX_RT_PRIO - 1 };
> +
> + kthread_init_delayed_work(&ptp->aux_work, ptp_aux_kworker);
> + ptp->kworker = kthread_create_worker(0, info->name);
> + if (IS_ERR(ptp->kworker)) {
> + pr_err("failed to create ptp aux_worker task %ld\n",
> +PTR_ERR(ptp->kworker));
> + return ERR_CAST(ptp->kworker);
> + }
> + err = sched_setscheduler_nocheck(ptp->kworker->task,
> +  SCHED_FIFO, &param);

I think we should not hard code the scheduler and priority here but
rather leave it to the sysadmin to configure these using chrt(1).
After all, a normal work item has served just fine in many situations.

> + if (err)
> + pr_err("sched_setscheduler_nocheck err %d\n", err);
> + }
> +
>   err = ptp_populate_pin_groups(ptp);
>   if (err)
>   goto no_pin_groups;
> @@ -274,6 +305,9 @@ int ptp_clock_unregister(struct ptp_clock *ptp)
>   ptp->defunct = 1;
>   wake_up_interruptible(&ptp->tsev_wq);
>  
> + kthread_cancel_delayed_work_sync(&ptp->aux_work);
> + kthread_destroy_worker(ptp->kworker);

These can't be called unconditionally.

>   /* Release the clock's resources. */
>   if (ptp->pps_source)
>   pps_unregister_source(ptp->pps_source);

Thanks,
Richard


Re: [PATCH -tip] [BUGFIX] kprobes/x86: Do not jump-optimize kprobes on irq entry code

2017-07-24 Thread kbuild test robot
Hi Masami,

[auto build test ERROR on tip/x86/core]
[also build test ERROR on v4.13-rc2 next-20170724]
[if your patch is applied to the wrong git tree, please drop us a note to help 
improve the system]

url:
https://github.com/0day-ci/linux/commits/Masami-Hiramatsu/kprobes-x86-Do-not-jump-optimize-kprobes-on-irq-entry-code/20170725-121438
config: i386-randconfig-x019-201730 (attached as .config)
compiler: gcc-6 (Debian 6.2.0-3) 6.2.0 20160901
reproduce:
# save the attached .config to linux build tree
make ARCH=i386 

All errors (new ones prefixed by >>):

   arch/x86/kernel/unwind_frame.c: In function 'in_entry_code':
>> arch/x86/kernel/unwind_frame.c:95:14: error: '__irqentry_text_start' 
>> undeclared (first use in this function)
 if (addr >= __irqentry_text_start && addr < __irqentry_text_end)
 ^
   arch/x86/kernel/unwind_frame.c:95:14: note: each undeclared identifier is 
reported only once for each function it appears in
>> arch/x86/kernel/unwind_frame.c:95:46: error: '__irqentry_text_end' 
>> undeclared (first use in this function)
 if (addr >= __irqentry_text_start && addr < __irqentry_text_end)
 ^~~

vim +/__irqentry_text_start +95 arch/x86/kernel/unwind_frame.c

24d86f59 Josh Poimboeuf   2016-10-27   86  
6b4d2348 Masami Hiramatsu 2017-07-24   87  bool in_entry_code(unsigned long ip)
a8b7a923 Josh Poimboeuf   2017-04-12   88  {
a8b7a923 Josh Poimboeuf   2017-04-12   89   char *addr = (char *)ip;
a8b7a923 Josh Poimboeuf   2017-04-12   90  
a8b7a923 Josh Poimboeuf   2017-04-12   91   if (addr >= __entry_text_start 
&& addr < __entry_text_end)
a8b7a923 Josh Poimboeuf   2017-04-12   92   return true;
a8b7a923 Josh Poimboeuf   2017-04-12   93  
6b4d2348 Masami Hiramatsu 2017-07-24   94  #if 
defined(CONFIG_FUNCTION_GRAPH_TRACER) || defined(CONFIG_KASAN) || 
defined(CONFIG_KPROBES)
a8b7a923 Josh Poimboeuf   2017-04-12  @95   if (addr >= 
__irqentry_text_start && addr < __irqentry_text_end)
a8b7a923 Josh Poimboeuf   2017-04-12   96   return true;
a8b7a923 Josh Poimboeuf   2017-04-12   97  #endif
a8b7a923 Josh Poimboeuf   2017-04-12   98  
a8b7a923 Josh Poimboeuf   2017-04-12   99   return false;
a8b7a923 Josh Poimboeuf   2017-04-12  100  }
a8b7a923 Josh Poimboeuf   2017-04-12  101  

:: The code at line 95 was first introduced by commit
:: a8b7a92318b6d7779f6d8e9aa6ba0e3de01a8943 x86/unwind: Silence 
entry-related warnings

:: TO: Josh Poimboeuf <jpoim...@redhat.com>
:: CC: Ingo Molnar <mi...@kernel.org>

---
0-DAY kernel test infrastructureOpen Source Technology Center
https://lists.01.org/pipermail/kbuild-all   Intel Corporation


.config.gz
Description: application/gzip


Re: [PATCH -tip] [BUGFIX] kprobes/x86: Do not jump-optimize kprobes on irq entry code

2017-07-24 Thread kbuild test robot
Hi Masami,

[auto build test ERROR on tip/x86/core]
[also build test ERROR on v4.13-rc2 next-20170724]
[if your patch is applied to the wrong git tree, please drop us a note to help 
improve the system]

url:
https://github.com/0day-ci/linux/commits/Masami-Hiramatsu/kprobes-x86-Do-not-jump-optimize-kprobes-on-irq-entry-code/20170725-121438
config: i386-randconfig-x019-201730 (attached as .config)
compiler: gcc-6 (Debian 6.2.0-3) 6.2.0 20160901
reproduce:
# save the attached .config to linux build tree
make ARCH=i386 

All errors (new ones prefixed by >>):

   arch/x86/kernel/unwind_frame.c: In function 'in_entry_code':
>> arch/x86/kernel/unwind_frame.c:95:14: error: '__irqentry_text_start' 
>> undeclared (first use in this function)
 if (addr >= __irqentry_text_start && addr < __irqentry_text_end)
 ^
   arch/x86/kernel/unwind_frame.c:95:14: note: each undeclared identifier is 
reported only once for each function it appears in
>> arch/x86/kernel/unwind_frame.c:95:46: error: '__irqentry_text_end' 
>> undeclared (first use in this function)
 if (addr >= __irqentry_text_start && addr < __irqentry_text_end)
 ^~~

vim +/__irqentry_text_start +95 arch/x86/kernel/unwind_frame.c

24d86f59 Josh Poimboeuf   2016-10-27   86  
6b4d2348 Masami Hiramatsu 2017-07-24   87  bool in_entry_code(unsigned long ip)
a8b7a923 Josh Poimboeuf   2017-04-12   88  {
a8b7a923 Josh Poimboeuf   2017-04-12   89   char *addr = (char *)ip;
a8b7a923 Josh Poimboeuf   2017-04-12   90  
a8b7a923 Josh Poimboeuf   2017-04-12   91   if (addr >= __entry_text_start 
&& addr < __entry_text_end)
a8b7a923 Josh Poimboeuf   2017-04-12   92   return true;
a8b7a923 Josh Poimboeuf   2017-04-12   93  
6b4d2348 Masami Hiramatsu 2017-07-24   94  #if 
defined(CONFIG_FUNCTION_GRAPH_TRACER) || defined(CONFIG_KASAN) || 
defined(CONFIG_KPROBES)
a8b7a923 Josh Poimboeuf   2017-04-12  @95   if (addr >= 
__irqentry_text_start && addr < __irqentry_text_end)
a8b7a923 Josh Poimboeuf   2017-04-12   96   return true;
a8b7a923 Josh Poimboeuf   2017-04-12   97  #endif
a8b7a923 Josh Poimboeuf   2017-04-12   98  
a8b7a923 Josh Poimboeuf   2017-04-12   99   return false;
a8b7a923 Josh Poimboeuf   2017-04-12  100  }
a8b7a923 Josh Poimboeuf   2017-04-12  101  

:: The code at line 95 was first introduced by commit
:: a8b7a92318b6d7779f6d8e9aa6ba0e3de01a8943 x86/unwind: Silence 
entry-related warnings

:: TO: Josh Poimboeuf 
:: CC: Ingo Molnar 

---
0-DAY kernel test infrastructureOpen Source Technology Center
https://lists.01.org/pipermail/kbuild-all   Intel Corporation


.config.gz
Description: application/gzip


Re: [PATCH tip/core/rcu 4/5] sys_membarrier: Add expedited option

2017-07-24 Thread Boqun Feng
On Mon, Jul 24, 2017 at 02:58:16PM -0700, Paul E. McKenney wrote:
> The sys_membarrier() system call has proven too slow for some use
> cases, which has prompted users to instead rely on TLB shootdown.
> Although TLB shootdown is much faster, it has the slight disadvantage
> of not working at all on arm and arm64.  This commit therefore adds
> an expedited option to the sys_membarrier() system call.
> 
> Signed-off-by: Paul E. McKenney 
> ---
>  include/uapi/linux/membarrier.h | 11 +++
>  kernel/membarrier.c |  7 ++-
>  2 files changed, 17 insertions(+), 1 deletion(-)
> 
> diff --git a/include/uapi/linux/membarrier.h b/include/uapi/linux/membarrier.h
> index e0b108bd2624..ba36d8a6be61 100644
> --- a/include/uapi/linux/membarrier.h
> +++ b/include/uapi/linux/membarrier.h
> @@ -40,6 +40,16 @@
>   *  (non-running threads are de facto in such a
>   *  state). This covers threads from all processes
>   *  running on the system. This command returns 0.
> + * @MEMBARRIER_CMD_SHARED_EXPEDITED:  Execute a memory barrier on all
> + *   running threads, but in an expedited fashion.
> + *  Upon return from system call, the caller thread
> + *  is ensured that all running threads have passed
> + *  through a state where all memory accesses to
> + *  user-space addresses match program order between
> + *  entry to and return from the system call
> + *  (non-running threads are de facto in such a
> + *  state). This covers threads from all processes
> + *  running on the system. This command returns 0.
>   *
>   * Command to be passed to the membarrier system call. The commands need to
>   * be a single bit each, except for MEMBARRIER_CMD_QUERY which is assigned to
> @@ -48,6 +58,7 @@
>  enum membarrier_cmd {
>   MEMBARRIER_CMD_QUERY = 0,
>   MEMBARRIER_CMD_SHARED = (1 << 0),
> + MEMBARRIER_CMD_SHARED_EXPEDITED = (2 << 0),

Should this better be "(1 << 1)" ;-)

Regards,
Boqun

>  };
>  
>  #endif /* _UAPI_LINUX_MEMBARRIER_H */
> diff --git a/kernel/membarrier.c b/kernel/membarrier.c
> index 9f9284f37f8d..b749c39bb219 100644
> --- a/kernel/membarrier.c
> +++ b/kernel/membarrier.c
> @@ -22,7 +22,8 @@
>   * Bitmask made from a "or" of all commands within enum membarrier_cmd,
>   * except MEMBARRIER_CMD_QUERY.
>   */
> -#define MEMBARRIER_CMD_BITMASK   (MEMBARRIER_CMD_SHARED)
> +#define MEMBARRIER_CMD_BITMASK   (MEMBARRIER_CMD_SHARED |
> \
> +  MEMBARRIER_CMD_SHARED_EXPEDITED)
>  
>  /**
>   * sys_membarrier - issue memory barriers on a set of threads
> @@ -64,6 +65,10 @@ SYSCALL_DEFINE2(membarrier, int, cmd, int, flags)
>   if (num_online_cpus() > 1)
>   synchronize_sched();
>   return 0;
> + case MEMBARRIER_CMD_SHARED_EXPEDITED:
> + if (num_online_cpus() > 1)
> + synchronize_sched_expedited();
> + return 0;
>   default:
>   return -EINVAL;
>   }
> -- 
> 2.5.2
> 


signature.asc
Description: PGP signature


Re: [PATCH tip/core/rcu 4/5] sys_membarrier: Add expedited option

2017-07-24 Thread Boqun Feng
On Mon, Jul 24, 2017 at 02:58:16PM -0700, Paul E. McKenney wrote:
> The sys_membarrier() system call has proven too slow for some use
> cases, which has prompted users to instead rely on TLB shootdown.
> Although TLB shootdown is much faster, it has the slight disadvantage
> of not working at all on arm and arm64.  This commit therefore adds
> an expedited option to the sys_membarrier() system call.
> 
> Signed-off-by: Paul E. McKenney 
> ---
>  include/uapi/linux/membarrier.h | 11 +++
>  kernel/membarrier.c |  7 ++-
>  2 files changed, 17 insertions(+), 1 deletion(-)
> 
> diff --git a/include/uapi/linux/membarrier.h b/include/uapi/linux/membarrier.h
> index e0b108bd2624..ba36d8a6be61 100644
> --- a/include/uapi/linux/membarrier.h
> +++ b/include/uapi/linux/membarrier.h
> @@ -40,6 +40,16 @@
>   *  (non-running threads are de facto in such a
>   *  state). This covers threads from all processes
>   *  running on the system. This command returns 0.
> + * @MEMBARRIER_CMD_SHARED_EXPEDITED:  Execute a memory barrier on all
> + *   running threads, but in an expedited fashion.
> + *  Upon return from system call, the caller thread
> + *  is ensured that all running threads have passed
> + *  through a state where all memory accesses to
> + *  user-space addresses match program order between
> + *  entry to and return from the system call
> + *  (non-running threads are de facto in such a
> + *  state). This covers threads from all processes
> + *  running on the system. This command returns 0.
>   *
>   * Command to be passed to the membarrier system call. The commands need to
>   * be a single bit each, except for MEMBARRIER_CMD_QUERY which is assigned to
> @@ -48,6 +58,7 @@
>  enum membarrier_cmd {
>   MEMBARRIER_CMD_QUERY = 0,
>   MEMBARRIER_CMD_SHARED = (1 << 0),
> + MEMBARRIER_CMD_SHARED_EXPEDITED = (2 << 0),

Should this better be "(1 << 1)" ;-)

Regards,
Boqun

>  };
>  
>  #endif /* _UAPI_LINUX_MEMBARRIER_H */
> diff --git a/kernel/membarrier.c b/kernel/membarrier.c
> index 9f9284f37f8d..b749c39bb219 100644
> --- a/kernel/membarrier.c
> +++ b/kernel/membarrier.c
> @@ -22,7 +22,8 @@
>   * Bitmask made from a "or" of all commands within enum membarrier_cmd,
>   * except MEMBARRIER_CMD_QUERY.
>   */
> -#define MEMBARRIER_CMD_BITMASK   (MEMBARRIER_CMD_SHARED)
> +#define MEMBARRIER_CMD_BITMASK   (MEMBARRIER_CMD_SHARED |
> \
> +  MEMBARRIER_CMD_SHARED_EXPEDITED)
>  
>  /**
>   * sys_membarrier - issue memory barriers on a set of threads
> @@ -64,6 +65,10 @@ SYSCALL_DEFINE2(membarrier, int, cmd, int, flags)
>   if (num_online_cpus() > 1)
>   synchronize_sched();
>   return 0;
> + case MEMBARRIER_CMD_SHARED_EXPEDITED:
> + if (num_online_cpus() > 1)
> + synchronize_sched_expedited();
> + return 0;
>   default:
>   return -EINVAL;
>   }
> -- 
> 2.5.2
> 


signature.asc
Description: PGP signature


Re: [PATCH] efifb: allow user to disable write combined mapping.

2017-07-24 Thread Dave Airlie
On 19 July 2017 at 00:34, Peter Jones  wrote:
> On Tue, Jul 18, 2017 at 04:09:09PM +1000, Dave Airlie wrote:
>> This patch allows the user to disable write combined mapping
>> of the efifb framebuffer console using an nowc option.
>>
>> A customer noticed major slowdowns while logging to the console
>> with write combining enabled, on other tasks running on the same
>> CPU. (10x or greater slow down on all other cores on the same CPU
>> as is doing the logging).
>>
>> I reproduced this on a machine with dual CPUs.
>> Intel(R) Xeon(R) CPU E5-2609 v3 @ 1.90GHz (6 core)
>>
>> I wrote a test that just mmaps the pci bar and writes to it in
>> a loop, while this was running in the background one a single
>> core with (taskset -c 1), building a kernel up to init/version.o
>> (taskset -c 8) went from 13s to 133s or so. I've yet to explain
>> why this occurs or what is going wrong I haven't managed to find
>> a perf command that in any way gives insight into this.
>>
>> 11,885,070,715  instructions  #1.39  insns per cycle
>> vs
>> 12,082,592,342  instructions  #0.13  insns per cycle
>>
>> is the only thing I've spotted of interest, I've tried at least:
>> dTLB-stores,dTLB-store-misses,L1-dcache-stores,LLC-store,LLC-store-misses,LLC-load-misses,LLC-loads,\mem-loads,mem-stores,iTLB-loads,iTLB-load-misses,cache-references,cache-misses
>>
>> For now it seems at least a good idea to allow a user to disable write
>> combining if they see this until we can figure it out.
>
> Well, that's kind of amazing, given 3c004b4f7eab239e switched us /to/
> using ioremap_wc() for the exact same reason.  I'm not against letting
> the user force one way or the other if it helps, though it sure would be
> nice to know why.
>
> Anyway,
>
> Acked-By: Peter Jones 
>
> Bartlomiej, do you want to handle this in your devel tree?

I'm happy to stick this in a drm-fixes pull with this ack.

Dave.


Re: [PATCH] efifb: allow user to disable write combined mapping.

2017-07-24 Thread Dave Airlie
On 19 July 2017 at 00:34, Peter Jones  wrote:
> On Tue, Jul 18, 2017 at 04:09:09PM +1000, Dave Airlie wrote:
>> This patch allows the user to disable write combined mapping
>> of the efifb framebuffer console using an nowc option.
>>
>> A customer noticed major slowdowns while logging to the console
>> with write combining enabled, on other tasks running on the same
>> CPU. (10x or greater slow down on all other cores on the same CPU
>> as is doing the logging).
>>
>> I reproduced this on a machine with dual CPUs.
>> Intel(R) Xeon(R) CPU E5-2609 v3 @ 1.90GHz (6 core)
>>
>> I wrote a test that just mmaps the pci bar and writes to it in
>> a loop, while this was running in the background on a single
>> core with (taskset -c 1), building a kernel up to init/version.o
>> (taskset -c 8) went from 13s to 133s or so. I've yet to explain
>> why this occurs or what is going wrong I haven't managed to find
>> a perf command that in any way gives insight into this.
>>
>> 11,885,070,715  instructions  #1.39  insns per cycle
>> vs
>> 12,082,592,342  instructions  #0.13  insns per cycle
>>
>> is the only thing I've spotted of interest, I've tried at least:
>> dTLB-stores,dTLB-store-misses,L1-dcache-stores,LLC-store,LLC-store-misses,LLC-load-misses,LLC-loads,\mem-loads,mem-stores,iTLB-loads,iTLB-load-misses,cache-references,cache-misses
>>
>> For now it seems at least a good idea to allow a user to disable write
>> combining if they see this until we can figure it out.
>
> Well, that's kind of amazing, given 3c004b4f7eab239e switched us /to/
> using ioremap_wc() for the exact same reason.  I'm not against letting
> the user force one way or the other if it helps, though it sure would be
> nice to know why.
>
> Anyway,
>
> Acked-By: Peter Jones 
>
> Bartlomiej, do you want to handle this in your devel tree?

I'm happy to stick this in a drm-fixes pull with this ack.

Dave.


Re: [PATCH] xen: allocate page for shared info page from low memory

2017-07-24 Thread Boris Ostrovsky



On 07/23/2017 04:25 PM, Boris Ostrovsky wrote:



On 06/14/2017 01:11 PM, Juergen Gross wrote:

On 14/06/17 18:58, Boris Ostrovsky wrote:

On 06/12/2017 07:53 AM, Juergen Gross wrote:

In a HVM guest the kernel allocates the page for mapping the shared
info structure via extend_brk() today. This will lead to a drop of
performance as the underlying EPT entry will have to be split up into
4kB entries as the single shared info page is located in hypervisor
memory.

The issue has been detected by using the libmicro munmap test:
unmapping 8kB of memory was faster by nearly a factor of two when no
pv interfaces were active in the HVM guest.

So instead of taking a page from memory which might be mapped via
large EPT entries use a page which is already mapped via a 4kB EPT
entry: we can take a page from the first 1MB of memory as the video
memory at 640kB disallows using larger EPT entries.

Signed-off-by: Juergen Gross 
---
  arch/x86/xen/enlighten_hvm.c | 31 ---
  arch/x86/xen/enlighten_pv.c  |  2 --
  2 files changed, 24 insertions(+), 9 deletions(-)

diff --git a/arch/x86/xen/enlighten_hvm.c 
b/arch/x86/xen/enlighten_hvm.c

index a6d014f47e52..c19477b6e43a 100644
--- a/arch/x86/xen/enlighten_hvm.c
+++ b/arch/x86/xen/enlighten_hvm.c
@@ -1,5 +1,6 @@
  #include 
  #include 
+#include 
  #include 
  #include 
@@ -10,9 +11,11 @@
  #include 
  #include 
  #include 
+#include 
  #include 
  #include 
+#include 
  #include "xen-ops.h"
  #include "mmu.h"
@@ -22,20 +25,34 @@ void __ref xen_hvm_init_shared_info(void)
  {
  int cpu;
  struct xen_add_to_physmap xatp;
-static struct shared_info *shared_info_page;
+u64 pa;
+
+if (HYPERVISOR_shared_info == &xen_dummy_shared_info) {
+/*
+ * Search for a free page starting at 4kB physical address.
+ * Low memory is preferred to avoid an EPT large page split up
+ * by the mapping.
+ * Starting below X86_RESERVE_LOW (usually 64kB) is fine as
+ * the BIOS used for HVM guests is well behaved and won't
+ * clobber memory other than the first 4kB.
+ */
+for (pa = PAGE_SIZE;
+ !e820__mapped_all(pa, pa + PAGE_SIZE, E820_TYPE_RAM) ||
+ memblock_is_reserved(pa);
+ pa += PAGE_SIZE)
+;


Is it possible to never find a page here?


Only if there is no memory available at all. :-)

TBH: I expect this to _always_ succeed at the first loop iteration.


This patch seems to break (64-bit only) guests on dumpdata here. No 
problems on other machines.


So far all I know is that we did get the first page (0x1000) but not 
much more. I will poke at this more on Monday.



So the problem is due to KASLR --- we can't use __va() before 
kernel_randomize_memory() is called since it will change __PAGE_OFFSET. 
(Setting CONFIG_RANDOMIZE_BASE will cause failure.)



-boris


Re: [PATCH] xen: allocate page for shared info page from low memory

2017-07-24 Thread Boris Ostrovsky



On 07/23/2017 04:25 PM, Boris Ostrovsky wrote:



On 06/14/2017 01:11 PM, Juergen Gross wrote:

On 14/06/17 18:58, Boris Ostrovsky wrote:

On 06/12/2017 07:53 AM, Juergen Gross wrote:

In a HVM guest the kernel allocates the page for mapping the shared
info structure via extend_brk() today. This will lead to a drop of
performance as the underlying EPT entry will have to be split up into
4kB entries as the single shared info page is located in hypervisor
memory.

The issue has been detected by using the libmicro munmap test:
unmapping 8kB of memory was faster by nearly a factor of two when no
pv interfaces were active in the HVM guest.

So instead of taking a page from memory which might be mapped via
large EPT entries use a page which is already mapped via a 4kB EPT
entry: we can take a page from the first 1MB of memory as the video
memory at 640kB disallows using larger EPT entries.

Signed-off-by: Juergen Gross 
---
  arch/x86/xen/enlighten_hvm.c | 31 ---
  arch/x86/xen/enlighten_pv.c  |  2 --
  2 files changed, 24 insertions(+), 9 deletions(-)

diff --git a/arch/x86/xen/enlighten_hvm.c 
b/arch/x86/xen/enlighten_hvm.c

index a6d014f47e52..c19477b6e43a 100644
--- a/arch/x86/xen/enlighten_hvm.c
+++ b/arch/x86/xen/enlighten_hvm.c
@@ -1,5 +1,6 @@
  #include 
  #include 
+#include 
  #include 
  #include 
@@ -10,9 +11,11 @@
  #include 
  #include 
  #include 
+#include 
  #include 
  #include 
+#include 
  #include "xen-ops.h"
  #include "mmu.h"
@@ -22,20 +25,34 @@ void __ref xen_hvm_init_shared_info(void)
  {
  int cpu;
  struct xen_add_to_physmap xatp;
-static struct shared_info *shared_info_page;
+u64 pa;
+
+if (HYPERVISOR_shared_info == &xen_dummy_shared_info) {
+/*
+ * Search for a free page starting at 4kB physical address.
+ * Low memory is preferred to avoid an EPT large page split up
+ * by the mapping.
+ * Starting below X86_RESERVE_LOW (usually 64kB) is fine as
+ * the BIOS used for HVM guests is well behaved and won't
+ * clobber memory other than the first 4kB.
+ */
+for (pa = PAGE_SIZE;
+ !e820__mapped_all(pa, pa + PAGE_SIZE, E820_TYPE_RAM) ||
+ memblock_is_reserved(pa);
+ pa += PAGE_SIZE)
+;


Is it possible to never find a page here?


Only if there is no memory available at all. :-)

TBH: I expect this to _always_ succeed at the first loop iteration.


This patch seems to break (64-bit only) guests on dumpdata here. No 
problems on other machines.


So far all I know is that we did get the first page (0x1000) but not 
much more. I will poke at this more on Monday.



So the problem is due to KASLR --- we can't use __va() before 
kernel_randomize_memory() is called since it will change __PAGE_OFFSET. 
(Setting CONFIG_RANDOMIZE_BASE will cause failure.)



-boris


Re: [PATCH] mm: Always flush VMA ranges affected by zap_page_range

2017-07-24 Thread kbuild test robot
Hi Mel,

[auto build test ERROR on mmotm/master]
[also build test ERROR on v4.13-rc2 next-20170724]
[if your patch is applied to the wrong git tree, please drop us a note to help 
improve the system]

url:
https://github.com/0day-ci/linux/commits/Mel-Gorman/mm-Always-flush-VMA-ranges-affected-by-zap_page_range/20170725-102436
base:   git://git.cmpxchg.org/linux-mmotm.git master
config: ia64-allyesconfig (attached as .config)
compiler: ia64-linux-gcc (GCC) 6.2.0
reproduce:
wget 
https://raw.githubusercontent.com/01org/lkp-tests/master/sbin/make.cross -O 
~/bin/make.cross
chmod +x ~/bin/make.cross
# save the attached .config to linux build tree
make.cross ARCH=ia64 

All errors (new ones prefixed by >>):

   mm/memory.c: In function 'zap_page_range':
>> mm/memory.c:1497:3: error: implicit declaration of function 
>> '__tlb_adjust_range' [-Werror=implicit-function-declaration]
  __tlb_adjust_range(&tlb, start, end);
  ^~
   cc1: some warnings being treated as errors

vim +/__tlb_adjust_range +1497 mm/memory.c

  1466  
  1467  /**
  1468   * zap_page_range - remove user pages in a given range
  1469   * @vma: vm_area_struct holding the applicable pages
  1470   * @start: starting address of pages to zap
  1471   * @size: number of bytes to zap
  1472   *
  1473   * Caller must protect the VMA list
  1474   */
  1475  void zap_page_range(struct vm_area_struct *vma, unsigned long start,
  1476  unsigned long size)
  1477  {
  1478  struct mm_struct *mm = vma->vm_mm;
  1479  struct mmu_gather tlb;
  1480  unsigned long end = start + size;
  1481  
  1482  lru_add_drain();
  1483  tlb_gather_mmu(&tlb, mm, start, end);
  1484  update_hiwater_rss(mm);
  1485  mmu_notifier_invalidate_range_start(mm, start, end);
  1486  for ( ; vma && vma->vm_start < end; vma = vma->vm_next) {
  1487  unmap_single_vma(&tlb, vma, start, end, NULL);
  1488  
  1489  /*
  1490   * zap_page_range does not specify whether mmap_sem 
should be
  1491   * held for read or write. That allows parallel 
zap_page_range
  1492   * operations to unmap a PTE and defer a flush meaning 
that
  1493   * this call observes pte_none and fails to flush the 
TLB.
  1494   * Rather than adding a complex API, ensure that no 
stale
  1495   * TLB entries exist when this call returns.
  1496   */
> 1497  __tlb_adjust_range(&tlb, start, end);
  1498  }
  1499  
  1500  mmu_notifier_invalidate_range_end(mm, start, end);
  1501  tlb_finish_mmu(&tlb, start, end);
  1502  }
  1503  

---
0-DAY kernel test infrastructureOpen Source Technology Center
https://lists.01.org/pipermail/kbuild-all   Intel Corporation


.config.gz
Description: application/gzip


Re: [PATCH] mm: Always flush VMA ranges affected by zap_page_range

2017-07-24 Thread kbuild test robot
Hi Mel,

[auto build test ERROR on mmotm/master]
[also build test ERROR on v4.13-rc2 next-20170724]
[if your patch is applied to the wrong git tree, please drop us a note to help 
improve the system]

url:
https://github.com/0day-ci/linux/commits/Mel-Gorman/mm-Always-flush-VMA-ranges-affected-by-zap_page_range/20170725-102436
base:   git://git.cmpxchg.org/linux-mmotm.git master
config: ia64-allyesconfig (attached as .config)
compiler: ia64-linux-gcc (GCC) 6.2.0
reproduce:
wget 
https://raw.githubusercontent.com/01org/lkp-tests/master/sbin/make.cross -O 
~/bin/make.cross
chmod +x ~/bin/make.cross
# save the attached .config to linux build tree
make.cross ARCH=ia64 

All errors (new ones prefixed by >>):

   mm/memory.c: In function 'zap_page_range':
>> mm/memory.c:1497:3: error: implicit declaration of function 
>> '__tlb_adjust_range' [-Werror=implicit-function-declaration]
  __tlb_adjust_range(&tlb, start, end);
  ^~
   cc1: some warnings being treated as errors

vim +/__tlb_adjust_range +1497 mm/memory.c

  1466  
  1467  /**
  1468   * zap_page_range - remove user pages in a given range
  1469   * @vma: vm_area_struct holding the applicable pages
  1470   * @start: starting address of pages to zap
  1471   * @size: number of bytes to zap
  1472   *
  1473   * Caller must protect the VMA list
  1474   */
  1475  void zap_page_range(struct vm_area_struct *vma, unsigned long start,
  1476  unsigned long size)
  1477  {
  1478  struct mm_struct *mm = vma->vm_mm;
  1479  struct mmu_gather tlb;
  1480  unsigned long end = start + size;
  1481  
  1482  lru_add_drain();
  1483  tlb_gather_mmu(&tlb, mm, start, end);
  1484  update_hiwater_rss(mm);
  1485  mmu_notifier_invalidate_range_start(mm, start, end);
  1486  for ( ; vma && vma->vm_start < end; vma = vma->vm_next) {
  1487  unmap_single_vma(&tlb, vma, start, end, NULL);
  1488  
  1489  /*
  1490   * zap_page_range does not specify whether mmap_sem 
should be
  1491   * held for read or write. That allows parallel 
zap_page_range
  1492   * operations to unmap a PTE and defer a flush meaning 
that
  1493   * this call observes pte_none and fails to flush the 
TLB.
  1494   * Rather than adding a complex API, ensure that no 
stale
  1495   * TLB entries exist when this call returns.
  1496   */
> 1497  __tlb_adjust_range(&tlb, start, end);
  1498  }
  1499  
  1500  mmu_notifier_invalidate_range_end(mm, start, end);
  1501  tlb_finish_mmu(&tlb, start, end);
  1502  }
  1503  

---
0-DAY kernel test infrastructureOpen Source Technology Center
https://lists.01.org/pipermail/kbuild-all   Intel Corporation


.config.gz
Description: application/gzip


Re: [PATCH v2] ovl: drop CAP_SYS_RESOURCE from saved mounter's credentials

2017-07-24 Thread Greg KH
On Mon, Jul 24, 2017 at 10:15:22AM +0200, Miklos Szeredi wrote:
> On Sat, Jul 22, 2017 at 11:30 AM, Amir Goldstein  wrote:
> > Bumped into this patch (Now upstream commit 51f8f3c4e225) and realized
> > it is missing cc: stable # v4.8
> >
> > At least this docker PR suggests that regression introduced in v4.8 will 
> > not be
> > appreciated down the road:
> > https://github.com/moby/moby/issues/29364
> 
> Greg,
> 
> Can you please queue 51f8f3c4e225 ("ovl: drop CAP_SYS_RESOURCE from
> saved mounter's credentials") for 4.9.y?

Now queued up, thanks.

greg k-h


Re: [PATCH v2] ovl: drop CAP_SYS_RESOURCE from saved mounter's credentials

2017-07-24 Thread Greg KH
On Mon, Jul 24, 2017 at 10:15:22AM +0200, Miklos Szeredi wrote:
> On Sat, Jul 22, 2017 at 11:30 AM, Amir Goldstein  wrote:
> > Bumped into this patch (Now upstream commit 51f8f3c4e225) and realized
> > it is missing cc: stable # v4.8
> >
> > At least this docker PR suggests that regression introduced in v4.8 will 
> > not be
> > appreciated down the road:
> > https://github.com/moby/moby/issues/29364
> 
> Greg,
> 
> Can you please queue 51f8f3c4e225 ("ovl: drop CAP_SYS_RESOURCE from
> saved mounter's credentials") for 4.9.y?

Now queued up, thanks.

greg k-h


Re: RCU stall warnings...

2017-07-24 Thread Stephen Rothwell
Hi Dave,

On Mon, 24 Jul 2017 16:34:58 -0700 (PDT) David Miller  
wrote:
>
> Showing my ignorance as well, after reading this, for some reason this
> commit below sticks out to me.  Maybe I should do a bisect and see if
> it lands on this commit.
> 
> That would take a while as it's hard to forcibly set this thing off.

For a quick test, just try with the parent of that commit and (if that
is "good") then with that commit itself ...

-- 
Cheers,
Stephen Rothwell


Re: RCU stall warnings...

2017-07-24 Thread Stephen Rothwell
Hi Dave,

On Mon, 24 Jul 2017 16:34:58 -0700 (PDT) David Miller  
wrote:
>
> Showing my ignorance as well, after reading this, for some reason this
> commit below sticks out to me.  Maybe I should do a bisect and see if
> it lands on this commit.
> 
> That would take a while as it's hard to forcibly set this thing off.

For a quick test, just try with the parent of that commit and (if that
is "good") then with that commit itself ...

-- 
Cheers,
Stephen Rothwell


Re: [PATCH 2/2] dma: Add Spreadtrum DMA controller driver

2017-07-24 Thread Vinod Koul
On Mon, Jul 24, 2017 at 02:46:00PM +0800, Baolin Wang wrote:
> Hi,
> 
> On 六,  7月 22, 2017 at 01:27:31下午 +0530, Vinod Koul wrote:
> > On Tue, Jul 18, 2017 at 03:06:12PM +0800, Baolin Wang wrote:

> > > +static void sprd_dma_set_uid(struct sprd_dma_chn *mchan)
> > > +{
> > > + struct sprd_dma_dev *sdev = to_sprd_dma_dev(&mchan->chan);
> > > + u32 dev_id = mchan->dev_id;
> > > +
> > > + if (dev_id != DMA_SOFTWARE_UID)
> > 
> > Whats a UID?
> 
> It is for users, every user was assigned one unique hardware ID.
> Then the user can trigger the DMA to transfer by the user ID.

sounds like a slave id to me (hint read again struct dma_slave_config)

> > > + u32 fragmens_len;
> > > + u32 block_len;
> > 
> > oh please, I think I will stop here now :(
> > 
> > > + u32 transcation_len;
> > > + u32 src_step;
> > > + u32 des_step;
> > > + u32 src_frag_step;
> > > + u32 dst_frag_step;
> > > + u32 src_blk_step;
> > > + u32 dst_blk_step;
> > > + u32 wrap_ptr;
> > > + u32 wrap_to;
> > > + u32 dev_id;
> > > + enum dma_end_type is_end;
> > 
> > Looking at this I think these are overkill, many of them can be handled
> > properly by current dmaengine interfaces, so please use those before you
> > invent your own...
> > 
> > Also the code is bloated because you don't use virt-dma, pls use that. I
> > skipped many parts of the driver as I feel driver needs more work.
> 
> OK. I will check the virt-dma. Thanks for your comments.

Ok, but the bigger concern is that people have defined generic interfaces
for everyone to use, so you should also use them, the hw doesn't seem
anything special which cannot be accommodated in the current fwk, if not do
tell me which parts don't fit before you invent your own interfaces...

-- 
~Vinod


Re: [PATCH 2/2] dma: Add Spreadtrum DMA controller driver

2017-07-24 Thread Vinod Koul
On Mon, Jul 24, 2017 at 02:46:00PM +0800, Baolin Wang wrote:
> Hi,
> 
> On 六,  7月 22, 2017 at 01:27:31下午 +0530, Vinod Koul wrote:
> > On Tue, Jul 18, 2017 at 03:06:12PM +0800, Baolin Wang wrote:

> > > +static void sprd_dma_set_uid(struct sprd_dma_chn *mchan)
> > > +{
> > > + struct sprd_dma_dev *sdev = to_sprd_dma_dev(&mchan->chan);
> > > + u32 dev_id = mchan->dev_id;
> > > +
> > > + if (dev_id != DMA_SOFTWARE_UID)
> > 
> > Whats a UID?
> 
> It is for users, every user was assigned one unique hardware ID.
> Then the user can trigger the DMA to transfer by the user ID.

sounds like a slave id to me (hint read again struct dma_slave_config)

> > > + u32 fragmens_len;
> > > + u32 block_len;
> > 
> > oh please, I think I will stop here now :(
> > 
> > > + u32 transcation_len;
> > > + u32 src_step;
> > > + u32 des_step;
> > > + u32 src_frag_step;
> > > + u32 dst_frag_step;
> > > + u32 src_blk_step;
> > > + u32 dst_blk_step;
> > > + u32 wrap_ptr;
> > > + u32 wrap_to;
> > > + u32 dev_id;
> > > + enum dma_end_type is_end;
> > 
> > Looking at this I think these are overkill, many of them can be handled
> > properly by current dmaengine interfaces, so please use those before you
> > invent your own...
> > 
> > Also the code is bloated because you don't use virt-dma, pls use that. I
> > skipped many parts of the driver as I feel driver needs more work.
> 
> OK. I will check the virt-dma. Thanks for your comments.

Ok, but the bigger concern is that people have defined generic interfaces
for everyone to use, so you should also use them, the hw doesn't seem
anything special which cannot be accommodated in the current fwk, if not do
tell me which parts don't fit before you invent your own interfaces...

-- 
~Vinod


Re: [PATCH 3/4] gpio: davinci: Add a separate compatible for keystone-k2g soc

2017-07-24 Thread Keerthy


On Monday 24 July 2017 11:54 PM, Rob Herring wrote:
> On Tue, Jul 18, 2017 at 04:27:15PM +0530, Keerthy wrote:
>> Add a separate compatible for keystone-k2g soc
>>
>> Signed-off-by: Keerthy 
>> ---
>>  Documentation/devicetree/bindings/gpio/gpio-davinci.txt | 3 ++-
>>  drivers/gpio/gpio-davinci.c | 1 +
>>  2 files changed, 3 insertions(+), 1 deletion(-)
>>
>> diff --git a/Documentation/devicetree/bindings/gpio/gpio-davinci.txt 
>> b/Documentation/devicetree/bindings/gpio/gpio-davinci.txt
>> index 5079ba7..1a5c1a2 100644
>> --- a/Documentation/devicetree/bindings/gpio/gpio-davinci.txt
>> +++ b/Documentation/devicetree/bindings/gpio/gpio-davinci.txt
>> @@ -1,7 +1,8 @@
>>  Davinci/Keystone GPIO controller bindings
>>  
>>  Required Properties:
>> -- compatible: should be "ti,dm6441-gpio", "ti,keystone-gpio"
>> +- compatible: should be "ti,dm6441-gpio", "ti,keystone-gpio",
>> +  "ti,keystone-k2g-gpio"
> 
> Reformat to one valid combination per line.

Sure Rob. Thanks for reviewing.

> 
>>  
>>  - reg: Physical base address of the controller and the size of memory mapped
>> registers.
>> diff --git a/drivers/gpio/gpio-davinci.c b/drivers/gpio/gpio-davinci.c
>> index 932f270..a8d8dd9 100644
>> --- a/drivers/gpio/gpio-davinci.c
>> +++ b/drivers/gpio/gpio-davinci.c
>> @@ -610,6 +610,7 @@ static int davinci_gpio_irq_setup(struct platform_device 
>> *pdev)
>>  static const struct of_device_id davinci_gpio_ids[] = {
>>  { .compatible = "ti,keystone-gpio", keystone_gpio_get_irq_chip},
>>  { .compatible = "ti,dm6441-gpio", davinci_gpio_get_irq_chip},
>> +{ .compatible = "ti,keystone-k2g-gpio", keystone_gpio_get_irq_chip},
>>  { /* sentinel */ },
>>  };
>>  MODULE_DEVICE_TABLE(of, davinci_gpio_ids);
>> -- 
>> 1.9.1
>>


Re: [PATCH 3/4] gpio: davinci: Add a separate compatible for keystone-k2g soc

2017-07-24 Thread Keerthy


On Monday 24 July 2017 11:54 PM, Rob Herring wrote:
> On Tue, Jul 18, 2017 at 04:27:15PM +0530, Keerthy wrote:
>> Add a separate compatible for keystone-k2g soc
>>
>> Signed-off-by: Keerthy 
>> ---
>>  Documentation/devicetree/bindings/gpio/gpio-davinci.txt | 3 ++-
>>  drivers/gpio/gpio-davinci.c | 1 +
>>  2 files changed, 3 insertions(+), 1 deletion(-)
>>
>> diff --git a/Documentation/devicetree/bindings/gpio/gpio-davinci.txt 
>> b/Documentation/devicetree/bindings/gpio/gpio-davinci.txt
>> index 5079ba7..1a5c1a2 100644
>> --- a/Documentation/devicetree/bindings/gpio/gpio-davinci.txt
>> +++ b/Documentation/devicetree/bindings/gpio/gpio-davinci.txt
>> @@ -1,7 +1,8 @@
>>  Davinci/Keystone GPIO controller bindings
>>  
>>  Required Properties:
>> -- compatible: should be "ti,dm6441-gpio", "ti,keystone-gpio"
>> +- compatible: should be "ti,dm6441-gpio", "ti,keystone-gpio",
>> +  "ti,keystone-k2g-gpio"
> 
> Reformat to one valid combination per line.

Sure Rob. Thanks for reviewing.

> 
>>  
>>  - reg: Physical base address of the controller and the size of memory mapped
>> registers.
>> diff --git a/drivers/gpio/gpio-davinci.c b/drivers/gpio/gpio-davinci.c
>> index 932f270..a8d8dd9 100644
>> --- a/drivers/gpio/gpio-davinci.c
>> +++ b/drivers/gpio/gpio-davinci.c
>> @@ -610,6 +610,7 @@ static int davinci_gpio_irq_setup(struct platform_device 
>> *pdev)
>>  static const struct of_device_id davinci_gpio_ids[] = {
>>  { .compatible = "ti,keystone-gpio", keystone_gpio_get_irq_chip},
>>  { .compatible = "ti,dm6441-gpio", davinci_gpio_get_irq_chip},
>> +{ .compatible = "ti,keystone-k2g-gpio", keystone_gpio_get_irq_chip},
>>  { /* sentinel */ },
>>  };
>>  MODULE_DEVICE_TABLE(of, davinci_gpio_ids);
>> -- 
>> 1.9.1
>>


Re: [lkp-robot] [x86/refcount] b631e535c6: WARNING:at_net/netlink/af_netlink.c:#netlink_sock_destruct

2017-07-24 Thread Kees Cook
On Mon, Jul 24, 2017 at 6:03 AM, Hans Liljestrand
 wrote:
> On Sun, Jul 23, 2017 at 08:52:53PM -0700, Kees Cook wrote:
>>
>> Is 14afee4b6092f ("net: convert sock.sk_wmem_alloc from atomic_t to
>> refcount_t") correct? That looks like a statistics counter, not a
>> refcounter? I can't quite tell, though...
>
>
> Hmm, yes, it looks a bit weird, but it is used in a refcount fashion here:
>
> void sk_free(struct sock *sk)
> {
> /*
>  * We subtract one from sk_wmem_alloc and can know if
>  * some packets are still in some tx queue.
>  * If not null, sock_wfree() will call __sk_free(sk) later
>  */
> if (refcount_dec_and_test(&sk->sk_wmem_alloc))
> __sk_free(sk);
> }
> http://elixir.free-electrons.com/linux/v4.13-rc1/source/net/core/sock.c#L1605

Ah yeah, there it is. Hrmpf. Something is triggering WARNs, though...
I wonder if this can get examined more closely?

Also, why not atomic->refcount for sk_rmem_alloc?

-Kees

>
> And here:
>
> if (refcount_sub_and_test(len, &sk->sk_wmem_alloc))
> __sk_free(sk);
> }
> http://elixir.free-electrons.com/linux/v4.13-rc1/source/net/core/sock.c#L1798
>
>>
>> I think this WARN is from:
>>
>> WARN_ON(refcount_read(&sk->sk_wmem_alloc));
>
>
> I looked through the commit and couldn't find any direct conversion issues.
> Although I guess it is debatable whether refcount_t should be used in this
> kind of less conventional case.
>
> The only potential problem I noticed was that based on the following change
> (or rather the original code) it seems like sk_wmem_alloc could sometimes be
> negative. I'm not familiar enough with the code to say whether that really
> is the case.
>
> --- a/drivers/atm/fore200e.c
> +++ b/drivers/atm/fore200e.c
> @@ -924,12 +924,7 @@ fore200e_tx_irq(struct fore200e* fore200e)
>else {
>dev_kfree_skb_any(entry->skb);
>}
> -#if 1
> -   /* race fixed by the above incarnation mechanism, but... */
> -   if (atomic_read(&sk_atm(vcc)->sk_wmem_alloc) < 0) {
> -   atomic_set(&sk_atm(vcc)->sk_wmem_alloc, 0);
> -   }
> -#endif
> +
>/* check error condition */
>if (*entry->status & STATUS_ERROR)
>atomic_inc(>stats->tx_err);
>
> But if this is actually supposed to legitimately happen and eventually
> "balance out" refcount_t might not work here. On the other hand this should
> have triggered an earlier WARN_ON already, so it doesn't seem to be the
> issue here?
>
> Regards,
> -hans
>
>
>>
>> -Kees
>>
>> On Sun, Jul 23, 2017 at 7:13 PM, kernel test robot
>>  wrote:
>>>
>>>
>>> FYI, we noticed the following commit:
>>>
>>> commit: b631e535c61d7ddbb7ebac545f729ca9b3b6d70e ("x86/refcount:
>>> Implement fast refcount overflow protection")
>>> https://git.kernel.org/cgit/linux/kernel/git/kees/linux.git
>>> kspp/fast-refcount/ud/v6
>>>
>>> in testcase: boot
>>>
>>> on test machine: qemu-system-x86_64 -enable-kvm -smp 2 -m 512M
>>>
>>> caused below changes (please refer to attached dmesg/kmsg for entire
>>> log/backtrace):
>>>
>>>
>>>
>>> ++++
>>> || 561ee9566e
>>> | b631e535c6 |
>>>
>>> ++++
>>> | boot_successes | 37
>>> | 0  |
>>> | boot_failures  | 0
>>> | 4  |
>>> | WARNING:at_net/netlink/af_netlink.c:#netlink_sock_destruct | 0
>>> | 4  |
>>>
>>> ++++
>>>
>>>
>>>
>>> [   36.991339] WARNING: CPU: 0 PID: 280 at net/netlink/af_netlink.c:374
>>> netlink_sock_destruct+0x1ea/0x200
>>> [   36.994035] Modules linked in:
>>> [   36.994815] CPU: 0 PID: 280 Comm: sh Not tainted
>>> 4.13.0-rc1-3-gb631e53 #1
>>> [   36.996546] task: 88001448c180 task.stack: c94e
>>> [   36.998006] RIP: 0010:netlink_sock_destruct+0x1ea/0x200
>>> [   36.999290] RSP: 0018:82433de0 EFLAGS: 00010206
>>> [   37.000591] RAX: 88001448c180 RBX: 880016a3d000 RCX:
>>> 
>>> [   37.002319] RDX: 0100 RSI: 0001 RDI:
>>> 82796f48
>>> [   37.004061] RBP: 82433df0 R08:  R09:
>>> 
>>> [   37.005780] R10: 0001 R11: 0001 R12:
>>> 0001
>>> [   37.007528] R13: 81cd4a00 R14: 96e49674e09954cf R15:
>>> 001f
>>> [   37.009261] FS:  () GS:8243()
>>> knlGS:
>>> [   37.011233] CS:  0010 DS:  ES:  CR0: 80050033
>>> [   37.012629] CR2: 7f268a96e688 CR3: 159ff000 CR4:
>>> 06b0
>>> [  

Re: [lkp-robot] [x86/refcount] b631e535c6: WARNING:at_net/netlink/af_netlink.c:#netlink_sock_destruct

2017-07-24 Thread Kees Cook
On Mon, Jul 24, 2017 at 6:03 AM, Hans Liljestrand
 wrote:
> On Sun, Jul 23, 2017 at 08:52:53PM -0700, Kees Cook wrote:
>>
>> Is 14afee4b6092f ("net: convert sock.sk_wmem_alloc from atomic_t to
>> refcount_t") correct? That looks like a statistics counter, not a
>> refcounter? I can't quite tell, though...
>
>
> Hmm, yes, it looks a bit weird, but it is used in a refcount fashion here:
>
> void sk_free(struct sock *sk)
> {
> /*
>  * We subtract one from sk_wmem_alloc and can know if
>  * some packets are still in some tx queue.
>  * If not null, sock_wfree() will call __sk_free(sk) later
>  */
> if (refcount_dec_and_test(&sk->sk_wmem_alloc))
> __sk_free(sk);
> }
> http://elixir.free-electrons.com/linux/v4.13-rc1/source/net/core/sock.c#L1605

Ah yeah, there it is. Hrmpf. Something is triggering WARNs, though...
I wonder if this can get examined more closely?

Also, why not atomic->refcount for sk_rmem_alloc?

-Kees

>
> And here:
>
> if (refcount_sub_and_test(len, &sk->sk_wmem_alloc))
> __sk_free(sk);
> }
> http://elixir.free-electrons.com/linux/v4.13-rc1/source/net/core/sock.c#L1798
>
>>
>> I think this WARN is from:
>>
>> WARN_ON(refcount_read(&sk->sk_wmem_alloc));
>
>
> I looked through the commit and couldn't find any direct conversion issues.
> Although I guess it is debatable whether refcount_t should be used in this
> kind of less conventional case.
>
> The only potential problem I noticed was that based on the following change
> (or rather the original code) it seems like sk_wmem_alloc could sometimes be
> negative. I'm not familiar enough with the code to say whether that really
> is the case.
>
> --- a/drivers/atm/fore200e.c
> +++ b/drivers/atm/fore200e.c
> @@ -924,12 +924,7 @@ fore200e_tx_irq(struct fore200e* fore200e)
>else {
>dev_kfree_skb_any(entry->skb);
>}
> -#if 1
> -   /* race fixed by the above incarnation mechanism, but... */
> -   if (atomic_read(&sk_atm(vcc)->sk_wmem_alloc) < 0) {
> -   atomic_set(&sk_atm(vcc)->sk_wmem_alloc, 0);
> -   }
> -#endif
> +
>/* check error condition */
>if (*entry->status & STATUS_ERROR)
>atomic_inc(>stats->tx_err);
>
> But if this is actually supposed to legitimately happen and eventually
> "balance out" refcount_t might not work here. On the other hand this should
> have triggered an earlier WARN_ON already, so it doesn't seem to be the
> issue here?
>
> Regards,
> -hans
>
>
>>
>> -Kees
>>
>> On Sun, Jul 23, 2017 at 7:13 PM, kernel test robot
>>  wrote:
>>>
>>>
>>> FYI, we noticed the following commit:
>>>
>>> commit: b631e535c61d7ddbb7ebac545f729ca9b3b6d70e ("x86/refcount:
>>> Implement fast refcount overflow protection")
>>> https://git.kernel.org/cgit/linux/kernel/git/kees/linux.git
>>> kspp/fast-refcount/ud/v6
>>>
>>> in testcase: boot
>>>
>>> on test machine: qemu-system-x86_64 -enable-kvm -smp 2 -m 512M
>>>
>>> caused below changes (please refer to attached dmesg/kmsg for entire
>>> log/backtrace):
>>>
>>>
>>>
>>> ++++
>>> || 561ee9566e
>>> | b631e535c6 |
>>>
>>> ++++
>>> | boot_successes | 37
>>> | 0  |
>>> | boot_failures  | 0
>>> | 4  |
>>> | WARNING:at_net/netlink/af_netlink.c:#netlink_sock_destruct | 0
>>> | 4  |
>>>
>>> ++++
>>>
>>>
>>>
>>> [   36.991339] WARNING: CPU: 0 PID: 280 at net/netlink/af_netlink.c:374
>>> netlink_sock_destruct+0x1ea/0x200
>>> [   36.994035] Modules linked in:
>>> [   36.994815] CPU: 0 PID: 280 Comm: sh Not tainted
>>> 4.13.0-rc1-3-gb631e53 #1
>>> [   36.996546] task: 88001448c180 task.stack: c94e
>>> [   36.998006] RIP: 0010:netlink_sock_destruct+0x1ea/0x200
>>> [   36.999290] RSP: 0018:82433de0 EFLAGS: 00010206
>>> [   37.000591] RAX: 88001448c180 RBX: 880016a3d000 RCX:
>>> 
>>> [   37.002319] RDX: 0100 RSI: 0001 RDI:
>>> 82796f48
>>> [   37.004061] RBP: 82433df0 R08:  R09:
>>> 
>>> [   37.005780] R10: 0001 R11: 0001 R12:
>>> 0001
>>> [   37.007528] R13: 81cd4a00 R14: 96e49674e09954cf R15:
>>> 001f
>>> [   37.009261] FS:  () GS:8243()
>>> knlGS:
>>> [   37.011233] CS:  0010 DS:  ES:  CR0: 80050033
>>> [   37.012629] CR2: 7f268a96e688 CR3: 159ff000 CR4:
>>> 06b0
>>> [   37.014212] Call Trace:
>>> [   37.014745]  

  1   2   3   4   5   6   7   8   9   10   >