date:20181005

[PATCH v2 1/4] platform/x86: intel_pmc_core: Show Latency Tolerance info

2018-10-05 Thread Rajneesh Bhardwaj

This adds support for showing the Latency Tolerance Reporting (LTR)
values for the IPs on the PCH as reported by the PMC. The format shown
here is the raw LTR data payload, which can be further decoded as per
the PCI specification.

This also fixes some minor alignment issues in the header file by
removing spaces and converting to tabs at some places.

Signed-off-by: Rajneesh Bhardwaj 
---
Changes in v2:
 * Removed IP # from map and displaying IP # while printing.
 * Other style fixes as per Andy's suggestion.

 drivers/platform/x86/intel_pmc_core.c | 73 +++
 drivers/platform/x86/intel_pmc_core.h | 56 +---
 2 files changed, 122 insertions(+), 7 deletions(-)

diff --git a/drivers/platform/x86/intel_pmc_core.c 
b/drivers/platform/x86/intel_pmc_core.c
index 2d272a3e0176..217a822a8da1 100644
--- a/drivers/platform/x86/intel_pmc_core.c
+++ b/drivers/platform/x86/intel_pmc_core.c
@@ -110,10 +110,37 @@ static const struct pmc_bit_map spt_pfear_map[] = {
{},
 };
 
+static const struct pmc_bit_map spt_ltr_show_map[] = {
+   {"LTR_SOUTHPORT_A", SPT_PMC_LTR_SPA},
+   {"LTR_SOUTHPORT_B", SPT_PMC_LTR_SPB},
+   {"LTR_SATA",SPT_PMC_LTR_SATA},
+   {"LTR_GIGABIT_ETHERNET",SPT_PMC_LTR_GBE},
+   {"LTR_XHCI",SPT_PMC_LTR_XHCI},
+   /* IP 5 is reserved */
+   {"LTR_ME",  SPT_PMC_LTR_ME},
+   /* EVA is Enterprise Value Add, doesn't really exist on PCH */
+   {"LTR_EVA", SPT_PMC_LTR_EVA},
+   {"LTR_SOUTHPORT_C", SPT_PMC_LTR_SPC},
+   {"LTR_HD_AUDIO",SPT_PMC_LTR_AZ},
+   /* IP 10 is reserved */
+   {"LTR_LPSS",SPT_PMC_LTR_LPSS},
+   {"LTR_SOUTHPORT_D", SPT_PMC_LTR_SPD},
+   {"LTR_SOUTHPORT_E", SPT_PMC_LTR_SPE},
+   {"LTR_CAMERA",  SPT_PMC_LTR_CAM},
+   {"LTR_ESPI",SPT_PMC_LTR_ESPI},
+   {"LTR_SCC", SPT_PMC_LTR_SCC},
+   {"LTR_ISH", SPT_PMC_LTR_ISH},
+   /* Below two cannot be used for LTR_IGNORE */
+   {"LTR_CURRENT_PLATFORM",SPT_PMC_LTR_CUR_PLT},
+   {"LTR_AGGREGATED_SYSTEM",   SPT_PMC_LTR_CUR_ASLT},
+   {}
+};
+
 static const struct pmc_reg_map spt_reg_map = {
.pfear_sts = spt_pfear_map,
.mphy_sts = spt_mphy_map,
.pll_sts = spt_pll_map,
+   .ltr_show_sts = spt_ltr_show_map,
.slp_s0_offset = SPT_PMC_SLP_S0_RES_COUNTER_OFFSET,
.ltr_ignore_offset = SPT_PMC_LTR_IGNORE_OFFSET,
.regmap_length = SPT_PMC_MMIO_REG_LEN,
@@ -252,10 +279,39 @@ static const struct pmc_bit_map *cnp_slps0_dbg_maps[] = {
NULL,
 };
 
+static const struct pmc_bit_map cnp_ltr_show_map[] = {
+   {"LTR_SOUTHPORT_A", CNP_PMC_LTR_SPA},
+   {"LTR_SOUTHPORT_B", CNP_PMC_LTR_SPB},
+   {"LTR_SATA",CNP_PMC_LTR_SATA},
+   {"LTR_GIGABIT_ETHERNET",CNP_PMC_LTR_GBE},
+   {"LTR_XHCI",CNP_PMC_LTR_XHCI},
+   /* IP 5 is reserved */
+   {"LTR_ME",  CNP_PMC_LTR_ME},
+   /* EVA is Enterprise Value Add, doesn't really exist on PCH */
+   {"LTR_EVA", CNP_PMC_LTR_EVA},
+   {"LTR_SOUTHPORT_C", CNP_PMC_LTR_SPC},
+   {"LTR_HD_AUDIO",CNP_PMC_LTR_AZ},
+   {"LTR_CNV", CNP_PMC_LTR_CNV},
+   {"LTR_LPSS",CNP_PMC_LTR_LPSS},
+   {"LTR_SOUTHPORT_D", CNP_PMC_LTR_SPD},
+   {"LTR_SOUTHPORT_E", CNP_PMC_LTR_SPE},
+   {"LTR_CAMERA",  CNP_PMC_LTR_CAM},
+   {"LTR_ESPI",CNP_PMC_LTR_ESPI},
+   {"LTR_SCC", CNP_PMC_LTR_SCC},
+   {"LTR_ISH", CNP_PMC_LTR_ISH},
+   {"LTR_UFSX2",   CNP_PMC_LTR_UFSX2},
+   {"LTR_EMMC",CNP_PMC_LTR_EMMC},
+   /* Below two cannot be used for LTR_IGNORE */
+   {"LTR_CURRENT_PLATFORM",CNP_PMC_LTR_CUR_PLT},
+   {"LTR_AGGREGATED_SYSTEM",   CNP_PMC_LTR_CUR_ASLT},
+   {}
+};
+
 static const struct pmc_reg_map cnp_reg_map = {
.pfear_sts = cnp_pfear_map,
.slp_s0_offset = CNP_PMC_SLP_S0_RES_COUNTER_OFFSET,
.slps0_dbg_maps = cnp_slps0_dbg_maps,
+   .ltr_show_sts = cnp_ltr_show_map,
.slps0_dbg_offset = CNP_PMC_SLPS0_DBG_OFFSET,
.ltr_ignore_offset = CNP_PMC_LTR_IGNORE_OFFSET,
.regmap_length = CNP_PMC_MMIO_REG_LEN,
@@ -592,6 +648,21 @@ static int pmc_core_slps0_dbg_show(struct seq_file *s, 
void *unused)
 }
 DEFINE_SHOW_ATTRIBUTE(pmc_core_slps0_dbg);
 
+static int pmc_core_ltr_show(struct seq_file *s, void *unused)
+{
+   struct pmc_dev *pmcdev = s->private;
+   const struct pmc_bit_map *map = pmcdev->map->ltr_show_sts;
+   int index;
+
+   for (index = 0; map[index].name

Re: [PATCH v4] remoteproc: qcom: Introduce Non-PAS ADSP PIL driver

2018-10-05 Thread Bjorn Andersson

On Mon 24 Sep 04:07 PDT 2018, Rohit kumar wrote:

> This adds Non PAS ADSP PIL driver for Qualcomm
> Technologies Inc SoCs.
> Added initial support for SDM845 with ADSP bootup and
> shutdown operation handled from Application Processor
> SubSystem(APSS).
> 
> Signed-off-by: Rohit kumar 

Sorry for missing this on the last few patches, I thought we said we
were going to name the driver qcom_q6v5_adsp. Rather than spending more
time on this I applied the patch with this change, as it does look good.

Please let me know if you have any concerns with this.

Regards,
Bjorn

> ---
> Changes since v3:
>   Addressed comments posted by Sibi
> 
> This patch is dependent on the rpmh powerdomain driver 
> https://lkml.org/lkml/2018/6/27/7
> and renaming of Hexagon v5 PAS driver 
> https://patchwork.kernel.org/patch/10601119/ .
> 
>  drivers/remoteproc/Kconfig |  14 ++
>  drivers/remoteproc/Makefile|   1 +
>  drivers/remoteproc/qcom_adsp_pil.c | 502 
> +
>  3 files changed, 517 insertions(+)
>  create mode 100644 drivers/remoteproc/qcom_adsp_pil.c
> 
> diff --git a/drivers/remoteproc/Kconfig b/drivers/remoteproc/Kconfig
> index 8894935..f554669 100644
> --- a/drivers/remoteproc/Kconfig
> +++ b/drivers/remoteproc/Kconfig
> @@ -140,6 +140,20 @@ config QCOM_Q6V5_WCSS
> Say y here to support the Qualcomm Peripheral Image Loader for the
> Hexagon V5 based WCSS remote processors.
>  
> +config QCOM_ADSP_PIL
> + tristate "Qualcomm Technology Inc ADSP Peripheral Image Loader"
> + depends on OF && ARCH_QCOM
> + depends on QCOM_SMEM
> + depends on RPMSG_QCOM_GLINK_SMEM || RPMSG_QCOM_GLINK_SMEM=n
> + depends on QCOM_SYSMON || QCOM_SYSMON=n
> + select MFD_SYSCON
> + select QCOM_MDT_LOADER
> + select QCOM_Q6V5_COMMON
> + select QCOM_RPROC_COMMON
> + help
> +   Say y here to support the Peripheral Image Loader
> +   for the Qualcomm Technology Inc. ADSP remote processors.
> +
>  config QCOM_SYSMON
>   tristate "Qualcomm sysmon driver"
>   depends on RPMSG
> diff --git a/drivers/remoteproc/Makefile b/drivers/remoteproc/Makefile
> index 050f41a..0e1b89c 100644
> --- a/drivers/remoteproc/Makefile
> +++ b/drivers/remoteproc/Makefile
> @@ -19,6 +19,7 @@ obj-$(CONFIG_QCOM_Q6V5_COMMON)  += qcom_q6v5.o
>  obj-$(CONFIG_QCOM_Q6V5_MSS)  += qcom_q6v5_mss.o
>  obj-$(CONFIG_QCOM_Q6V5_PAS)  += qcom_q6v5_pas.o
>  obj-$(CONFIG_QCOM_Q6V5_WCSS) += qcom_q6v5_wcss.o
> +obj-$(CONFIG_QCOM_ADSP_PIL)  += qcom_adsp_pil.o
>  obj-$(CONFIG_QCOM_SYSMON)+= qcom_sysmon.o
>  obj-$(CONFIG_QCOM_WCNSS_PIL) += qcom_wcnss_pil.o
>  qcom_wcnss_pil-y += qcom_wcnss.o
> diff --git a/drivers/remoteproc/qcom_adsp_pil.c 
> b/drivers/remoteproc/qcom_adsp_pil.c
> new file mode 100644
> index 000..f2f5e56
> --- /dev/null
> +++ b/drivers/remoteproc/qcom_adsp_pil.c
> @@ -0,0 +1,502 @@
> +// SPDX-License-Identifier: GPL-2.0
> +/*
> + * Qualcomm Technology Inc. ADSP Peripheral Image Loader for SDM845.
> + * Copyright (c) 2018, The Linux Foundation. All rights reserved.
> + */
> +
> +#include 
> +#include 
> +#include 
> +#include 
> +#include 
> +#include 
> +#include 
> +#include 
> +#include 
> +#include 
> +#include 
> +#include 
> +#include 
> +#include 
> +#include 
> +#include 
> +#include 
> +#include 
> +#include 
> +#include 
> +
> +#include "qcom_common.h"
> +#include "qcom_q6v5.h"
> +#include "remoteproc_internal.h"
> +
> +/* time out value */
> +#define ACK_TIMEOUT  1000
> +#define BOOT_FSM_TIMEOUT 1
> +/* mask values */
> +#define EVB_MASK GENMASK(27, 4)
> +/*QDSP6SS register offsets*/
> +#define RST_EVB_REG  0x10
> +#define CORE_START_REG   0x400
> +#define BOOT_CMD_REG 0x404
> +#define BOOT_STATUS_REG  0x408
> +#define RET_CFG_REG  0x1C
> +/*TCSR register offsets*/
> +#define LPASS_MASTER_IDLE_REG0x8
> +#define LPASS_HALTACK_REG0x4
> +#define LPASS_PWR_ON_REG 0x10
> +#define LPASS_HALTREQ_REG0x0
> +
> +/* list of clocks required by ADSP PIL */
> +static const char * const adsp_clk_id[] = {
> + "sway_cbcr", "lpass_aon", "lpass_ahbs_aon_cbcr", "lpass_ahbm_aon_cbcr",
> + "qdsp6ss_xo", "qdsp6ss_sleep", "qdsp6ss_core",
> +};
> +
> +struct adsp_pil_data {
> + int crash_reason_smem;
> + const char *firmware_name;
> +
> + const char *ssr_name;
> + const char *sysmon_name;
> + int ssctl_id;
> +};
> +
> +struct qcom_adsp {
> + struct device *dev;
> + struct rproc *rproc;
> +
> + struct qcom_q6v5 q6v5;
> +
> + struct clk *xo;
> +
> + int num_clks;
> + struct clk_bulk_data *clks;
> +
> + void __iomem *qdsp6ss_base;
> +
> + struct reset_control *pdc_sync_reset;
> + struct reset_control *cc_lpass_restart;
> +
> + st

[PATCH v2 4/4] platform/x86: intel_telemetry: report debugfs failure

2018-10-05 Thread Rajneesh Bhardwaj

On some Goldmont based systems such as ASRock J3455M the BIOS may not
enable the IPC1 device that provides access to the PMC and PUNIT. In
such scenarios, the IOSS and PSS resources from the platform device cannot
be obtained, resulting in an invalid telemetry_plt_config, which is an
internal data structure that holds platform config and is maintained by
the telemetry platform driver.

This is also applicable to the platforms where the BIOS supports IPC1
device under debug configurations but IPC1 is disabled by user or the
policy.

This change allows the user to know the reason for not seeing entries under
/sys/kernel/debug/telemetry/* when there is no apparent failure at boot.

Cc: Matt Turner 
Cc: Len Brown 
Cc: Souvik Kumar Chakravarty 
Cc: Kuppuswamy Sathyanarayanan 

Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=198779
Acked-by: Matt Turner 
Signed-off-by: Rajneesh Bhardwaj 
---
Changes in v2:
 * Removed print and out label both as suggested by Andy.
 * changed to pr_info.
 * Other minor style fixes.


 drivers/platform/x86/intel_telemetry_debugfs.c | 8 ++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/drivers/platform/x86/intel_telemetry_debugfs.c 
b/drivers/platform/x86/intel_telemetry_debugfs.c
index ffd0474b0531..1423fa8710fd 100644
--- a/drivers/platform/x86/intel_telemetry_debugfs.c
+++ b/drivers/platform/x86/intel_telemetry_debugfs.c
@@ -951,12 +951,16 @@ static int __init telemetry_debugfs_init(void)
debugfs_conf = (struct telemetry_debugfs_conf *)id->driver_data;
 
err = telemetry_pltconfig_valid();
-   if (err < 0)
+   if (err < 0) {
+   pr_info("Invalid pltconfig, ensure IPC1 device is enabled in 
BIOS\n");
return -ENODEV;
+   }
 
err = telemetry_debugfs_check_evts();
-   if (err < 0)
+   if (err < 0) {
+   pr_info("telemetry_debugfs_check_evts failed\n");
return -EINVAL;
+   }
 
register_pm_notifier(&pm_notifier);
 
-- 
2.17.1

[PATCH v2 3/4] platform/x86: intel_pmc_core: Decode Snoop / Non Snoop LTR

2018-10-05 Thread Rajneesh Bhardwaj

The LTR values follow PCIE LTR encoding format and can be decoded as per
https://pcisig.com/sites/default/files/specification_documents/ECN_LatencyTolnReporting_14Aug08.pdf

This adds support to translate the raw LTR values as read from the PMC
to meaningful values in nanosecond units of time.

Signed-off-by: Rajneesh Bhardwaj 
---
Changes in v2:
 * Get rid of union and bitfields to decode LTR and use FIELD_GET macro
 * Change get_ltr_scale to convert_ltr_scale.
 * Other style fixes and misc. improvements suggested by Andy for v1.

 drivers/platform/x86/intel_pmc_core.c | 64 +--
 drivers/platform/x86/intel_pmc_core.h |  5 +++
 2 files changed, 66 insertions(+), 3 deletions(-)

diff --git a/drivers/platform/x86/intel_pmc_core.c 
b/drivers/platform/x86/intel_pmc_core.c
index c616cfedf2be..fbcab53456f3 100644
--- a/drivers/platform/x86/intel_pmc_core.c
+++ b/drivers/platform/x86/intel_pmc_core.c
@@ -21,6 +21,7 @@
 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
 
 #include 
+#include 
 #include 
 #include 
 #include 
@@ -650,16 +651,73 @@ static int pmc_core_slps0_dbg_show(struct seq_file *s, 
void *unused)
 }
 DEFINE_SHOW_ATTRIBUTE(pmc_core_slps0_dbg);
 
+static u32 convert_ltr_scale(u32 val)
+{
+   u32 scale = 0;
+   /*
+* As per PCIE specification supporting document
+* ECN_LatencyTolnReporting_14Aug08.pdf the Latency
+* Tolerance Reporting data payload is encoded in a
+* 3 bit scale and 10 bit value fields. Values are
+* multiplied by the indicated scale to yield an absolute time
+* value, expressible in a range from 1 nanosecond to
+* 2^25*(2^10-1) = 34,326,183,936 nanoseconds.
+*
+* scale encoding is as follows:
+*
+* --
+* |scale factor|   Multiplier (ns) |
+* --
+* |0   |   1   |
+* |1   |   32  |
+* |2   |   1024|
+* |3   |   32768   |
+* |4   |   1048576 |
+* |5   |   33554432|
+* |6   |   Invalid |
+* |7   |   Invalid |
+* --
+*/
+   if (val > 5)
+   pr_warn("Invalid LTR scale factor.\n");
+   else
+   scale = 1U << (5 * (val));
+
+   return scale;
+}
+
 static int pmc_core_ltr_show(struct seq_file *s, void *unused)
 {
struct pmc_dev *pmcdev = s->private;
const struct pmc_bit_map *map = pmcdev->map->ltr_show_sts;
+   u64 decoded_snoop_ltr, decoded_non_snoop_ltr;
+   u32 ltr_raw_data, scale, val;
+   u16 snoop_ltr, nonsnoop_ltr;
int index;
 
for (index = 0; map[index].name ; index++) {
-   seq_printf(s, "IP %-2d :%-32s\tRAW LTR: 0x%x\n", index,
-  map[index].name,
-  pmc_core_reg_read(pmcdev, map[index].bit_mask));
+   decoded_snoop_ltr = decoded_non_snoop_ltr = 0;
+   ltr_raw_data = pmc_core_reg_read(pmcdev,
+map[index].bit_mask);
+   snoop_ltr = ltr_raw_data & ~MTPMC_MASK;
+   nonsnoop_ltr = (ltr_raw_data >> 0x10) & ~MTPMC_MASK;
+
+   if (FIELD_GET(LTR_REQ_NONSNOOP, ltr_raw_data)) {
+   scale = FIELD_GET(LTR_DECODED_SCALE, nonsnoop_ltr);
+   val = FIELD_GET(LTR_DECODED_VAL, nonsnoop_ltr);
+   decoded_non_snoop_ltr = val * convert_ltr_scale(scale);
+   }
+
+   if (FIELD_GET(LTR_REQ_SNOOP, ltr_raw_data)) {
+   scale = FIELD_GET(LTR_DECODED_SCALE, snoop_ltr);
+   val = FIELD_GET(LTR_DECODED_VAL, snoop_ltr);
+   decoded_snoop_ltr = val * convert_ltr_scale(scale);
+   }
+
+   seq_printf(s, "IP %-2d :%-24s\tRaw LTR: 0x%-16x\t Non-Snoop LTR 
(ns): %-16llu\t Snoop LTR (ns): %-16llu\n",
+  index, map[index].name, ltr_raw_data,
+  decoded_non_snoop_ltr,
+  decoded_snoop_ltr);
}
return 0;
 }
diff --git a/drivers/platform/x86/intel_pmc_core.h 
b/drivers/platform/x86/intel_pmc_core.h
index 7f8181057ec8..cc49cd4c86e9 100644
--- a/drivers/platform/x86/intel_pmc_core.h
+++ b/drivers/platform/x86/intel_pmc_core.h
@@ -177,6 +177,11 @@ enum ppfear_regs {
 #define CNP_PMC_LTR_EMMC   0x1BF4
 #define CNP_PMC_LTR_UFSX2  0x1BF8
 
+#define LTR_REQ_NONSNOOP   BIT(31)
+#define LTR_REQ_SNOOP  BIT(15)
+#define LTR_DECODED_VALGENMASK(9, 0

[PATCH v2 2/4] platform/x86: intel_pmc_core: Fix LTR IGNORE Max offset

2018-10-05 Thread Rajneesh Bhardwaj

Cannonlake PCH allows us to ignore LTR from more IPs than Sunrisepoint
PCH so make the LTR ignore platform specific.

Signed-off-by: Rajneesh Bhardwaj 
---
 drivers/platform/x86/intel_pmc_core.c | 4 +++-
 drivers/platform/x86/intel_pmc_core.h | 4 +++-
 2 files changed, 6 insertions(+), 2 deletions(-)

diff --git a/drivers/platform/x86/intel_pmc_core.c 
b/drivers/platform/x86/intel_pmc_core.c
index 217a822a8da1..c616cfedf2be 100644
--- a/drivers/platform/x86/intel_pmc_core.c
+++ b/drivers/platform/x86/intel_pmc_core.c
@@ -148,6 +148,7 @@ static const struct pmc_reg_map spt_reg_map = {
.ppfear_buckets = SPT_PPFEAR_NUM_ENTRIES,
.pm_cfg_offset = SPT_PMC_PM_CFG_OFFSET,
.pm_read_disable_bit = SPT_PMC_READ_DISABLE_BIT,
+   .ltr_ignore_max = SPT_NUM_IP_IGN_ALLOWED,
 };
 
 /* Cannonlake: PGD PFET Enable Ack Status Register(s) bitmap */
@@ -319,6 +320,7 @@ static const struct pmc_reg_map cnp_reg_map = {
.ppfear_buckets = CNP_PPFEAR_NUM_ENTRIES,
.pm_cfg_offset = CNP_PMC_PM_CFG_OFFSET,
.pm_read_disable_bit = CNP_PMC_READ_DISABLE_BIT,
+   .ltr_ignore_max = CNP_NUM_IP_IGN_ALLOWED,
 };
 
 static inline u8 pmc_core_reg_read_byte(struct pmc_dev *pmcdev, int offset)
@@ -565,7 +567,7 @@ static ssize_t pmc_core_ltr_ignore_write(struct file *file, 
const char __user
goto out_unlock;
}
 
-   if (val > NUM_IP_IGN_ALLOWED) {
+   if (val > map->ltr_ignore_max) {
err = -EINVAL;
goto out_unlock;
}
diff --git a/drivers/platform/x86/intel_pmc_core.h 
b/drivers/platform/x86/intel_pmc_core.h
index 7a00436e337d..7f8181057ec8 100644
--- a/drivers/platform/x86/intel_pmc_core.h
+++ b/drivers/platform/x86/intel_pmc_core.h
@@ -44,7 +44,7 @@
 #define SPT_PMC_READ_DISABLE_BIT   0x16
 #define SPT_PMC_MSG_FULL_STS_BIT   0x18
 #define NUM_RETRIES100
-#define NUM_IP_IGN_ALLOWED 17
+#define SPT_NUM_IP_IGN_ALLOWED 17
 
 #define SPT_PMC_LTR_CUR_PLT0x350
 #define SPT_PMC_LTR_CUR_ASLT   0x354
@@ -154,6 +154,7 @@ enum ppfear_regs {
 #define CNP_PPFEAR_NUM_ENTRIES 8
 #define CNP_PMC_READ_DISABLE_BIT   22
 #define CNP_PMC_LATCH_SLPS0_EVENTS BIT(31)
+#define CNP_NUM_IP_IGN_ALLOWED 19
 #define CNP_PMC_LTR_CUR_PLT0x1B50
 #define CNP_PMC_LTR_CUR_ASLT   0x1B54
 #define CNP_PMC_LTR_SPA0x1B60
@@ -216,6 +217,7 @@ struct pmc_reg_map {
const u32 pm_cfg_offset;
const int pm_read_disable_bit;
const u32 slps0_dbg_offset;
+   const u32 ltr_ignore_max;
 };
 
 /**
-- 
2.17.1

Re: [PATCH v4] dt-binding: remoteproc: Add QTI ADSP PIL bindings

2018-10-05 Thread Bjorn Andersson

On Mon 10 Sep 20:54 PDT 2018, Rohit kumar wrote:
> diff --git a/Documentation/devicetree/bindings/remoteproc/qcom,adsp-pil.txt 
> b/Documentation/devicetree/bindings/remoteproc/qcom,adsp-pil.txt
[..]
> += EXAMPLE
> +The following example describes the resources needed to boot control the
> +ADSP, as it is found on SDM845 boards.
> + adsp-pil {

Updated this to remoteproc@1730 and applied the patch to rproc-next.

Regards,
Bjorn

> + compatible = "qcom,sdm845-adsp-pil";
> +
> + reg = <0x1730 0x40c>;
> +
> + interrupts-extended = <&intc 0 162 IRQ_TYPE_EDGE_RISING>,
> + <&adsp_smp2p_in 0 IRQ_TYPE_EDGE_RISING>,
> + <&adsp_smp2p_in 1 IRQ_TYPE_EDGE_RISING>,
> + <&adsp_smp2p_in 2 IRQ_TYPE_EDGE_RISING>,
> + <&adsp_smp2p_in 3 IRQ_TYPE_EDGE_RISING>;
> + interrupt-names = "wdog", "fatal", "ready",
> + "handover", "stop-ack";
> +
> + clocks = <&rpmhcc RPMH_CXO_CLK>,
> + <&gcc GCC_LPASS_SWAY_CLK>,
> + <&lpasscc LPASS_AUDIO_WRAPPER_AON_CLK>,
> + <&lpasscc LPASS_Q6SS_AHBS_AON_CLK>,
> + <&lpasscc LPASS_Q6SS_AHBM_AON_CLK>,
> + <&lpasscc LPASS_QDSP6SS_XO_CLK>,
> + <&lpasscc LPASS_QDSP6SS_SLEEP_CLK>,
> + <&lpasscc LPASS_QDSP6SS_CORE_CLK>;
> + clock-names = "xo", "sway_cbcr", "lpass_aon",
> + "lpass_ahbs_aon_cbcr",
> + "lpass_ahbm_aon_cbcr", "qdsp6ss_xo",
> + "qdsp6ss_sleep", "qdsp6ss_core";
> +
> + power-domains = <&rpmhpd SDM845_CX>;
> +
> + resets = <&pdc_reset PDC_AUDIO_SYNC_RESET>,
> +  <&aoss_reset AOSS_CC_LPASS_RESTART>;
> + reset-names = "pdc_sync", "cc_lpass";
> +
> + qcom,halt-regs = <&tcsr_mutex_regs 0x22000>;
> +
> + memory-region = <&pil_adsp_mem>;
> +
> + qcom,smem-states = <&adsp_smp2p_out 0>;
> + qcom,smem-state-names = "stop";
> + };

Re: [PATCH] remoteproc: qcom: q6v5: Propagate EPROBE_DEFER

2018-10-05 Thread Bjorn Andersson

On Mon 24 Sep 23:50 PDT 2018, Sibi Sankar wrote:

> On 2018-09-20 07:21, Bjorn Andersson wrote:
> > In the case that the interrupts fail to resolve because of the
> > interrupt-controller not yet being registered the
> > platform_get_irq_byname() call will fail with -EPROBE_DEFER, but passing
> > this into devm_request_threaded_irq() will result in -EINVAL being
> > returned, so the driver is therefore not reprobed later.
> > 
> 
> The patch looks fine.
> Reviewed-by: Sibi Sankar 
> 

Thanks for the review Sibi, applied the patch.

Regards,
Bjorn

> > Fixes: 3b415c8fb263 ("remoteproc: q6v5: Extract common resource
> > handling")
> > Cc: sta...@vger.kernel.org
> > Signed-off-by: Bjorn Andersson 
> > ---
> >  drivers/remoteproc/qcom_q6v5.c | 12 
> >  1 file changed, 12 insertions(+)
> > 
> > diff --git a/drivers/remoteproc/qcom_q6v5.c
> > b/drivers/remoteproc/qcom_q6v5.c
> > index 61a760ee4aac..e9ab90c19304 100644
> > --- a/drivers/remoteproc/qcom_q6v5.c
> > +++ b/drivers/remoteproc/qcom_q6v5.c
> > @@ -198,6 +198,9 @@ int qcom_q6v5_init(struct qcom_q6v5 *q6v5, struct
> > platform_device *pdev,
> > }
> > 
> > q6v5->fatal_irq = platform_get_irq_byname(pdev, "fatal");
> > +   if (q6v5->fatal_irq == -EPROBE_DEFER)
> > +   return -EPROBE_DEFER;
> > +
> > ret = devm_request_threaded_irq(&pdev->dev, q6v5->fatal_irq,
> > NULL, q6v5_fatal_interrupt,
> > IRQF_TRIGGER_RISING | IRQF_ONESHOT,
> > @@ -208,6 +211,9 @@ int qcom_q6v5_init(struct qcom_q6v5 *q6v5, struct
> > platform_device *pdev,
> > }
> > 
> > q6v5->ready_irq = platform_get_irq_byname(pdev, "ready");
> > +   if (q6v5->ready_irq == -EPROBE_DEFER)
> > +   return -EPROBE_DEFER;
> > +
> > ret = devm_request_threaded_irq(&pdev->dev, q6v5->ready_irq,
> > NULL, q6v5_ready_interrupt,
> > IRQF_TRIGGER_RISING | IRQF_ONESHOT,
> > @@ -218,6 +224,9 @@ int qcom_q6v5_init(struct qcom_q6v5 *q6v5, struct
> > platform_device *pdev,
> > }
> > 
> > q6v5->handover_irq = platform_get_irq_byname(pdev, "handover");
> > +   if (q6v5->handover_irq == -EPROBE_DEFER)
> > +   return -EPROBE_DEFER;
> > +
> > ret = devm_request_threaded_irq(&pdev->dev, q6v5->handover_irq,
> > NULL, q6v5_handover_interrupt,
> > IRQF_TRIGGER_RISING | IRQF_ONESHOT,
> > @@ -229,6 +238,9 @@ int qcom_q6v5_init(struct qcom_q6v5 *q6v5, struct
> > platform_device *pdev,
> > disable_irq(q6v5->handover_irq);
> > 
> > q6v5->stop_irq = platform_get_irq_byname(pdev, "stop-ack");
> > +   if (q6v5->stop_irq == -EPROBE_DEFER)
> > +   return -EPROBE_DEFER;
> > +
> > ret = devm_request_threaded_irq(&pdev->dev, q6v5->stop_irq,
> > NULL, q6v5_stop_interrupt,
> > IRQF_TRIGGER_RISING | IRQF_ONESHOT,
> 
> -- 
> -- Sibi Sankar --
> Qualcomm Innovation Center, Inc. is a member of Code Aurora Forum,
> a Linux Foundation Collaborative Project.

Re: [PATCH v2] remoteproc: qcom: pas: Add QCS404 remoteprocs

2018-10-05 Thread Bjorn Andersson

On Thu 27 Sep 23:27 PDT 2018, Sibi Sankar wrote:

> On 2018-09-28 00:33, Bjorn Andersson wrote:
> > Add compatibles for the three PAS based remote processors found in
> > QCS404.
> > 
> > Signed-off-by: Bjorn Andersson 
> > ---
> > 
> 
> Reviewed-by: Sibi Sankar 
> 

Thanks for the review Sibi, applied the patch.

Regards,
Bjorn

> > Changes since v1:
> > - Fixed incorrect sysmon_name, as pointed out by Sibi.
> > 
> >  .../devicetree/bindings/remoteproc/qcom,adsp.txt |  3 +++
> >  drivers/remoteproc/qcom_adsp_pil.c   | 12 
> >  2 files changed, 15 insertions(+)
> > 
> > diff --git
> > a/Documentation/devicetree/bindings/remoteproc/qcom,adsp.txt
> > b/Documentation/devicetree/bindings/remoteproc/qcom,adsp.txt
> > index b7d058228185..9c0cff3a5ed8 100644
> > --- a/Documentation/devicetree/bindings/remoteproc/qcom,adsp.txt
> > +++ b/Documentation/devicetree/bindings/remoteproc/qcom,adsp.txt
> > @@ -10,6 +10,9 @@ on the Qualcomm ADSP Hexagon core.
> > "qcom,msm8974-adsp-pil"
> > "qcom,msm8996-adsp-pil"
> > "qcom,msm8996-slpi-pil"
> > +   "qcom,qcs404-adsp-pas"
> > +   "qcom,qcs404-cdsp-pas"
> > +   "qcom,qcs404-wcss-pas"
> > "qcom,sdm845-adsp-pas"
> > "qcom,sdm845-cdsp-pas"
> > 
> > diff --git a/drivers/remoteproc/qcom_adsp_pil.c
> > b/drivers/remoteproc/qcom_adsp_pil.c
> > index da2254ea1135..d5e58235e83a 100644
> > --- a/drivers/remoteproc/qcom_adsp_pil.c
> > +++ b/drivers/remoteproc/qcom_adsp_pil.c
> > @@ -362,10 +362,22 @@ static const struct adsp_data slpi_resource_init =
> > {
> > .ssctl_id = 0x16,
> >  };
> > 
> > +static const struct adsp_data wcss_resource_init = {
> > +   .crash_reason_smem = 421,
> > +   .firmware_name = "wcnss.mdt",
> > +   .pas_id = 6,
> > +   .ssr_name = "mpss",
> > +   .sysmon_name = "wcnss",
> > +   .ssctl_id = 0x12,
> > +};
> > +
> >  static const struct of_device_id adsp_of_match[] = {
> > { .compatible = "qcom,msm8974-adsp-pil", .data = &adsp_resource_init},
> > { .compatible = "qcom,msm8996-adsp-pil", .data = &adsp_resource_init},
> > { .compatible = "qcom,msm8996-slpi-pil", .data = &slpi_resource_init},
> > +   { .compatible = "qcom,qcs404-adsp-pas", .data = &adsp_resource_init },
> > +   { .compatible = "qcom,qcs404-cdsp-pas", .data = &cdsp_resource_init },
> > +   { .compatible = "qcom,qcs404-wcss-pas", .data = &wcss_resource_init },
> > { .compatible = "qcom,sdm845-adsp-pas", .data = &adsp_resource_init},
> > { .compatible = "qcom,sdm845-cdsp-pas", .data = &cdsp_resource_init},
> > { },
> 
> -- 
> -- Sibi Sankar --
> Qualcomm Innovation Center, Inc. is a member of Code Aurora Forum,
> a Linux Foundation Collaborative Project.

Re: [PATCH] remoteproc: qcom: q6v5: Fix a race condition on fatal crash

2018-10-05 Thread Bjorn Andersson

On Mon 01 Oct 07:25 PDT 2018, Sibi Sankar wrote:

> Currently with GLINK_SSR enabled each fatal crash results in servicing
> a crash from wdog as well. This is due to a race that occurs in setting
> the running flag in the shutdown path. Fix this by moving the running
> flag to the end of fatal interrupt handler.
> 
> Crash Logs:
> qcom-q6v5-pil 408.remoteproc: fatal error without message
> remoteproc remoteproc0: crash detected in 408.remoteproc: type fatal
>   error
> remoteproc remoteproc0: handling crash #1 in 408.remoteproc
> remoteproc remoteproc0: recovering 408.remoteproc
> qcom-q6v5-pil 408.remoteproc: watchdog without message
> remoteproc remoteproc0: crash detected in 408.remoteproc: type watchdog
> remoteproc:glink-edge: intent request timed out
> qcom_glink_ssr remoteproc:glink-edge.glink_ssr.-1.-1: failed to send
>   cleanup message
> qcom_glink_ssr remoteproc:glink-edge.glink_ssr.-1.-1: timeout waiting
>   for cleanup done message
> qcom-q6v5-pil 408.remoteproc: timed out on wait
> qcom-q6v5-pil 408.remoteproc: port failed halt
> remoteproc remoteproc0: stopped remote processor 408.remoteproc
> qcom-q6v5-pil 408.remoteproc: MBA booted, loading mpss
> remoteproc remoteproc0: remote processor 408.remoteproc is now up
> remoteproc remoteproc0: handling crash #2 in 408.remoteproc
> remoteproc remoteproc0: recovering 408.remoteproc
> qcom-q6v5-pil 408.remoteproc: port failed halt
> remoteproc remoteproc0: stopped remote processor 408.remoteproc
> qcom-q6v5-pil 408.remoteproc: MBA booted, loading mpss
> remoteproc remoteproc0: remote processor 408.remoteproc is now up
> 
> [bjorn: move running flag to the end of fatal interrupt handler]

Turned this line into a Suggested-by

> Signed-off-by: Sibi Sankar 

Applied.

Thanks,
Bjorn

> ---
>  drivers/remoteproc/qcom_q6v5.c | 3 +--
>  1 file changed, 1 insertion(+), 2 deletions(-)
> 
> diff --git a/drivers/remoteproc/qcom_q6v5.c b/drivers/remoteproc/qcom_q6v5.c
> index e9ab90c19304..edeb2e43209e 100644
> --- a/drivers/remoteproc/qcom_q6v5.c
> +++ b/drivers/remoteproc/qcom_q6v5.c
> @@ -84,6 +84,7 @@ static irqreturn_t q6v5_fatal_interrupt(int irq, void *data)
>   else
>   dev_err(q6v5->dev, "fatal error without message\n");
>  
> + q6v5->running = false;
>   rproc_report_crash(q6v5->rproc, RPROC_FATAL_ERROR);
>  
>   return IRQ_HANDLED;
> @@ -150,8 +151,6 @@ int qcom_q6v5_request_stop(struct qcom_q6v5 *q6v5)
>  {
>   int ret;
>  
> - q6v5->running = false;
> -
>   qcom_smem_state_update_bits(q6v5->state,
>   BIT(q6v5->stop_bit), BIT(q6v5->stop_bit));
>  
> -- 
> The Qualcomm Innovation Center, Inc. is a member of the Code Aurora Forum,
> a Linux Foundation Collaborative Project
>

Re: [PATCH] samples/rpmsg: Introduce a module parameter for message count

2018-10-05 Thread Bjorn Andersson

On Tue 11 Sep 10:46 PDT 2018, Suman Anna wrote:

> The current rpmsg_client_sample uses a fixed number of messages to
> be sent to each instance. This is currently set at 100. Introduce
> an optional module parameter 'count' so that the number of messages
> to be exchanged can be made flexible.
> 

Rather than sending N messages as fast as possible to any sample channel
that comes up, how about making the sample create a debugfs entry that
we can write messages to from user space?

That would make it possible to improve the handling of multiple
remoteprocs and would allow for a variation in message lengths etc.

Regards,
Bjorn

> Signed-off-by: Suman Anna 
> ---
>  samples/rpmsg/rpmsg_client_sample.c | 6 --
>  1 file changed, 4 insertions(+), 2 deletions(-)
> 
> diff --git a/samples/rpmsg/rpmsg_client_sample.c 
> b/samples/rpmsg/rpmsg_client_sample.c
> index f161dfd3e70a..9b6b27ea504f 100644
> --- a/samples/rpmsg/rpmsg_client_sample.c
> +++ b/samples/rpmsg/rpmsg_client_sample.c
> @@ -22,7 +22,9 @@
>  #include 
>  
>  #define MSG  "hello world!"
> -#define MSG_LIMIT100
> +
> +static int count = 100;
> +module_param(count, int, 0644);
>  
>  struct instance_data {
>   int rx_count;
> @@ -41,7 +43,7 @@ static int rpmsg_sample_cb(struct rpmsg_device *rpdev, void 
> *data, int len,
>  data, len,  true);
>  
>   /* samples should not live forever */
> - if (idata->rx_count >= MSG_LIMIT) {
> + if (idata->rx_count >= count) {
>   dev_info(&rpdev->dev, "goodbye!\n");
>   return 0;
>   }
> -- 
> 2.18.0
>

Re: [PATCH 3/5] remoteproc: Add missing kernel-doc comment for auto-boot

2018-10-05 Thread Bjorn Andersson

On Fri 14 Sep 17:37 PDT 2018, Suman Anna wrote:

> The commit ddf711872c9d ("remoteproc: Introduce auto-boot flag")
> introduced the auto-boot flag but missed adding the corresponding
> kernel-doc comment. Add the same.
> 
> Signed-off-by: Suman Anna 

Applied.

Thanks,
Bjorn

> ---
>  include/linux/remoteproc.h | 1 +
>  1 file changed, 1 insertion(+)
> 
> diff --git a/include/linux/remoteproc.h b/include/linux/remoteproc.h
> index e3c5d856b6da..75f9ca05b865 100644
> --- a/include/linux/remoteproc.h
> +++ b/include/linux/remoteproc.h
> @@ -439,6 +439,7 @@ struct rproc_dump_segment {
>   * @cached_table: copy of the resource table
>   * @table_sz: size of @cached_table
>   * @has_iommu: flag to indicate if remote processor is behind an MMU
> + * @auto_boot: flag to indicate if remote processor should be auto-started
>   * @dump_segments: list of segments in the firmware
>   */
>  struct rproc {
> -- 
> 2.18.0
>

Re: [PATCH 2/5] remoteproc: Check for NULL firmwares in sysfs interface

2018-10-05 Thread Bjorn Andersson

On Fri 14 Sep 17:37 PDT 2018, Suman Anna wrote:

> The remoteproc framework provides a sysfs file 'firmware'
> for modifying the firmware image name from userspace. Add
> an additional check to ensure NULL firmwares are errored
> out right away, rather than getting a delayed error while
> requesting a firmware during the start of a remoteproc
> later on.
> 
> Signed-off-by: Suman Anna 

Applied.

Regards,
Bjorn

> ---
>  drivers/remoteproc/remoteproc_sysfs.c | 5 +
>  1 file changed, 5 insertions(+)
> 
> diff --git a/drivers/remoteproc/remoteproc_sysfs.c 
> b/drivers/remoteproc/remoteproc_sysfs.c
> index 2142b3ea726e..ce93f4d710f3 100644
> --- a/drivers/remoteproc/remoteproc_sysfs.c
> +++ b/drivers/remoteproc/remoteproc_sysfs.c
> @@ -49,6 +49,11 @@ static ssize_t firmware_store(struct device *dev,
>   }
>  
>   len = strcspn(buf, "\n");
> + if (!len) {
> + dev_err(dev, "can't provide a NULL firmware\n");
> + err = -EINVAL;
> + goto out;
> + }
>  
>   p = kstrndup(buf, len, GFP_KERNEL);
>   if (!p) {
> -- 
> 2.18.0
>

Re: [PATCH 1/5] remoteproc: Fix unbalanced boot with sysfs for no auto-boot rprocs

2018-10-05 Thread Bjorn Andersson

On Fri 14 Sep 17:37 PDT 2018, Suman Anna wrote:

> The remoteproc core performs automatic boot and shutdown of a remote
> processor during rproc_add() and rproc_del() for remote processors
> supporting 'auto-boot'. The remoteproc devices not using 'auto-boot'
> require either a remoteproc client driver or a userspace client to
> use the sysfs 'state' variable to perform the boot and shutdown. The
> in-kernel client drivers hold the corresponding remoteproc driver
> module's reference count when they acquire a rproc handle through
> the rproc_get_by_phandle() API, but there is no such support for
> userspace applications performing the boot through sysfs interface.
> 
> The shutdown of a remoteproc upon removing a remoteproc platform
> driver is automatic only with 'auto-boot' and this can cause a
> remoteproc with no auto-boot to stay powered on and never freed
> up if booted using the sysfs interface without a matching stop,
> and when the remoteproc driver module is removed or unbound from
> the device. This will result in a memory leak as well as the
> corresponding remoteproc ida being never deallocated. Fix this
> by holding a module reference count for the remoteproc's driver
> during a sysfs 'start' and releasing it during the sysfs 'stop'
> operation.
> 

This prevents you from rmmod'ing the remoteproc driver, but it does not
prevent you from issuing an unbind of the driver - resulting in the same
issue.

I would prefer if we made sure that rproc_del() always cleaned up any
resources (and stopped the remoteproc processor), but I'm uncertain of
how to deal with remote processors that are supposed to survive Linux
shutting down.

But I'm also uncertain how we can make the remoteproc core ensure that
no dynamic resources are leaked in such scenario.

Regards,
Bjorn

> Signed-off-by: Suman Anna 
> ---
>  drivers/remoteproc/remoteproc_sysfs.c | 16 +++-
>  1 file changed, 15 insertions(+), 1 deletion(-)
> 
> diff --git a/drivers/remoteproc/remoteproc_sysfs.c 
> b/drivers/remoteproc/remoteproc_sysfs.c
> index 47be411400e5..2142b3ea726e 100644
> --- a/drivers/remoteproc/remoteproc_sysfs.c
> +++ b/drivers/remoteproc/remoteproc_sysfs.c
> @@ -11,6 +11,7 @@
>   * GNU General Public License for more details.
>   */
>  
> +#include 
>  #include 
>  
>  #include "remoteproc_internal.h"
> @@ -100,14 +101,27 @@ static ssize_t state_store(struct device *dev,
>   if (rproc->state == RPROC_RUNNING)
>   return -EBUSY;
>  
> + /*
> +  * prevent underlying implementation from being removed
> +  * when remoteproc does not support auto-boot
> +  */
> + if (!rproc->auto_boot &&
> + !try_module_get(dev->parent->driver->owner))
> + return -EINVAL;
> +
>   ret = rproc_boot(rproc);
> - if (ret)
> + if (ret) {
>   dev_err(&rproc->dev, "Boot failed: %d\n", ret);
> + if (!rproc->auto_boot)
> + module_put(dev->parent->driver->owner);
> + }
>   } else if (sysfs_streq(buf, "stop")) {
>   if (rproc->state != RPROC_RUNNING)
>   return -EINVAL;
>  
>   rproc_shutdown(rproc);
> + if (!rproc->auto_boot)
> + module_put(dev->parent->driver->owner);
>   } else {
>   dev_err(&rproc->dev, "Unrecognised option: %s\n", buf);
>   ret = -EINVAL;
> -- 
> 2.18.0
>

Re: [RFC v3 1/1] ns: add binfmt_misc to the user namespace

2018-10-05 Thread Andrei Vagin

On Thu, Oct 04, 2018 at 12:50:22AM +0200, Laurent Vivier wrote:
> This patch allows to have a different binfmt_misc configuration
> for each new user namespace. By default, the binfmt_misc configuration
> is the one of the host, but if the binfmt_misc filesystem is mounted
> in the new namespace a new empty binfmt instance is created and used
> in this namespace.
> 
> For instance, using "unshare" we can start a chroot of another
> architecture and configure the binfmt_misc interpreter without being root
> to run the binaries in this chroot.
> 
> Signed-off-by: Laurent Vivier 
> ---
>  fs/binfmt_misc.c   | 85 +++---
>  include/linux/user_namespace.h | 15 ++
>  kernel/user.c  | 14 ++
>  kernel/user_namespace.c|  9 
>  4 files changed, 95 insertions(+), 28 deletions(-)
> 
> diff --git a/fs/binfmt_misc.c b/fs/binfmt_misc.c
> index aa4a7a23ff99..78780bc87506 100644
> --- a/fs/binfmt_misc.c
> +++ b/fs/binfmt_misc.c
> @@ -38,9 +38,6 @@ enum {
>   VERBOSE_STATUS = 1 /* make it zero to save 400 bytes kernel memory */
>  };
>  
> -static LIST_HEAD(entries);
> -static int enabled = 1;
> -
>  enum {Enabled, Magic};
>  #define MISC_FMT_PRESERVE_ARGV0 (1 << 31)
>  #define MISC_FMT_OPEN_BINARY (1 << 30)
> @@ -60,10 +57,7 @@ typedef struct {
>   struct file *interp_file;
>  } Node;
>  
> -static DEFINE_RWLOCK(entries_lock);
>  static struct file_system_type bm_fs_type;
> -static struct vfsmount *bm_mnt;
> -static int entry_count;
>  
>  /*
>   * Max length of the register string.  Determined by:
> @@ -85,13 +79,13 @@ static int entry_count;
>   * if we do, return the node, else NULL
>   * locking is done in load_misc_binary
>   */
> -static Node *check_file(struct linux_binprm *bprm)
> +static Node *check_file(struct user_namespace *ns, struct linux_binprm *bprm)
>  {
>   char *p = strrchr(bprm->interp, '.');
>   struct list_head *l;
>  
>   /* Walk all the registered handlers. */
> - list_for_each(l, &entries) {
> + list_for_each(l, &ns->binfmt_ns->entries) {
>   Node *e = list_entry(l, Node, list);
>   char *s;
>   int j;
> @@ -133,17 +127,18 @@ static int load_misc_binary(struct linux_binprm *bprm)
>   struct file *interp_file = NULL;
>   int retval;
>   int fd_binary = -1;
> + struct user_namespace *ns = current_user_ns();
>  
>   retval = -ENOEXEC;
> - if (!enabled)
> + if (!ns->binfmt_ns->enabled)
>   return retval;
>  
>   /* to keep locking time low, we copy the interpreter string */
> - read_lock(&entries_lock);
> - fmt = check_file(bprm);
> + read_lock(&ns->binfmt_ns->entries_lock);

It looks like ns->binfmt_ns isn't protected by any lock and
ns->binfmt_ns can be changed between read_lock() and read_unlock().

This can be fixed if ns->binfmt_ns is dereferenced only once in
this function:

struct binfmt_namespace *binfmt_ns = ns->binfmt_ns;

> + fmt = check_file(ns ,bprm);
>   if (fmt)
>   dget(fmt->dentry);
> - read_unlock(&entries_lock);
> + read_unlock(&ns->binfmt_ns->entries_lock);
>   if (!fmt)
>   return retval;
>  
> @@ -609,19 +604,19 @@ static void bm_evict_inode(struct inode *inode)
>   kfree(e);
>  }
>  
> -static void kill_node(Node *e)
> +static void kill_node(struct user_namespace *ns, Node *e)
>  {
>   struct dentry *dentry;
>  
> - write_lock(&entries_lock);
> + write_lock(&ns->binfmt_ns->entries_lock);
>   list_del_init(&e->list);
> - write_unlock(&entries_lock);
> + write_unlock(&ns->binfmt_ns->entries_lock);
>  
>   dentry = e->dentry;
>   drop_nlink(d_inode(dentry));
>   d_drop(dentry);
>   dput(dentry);
> - simple_release_fs(&bm_mnt, &entry_count);
> + simple_release_fs(&ns->binfmt_ns->bm_mnt, &ns->binfmt_ns->entry_count);
>  }
>  
>  /* / */
> @@ -651,6 +646,7 @@ static ssize_t bm_entry_write(struct file *file, const 
> char __user *buffer,
>   struct dentry *root;
>   Node *e = file_inode(file)->i_private;
>   int res = parse_command(buffer, count);
> + struct user_namespace *ns = file->f_path.dentry->d_sb->s_user_ns;
>  
>   switch (res) {
>   case 1:
> @@ -667,7 +663,7 @@ static ssize_t bm_entry_write(struct file *file, const 
> char __user *buffer,
>   inode_lock(d_inode(root));
>  
>   if (!list_empty(&e->list))
> - kill_node(e);
> + kill_node(ns, e);
>  
>   inode_unlock(d_inode(root));
>   break;
> @@ -693,6 +689,7 @@ static ssize_t bm_register_write(struct file *file, const 
> char __user *buffer,
>   struct inode *inode;
>   struct super_block *sb = file_inode(file)->i_sb;
>   struct dentry *root = sb->s_root, *dentry;
> + struct user_namespace *ns = file->f_path.dentry->d_sb->s_user_ns;
>   int err = 0;
>  
>   e = create_entry(buffer, count);
>

A different PD controller firmware problem?

2018-10-05 Thread Theodore Y. Ts'o

On Tue, Sep 11, 2018 at 01:02:00PM +0000, mario.limoncie...@dell.com wrote:
> > I tried 9370 and it detects the adapter correctly. IIRC I did the same
> > for 5530 and it worked as well.
> 
> Thanks for confirming that.  Hopefully the same change can be ported to PD 
> controller
> firmware then on other models, I'll inquire.

Hey Mario,

Sorry for the thread hijack (I've changed the subject line to make it
clear it's a separate issue), but just this evening I had a
very interesting problem with my Dell XPS 9370, and it appears to
be related to the PD controller.

Shortly after 12:30am US/Eastern, I got a low power warning on my
system, and the battery power had dropped below 10%.  Apparently the
laptop was not accepting any charge any more.  I tried doing a suspend
to ram, and then unsuspended it, and it still wasn't accepting any
charge, even though the adapter indicated it was plugged in and
supplying power.  I then did a power cycle, and still the laptop
didn't indicate it was charging with a USB C 45W power supply plugged
in.

I inserted a Satechi USB C voltage monitor in-line, and found that
while it was powered on, the laptop was pulling 0 mA at 5V.  If the
laptop was suspended, it would pull 3A at 5V.  Rebooting and power
cycling didn't change this syndrome.

What *did* fix it was powering down, and disconnecting the power
adapter for 30 seconds or so.  Then when I plugged it back in, the
laptop started accepting 20V at 2A.  I assume what happened is that
the PD controller had crashed, and it required a powerdown *and*
unplugging the power to force the EC to reset.

I have noticed other problems where a USB C to HDMI adapter doesn't
quite work right (the laptop refuses to talk to the display), and the
*only* way to fix things is to powerdown Linux and then remove the
power plug.  So this is not the first time that this particular
technique is needed to make my Dell XPS 9370 (with NVMe SSD, currently
running XPS 13 9370 System Firmware version 0.1.5.1) happy again.

What's the best place to report this sort of problem?  And is there
anything more I can do to debug these sorts of apparent PD Controller
/ EC bugs?

- Ted

Re: [ANN] init-kconfig - easy way to embrace Linux's kconfig

2018-10-05 Thread Ulf Magnusson

On Thu, Oct 4, 2018 at 10:03 PM Luis Chamberlain  wrote:
>
> Every now and then a project is born, and they decide to use Linux's
> kconfig to enable configuration of their project. As it stands we *know*
> kconfig is now used in at least over 12 different projects [0]. I myself
> added kconfig to one as well years ago. Even research reveals that
> kconfig has become one of the leading industrial variability modeling
> languages [1] [2].
>
> What is often difficult to do though is to start off using kconfig and
> integrating it into a project. Or updating / syncing to the latest
> kconfig from upstream Linux.
>
> I had yet another need to use kconfig for another small project so
> decided to make a clean template others can use and help keep it in sync.
> This is a passive fork which aims to keep in sync with the Linux
> kernel's latest kconfig to make it easier to keep up to date and to
> enable new projects to use and embrace kconfig on their own.  The goal
> is *not* to fork kconfig and evolve it separately, but rather keep in
> sync with the evolution of kconfig on Linux to make it easier for
> projects to use kconfig and also update their own kconfig when needed.
>
> This may also be useful if folks want to test R&D code on a smaller
> compartmentalized codebase.
>
> If you find this useful and you'd like to help keep it in sync, send
> patches my way as the kernel's kconfig evolves. The code is up on
> gitlab [3].
>
> Do we want to document this option on Linux in case folks want to try
> and embrace kconfig on their own for other projects?
>
> [0] http://www.eng.uwaterloo.ca/~shshe/kconfig_semantics.pdf
> [1] http://gsd.uwaterloo.ca/sites/default/files/vm-2013-berger.pdf
> [2] http://gsd.uwaterloo.ca/sites/default/files/ase241-berger_0.pdf
> [3] https://gitlab.com/mcgrof/init-kconfig
>
>   Luis

Shameless self-plug:

There's also a Python Kconfig implementation that's starting to get
picked up by several projects: https://github.com/ulfalizer/kconfiglib

It has a terminal menuconfig interface with a lot more features than
mconf (a demonstration is available at
https://raw.githubusercontent.com/ulfalizer/Kconfiglib/screenshots/screenshots/menuconfig.gif),
and can also be used e.g. to generate cross-referenced Kconfig
documentation that includes propagated dependencies:
https://docs.zephyrproject.org/latest/reference/kconfig/index.html
(note: heavy page).

Kconfiglib is based around a library (an old version appears in e.g.
U-Boot and Yocto, and a newer version in e.g. Espressif). The
documentation generation is just a script
(https://github.com/zephyrproject-rtos/zephyr/blob/master/doc/scripts/genrest.py),
and the same goes for the menuconfig and the other tools. The core
library takes care of all the trickiness related to evaluating
symbols.

I realize there would probably be massive opposition to adding a
Python dependency to a core part of the kernel, so I'm not going for
that. For most other projects, I think it's a good fit though.

Cheers,
Ulf

Re: [PATCH 7/7] Documentation: devicetree: Add Xilinx R5 rproc binding

2018-10-05 Thread Bjorn Andersson

On Thu 16 Aug 00:06 PDT 2018, Wendy Liang wrote:

> Add device tree binding for Xilinx Cortex-r5 remoteproc.
> 
> Signed-off-by: Wendy Liang 
> ---
>  .../remoteproc/xlnx,zynqmp-r5-remoteproc.txt   | 81 
> ++
>  1 file changed, 81 insertions(+)
>  create mode 100644 
> Documentation/devicetree/bindings/remoteproc/xlnx,zynqmp-r5-remoteproc.txt
> 
> diff --git 
> a/Documentation/devicetree/bindings/remoteproc/xlnx,zynqmp-r5-remoteproc.txt 
> b/Documentation/devicetree/bindings/remoteproc/xlnx,zynqmp-r5-remoteproc.txt
> new file mode 100644
> index 000..3940019
> --- /dev/null
> +++ 
> b/Documentation/devicetree/bindings/remoteproc/xlnx,zynqmp-r5-remoteproc.txt
> @@ -0,0 +1,81 @@
> +Xilinx ARM Cortex A53-R5 remoteproc driver
> +==
> +
> +ZynqMP family of devices use two Cortex R5 processors to help with various
> +low power / real time tasks.
> +
> +This driver requires specific ZynqMP hardware design.
> +
> +ZynqMP R5 RemoteProc Device Node:
> +=
> +A zynqmp_r5_remoteproc device node is used to represent a R5 IP instance
> +within ZynqMP SoC.
> +
> +Required properties:
> +
> + - compatible : Should be "xlnx,zynqmp-r5-remoteproc-1.0"

What is 1.0?

> + - reg : Address and length of the register set for the device. It
> +contains in the same order as described reg-names
> + - reg-names: Contain the register set names.
> +  "tcm_a" and "tcm_b" for TCM memories.
> +  If the user uses the remoteproc driver with the RPMsg kernel
> +  driver,"ipi" for the IPI register used to communicate with RPU
> +  is also required.
> +  Otherwise, if user only uses the remoteproc driver to boot RPU
> +  firmware, "ipi" is not required.
> + - tcm-pnode-id: TCM resources power nodes IDs which are used to request TCM
> + resources for the remoteproc driver to access.
> + - rpu-pnode-id : RPU power node id which is used by the remoteproc driver
> +  to start RPU or shut it down.
> +
> +Optional properties:
> +
> + - core_conf : R5 core configuration (valid string - split0 or split1 or
> +   lock-step), default is lock-step.
> + - memory-region: memories regions for RPU executable and DMA memory.
> + - interrupts : Interrupt mapping for remoteproc IPI. It is required if the
> +user uses the remoteproc driver with the RPMsg kernel driver.
> + - interrupt-parent : Phandle for the interrupt controller. It is required if
> +  the user uses the remoteproc driver with the RPMsg 
> kernel
> +  kernel driver.
> +
> +Example:
> +
> + reserved-memory {
> + #address-cells = <2>;
> + #size-cells = <2>;
> + ranges;
> + rproc_0_fw_reserved: rproc@3ed00 {
> + compatible = "rproc-prog-memory";
> + no-map;
> + reg = <0x0 0x3ed0 0x0 0x4>;
> + };
> + rproc_0_dma_reserved: rproc@3ed40 {
> + compatible = "shared-dma-pool";
> + no-map;
> + reg = <0x0 0x3ed4 0x0 0x8>;
> + };
> + };
> +
> + firmware {
> + zynqmp_firmware: zynqmp-firmware {
> + compatible = "xlnx,zynqmp-firmware";
> + method = "smc";
> + };
> + };
> +
> + zynqmp-r5-remoteproc@0 {

remoteproc@ffe0 {

> + compatible = "xlnx,zynqmp-r5-remoteproc-1.0";
> + reg = <0x0 0xFFE0 0x0 0x1>,
> + <0x0 0xFFE2 0x0 0x1>,
> + <0x0 0xff34 0x0 0x100>;

Make all addresses lowercase, rather than mixing case.

> + reg-names = "tcm_a", "tcm_b", "ipi";
> + dma-ranges;
> + core_conf = "split0";
> + memory-region = <&rproc_0_fw_reserved>,
> + <&rproc_0_dma_reserved>;
> + tcm-pnode-id = <0xf>, <0x10>;
> + rpu-pnode-id = <0x7>;
> + interrupt-parent = <&gic>;
> + interrupts = <0 29 4>;

interrupts = <GIC_SPI 29 IRQ_TYPE_LEVEL_HIGH>;

> + } ;
> -- 
> 2.7.4
> 

Regards,
Bjorn

Re: [PATCH 6/7] remoteproc: Add Xilinx ZynqMP R5 remoteproc

2018-10-05 Thread Bjorn Andersson

On Thu 16 Aug 00:06 PDT 2018, Wendy Liang wrote:
> diff --git a/drivers/remoteproc/zynqmp_r5_remoteproc.c 
> b/drivers/remoteproc/zynqmp_r5_remoteproc.c
> new file mode 100644
> index 000..7fc3718
> --- /dev/null
> +++ b/drivers/remoteproc/zynqmp_r5_remoteproc.c
> @@ -0,0 +1,692 @@
> +// SPDX-License-Identifier: GPL-2.0
> +/*
> + * Zynq R5 Remote Processor driver
> + *
> + * Copyright (C) 2015 Xilinx, Inc.
> + *
> + */
> +
> +#include 
> +#include 
> +#include 
> +#include 
> +#include 
> +#include 
> +#include 
> +#include 
> +#include 
> +#include 
> +#include 
> +#include 
> +#include 
> +#include 
> +#include 
> +#include 
> +#include 
> +#include 
> +#include 
> +
> +#include "remoteproc_internal.h"
> +
> +/* IPI reg offsets */
> +#define TRIG_OFFSET  0x
> +#define OBS_OFFSET   0x0004
> +#define ISR_OFFSET   0x0010
> +#define IMR_OFFSET   0x0014
> +#define IER_OFFSET   0x0018
> +#define IDR_OFFSET   0x001C
> +#define IPI_ALL_MASK 0x0F0F0301
> +
> +/* RPU IPI mask */
> +#define RPU_IPI_INIT_MASK0x0100
> +#define RPU_IPI_MASK(n)  (RPU_IPI_INIT_MASK << (n))
> +#define RPU_0_IPI_MASK   RPU_IPI_MASK(0)
> +#define RPU_1_IPI_MASK   RPU_IPI_MASK(1)

Rather than using 2 levels of macros, just define RPU_0_IPI_MASK and
RPU_1_IPI_MASK as BIT(8) and BIT(9)

> +
> +/* PM proc states */
> +#define PM_PROC_STATE_ACTIVE 1u

Unused

> +
> +/* Maximum TCM power nodes IDs */
> +#define MAX_TCM_PNODES 4
> +
> +/* Register access macros */
> +#define reg_read(base, reg) \
> + readl(((void __iomem *)(base)) + (reg))
> +#define reg_write(base, reg, val) \
> + writel((val), ((void __iomem *)(base)) + (reg))

Please drop these macros, using readl/writel directly rather than hiding
it behind a similar macro will make it easier to read the code.

> +
> +#define DEFAULT_FIRMWARE_NAME"rproc-rpu-fw"
> +
> +static bool autoboot __read_mostly;

A variable only read during probe() doesn't need hints.

> +
> +struct zynqmp_r5_rproc_pdata;

No need to forward declare this, as the very next statement is the
declaration of this struct.

> +
> +/**
> + * struct zynqmp_r5_rproc_pdata - zynqmp rpu remote processor instance state
> + * @rproc: rproc handle
> + * @workqueue: workqueue for the RPU remoteproc
> + * @ipi_base: virt ptr to IPI channel address registers for APU
> + * @rpu_mode: RPU core configuration
> + * @rpu_id: RPU CPU id
> + * @rpu_pnode_id: RPU CPU power domain id
> + * @mem_pools: list of gen_pool for firmware mmio_sram memory and their
> + * power domain IDs

mem_pools is not a member of the struct.

> + * @mems: list of rproc_mem_entries for firmware

Please reorder to match struct.

> + * @irq: IRQ number
> + * @ipi_dest_mask: IPI destination mask for the IPI channel
> + */
> +struct zynqmp_r5_rproc_pdata {
> + struct rproc *rproc;
> + struct work_struct workqueue;

This is the work object, not the work queue. Please update naming
("work" is a common choice to this).

> + void __iomem *ipi_base;
> + enum rpu_oper_mode rpu_mode;
> + struct list_head mems;

Consider renaming to mem_entries.

> + u32 ipi_dest_mask;
> + u32 rpu_id;
> + u32 rpu_pnode_id;
> + int irq;
> + u32 tcm_pnode_id[MAX_TCM_PNODES];
> +};
> +
> +/**
> + * r5_boot_addr_config - configure the boot address of R5

Add () on the function name in kerneldoc.

> + * @pdata: platform data
> + * @bootmem: boot from LOVEC or HIVEC
> + *
> + * This function will set the RPU boot address
> + */
> +static void r5_boot_addr_config(struct zynqmp_r5_rproc_pdata *pdata,
> + enum rpu_boot_mem bootmem)
> +{
> + const struct zynqmp_eemi_ops *eemi = zynqmp_pm_get_eemi_ops();

I presume this will return the same eemi as when it was called right
before in zynqmp_r5_rproc_start(). How about passing eemi from the
caller?

> +
> + pr_debug("%s: R5 ID: %d, boot_dev %d\n",
> +  __func__, pdata->rpu_id, bootmem);
> +
> + if (!eemi || !eemi->ioctl) {

If eemi is NULL zynqmp_r5_rproc_start() already aborted. How about
making zynqmp_r5_rproc_start() also check to see that eemi->ioctl is
non-NULL? and then just skip this check.

> + pr_err("%s: no eemi ioctl operation.\n", __func__);
> + return;
> + }
> + eemi->ioctl(pdata->rpu_pnode_id, IOCTL_RPU_BOOT_ADDR_CONFIG,
> + bootmem, 0, NULL);
> +}
> +
> +/**
> + * r5_mode_config - configure R5 operation mode
> + * @pdata: platform data
> + *
> + * configure R5 to split mode or lockstep mode
> + * based on the platform data.
> + */
> +static void r5_mode_config(struct zynqmp_r5_rproc_pdata *pdata)
> +{
> + const struct zynqmp_eemi_ops *eemi = zynqmp_pm_get_eemi_ops();

Same comments as for r5_boot_addr_config()

> +
> + pr_debug("%s: mode: %d\n", __func__, pdata->rpu_mode);
> +
> + if (!eemi || !eemi->ioctl) {
> + pr_err("%s:

Re: [PATCH v3 3/7] drivers: parisc: Avoids building driver if CONFIG_PARISC is disabled

2018-10-05 Thread Michael Schmitz





Am 05.10.2018 um 15:16 schrieb Leonardo Bras:

Well it's not really that persuasive.  Most people simply let the build
run to completion, but if you have a problem with a job control 3h
timelimit, then create a job that kills itself at 2:59 and then
resubmits itself.  That will produce a complete build in 3h chunks
without any need to call sub Makefiles.



Humm, I probably should have explained better how GitlabCI works.
It works by creating a docker container that has a limited lifespan of 3h max.
After that time is over, this container ceases to exist, leaving no build
objects, only the console log. So there is no way of 'resuming' the building
from where it stopped. I used the 'job' term because it's how they call it,
and I understand it's easily confused with bash jobs.


All of our Makefiles are coded assuming the upper level can prevent
descent into the lower ones.  You're proposing to change that
assumption, requiring a fairly large patch set, which doesn't really
seem to provide a huge benefit.

James


I understand your viewpoint.
But what I propose is not to change that assumption, but instead give some
Makefiles the additional ability to be called directly and still not build stuff
if they were not enabled in .config.

But, why these chosen Makefiles, and not all of them?
Granularity.
What I am trying to achieve with this patchset is the ability of building
smaller sets of drivers without accidentally building what is not enabled
on .config.
And, in my viewpoint, building a single drivers/DRIVERNAME is small enough to
be fast in most situations.


That already works, doesn't it? So all that you'd need is an offline 
tool to precompute what drivers to actually build with a given config.


'make -n' with some suitable output mangling might do the job.

There may well be other ways to achieve your stated goal, without any 
need to make changes to the kernel build process (which is the result of 
many years of evolution and tuning, BTW).



This change is not supposed to bother the usual way of building the kernel, and


Enough people have voiced their concern to warrant that you should back 
up that claim, IMO. Have you verified that your patchset does not change 
current behaviour when building the entire set of default configurations 
for each supported architecture? Does it reduce or increase overall 
complexity of the build process?



it is not even supposed to add overhead to kernel compilation. And it would,
at least, solve my problem with the 3h limit, and enable the tool
I am building on GitlabCI to help other developers.


(Apropos of nothing: Am I the only one who thinks gitlab might take a 
rather dim view of your creativity in dealing with their limit?)



Thanks for reading,

Leonardo Bras


Thanks for listening!

Cheers,

Michael

Re: [PATCH 1/2] mm: thp: relax __GFP_THISNODE for MADV_HUGEPAGE mappings

2018-10-05 Thread Andrea Arcangeli

Hello,

On Thu, Oct 04, 2018 at 04:05:26PM -0700, David Rientjes wrote:
> The source of the problem needs to be addressed: memory compaction.  We 
> regress because we lose __GFP_NORETRY and pointlessly try reclaim, but 

I commented in detail about the __GFP_NORETRY topic in the other email
so I will skip the discussion about __GFP_NORETRY in the context of
this answer except for the comment at the end of the email to the
actual code that implements __GFP_NORETRY.

> But that's a memory compaction issue, not a thp gfp mask issue; the 
> reclaim issue is responded to below.

Actually memory compaction has no issues whatsoever with
__GFP_THISNODE regardless of __GFP_NORETRY.

> This patch causes an even worse regression if all system memory is 
> fragmented such that thp cannot be allocated because it tries to stress 
> compaction on remote nodes, which ends up unsuccessfully, not just the 
> local node.
> 
> On Haswell, when all memory is fragmented (not just the local node as I 
> obtained by 13.9% regression result), the patch results in a fault latency 
> regression of 40.9% for MADV_HUGEPAGE region of 8GB.  This is because it 
> is thrashing both nodes pointlessly instead of just failing for 
> __GFP_THISNODE.

There's no I/O involved at the very least on compaction, nor do we drop
any cache or shrink any slab by mistake by just invoking compaction.
Even when you hit the worst case "all nodes are 100% fragmented"
scenario that generates the 40% increased allocation latency, all
other tasks running in the local node will keep running fine, and they
won't be pushed away forcefully into swap with all their kernel cache
depleted, which is a mlock/mbind privileged behavior that the app
using the MADV_HUGEPAGE lib should not ever be able to inflict on
other processes running in the node from different users (users as in
uid).

Furthermore when you incur the worst case latency after that there's
compact deferred logic skipping compaction next time around if all
nodes were so fragmented to the point of guaranteed failure. While
there's nothing stopping reclaim to run every time COMPACT_SKIPPED is
returned just because compaction keeps succeeding as reclaim keeps
pushing more 2M amounts into swap from the local nodes.

I don't doubt with 1024 nodes things can get pretty bad when they're
all 100% fragmented, __GFP_THISNODE would win in such case, but then
what you're asking then is the __GFP_COMPACT_ONLY behavior. That will
solve it.

What we'd need probably regardless of how we solve this bug (because
not all compaction invocations are THP invocations... and we can't
keep making special cases and optimizations tailored for THP or we end
up in that same 40% higher latency for large skbs and other stuff) is
a more sophisticated COMPACT_DEFERRED logic where you can track when
remote compaction failed. Then you wait many more times before trying
a global compaction. It could be achieved with just a compact_deferred
counter in the zone/pgdat (wherever it fits best).

Overall I don't think the bug we're dealing with and the slowdown of
compaction on the remote nodes are comparable, also considering the
latter will still happen regardless if you've large skbs or other
drivers allocating large amounts of memory as an optimization.

> So the end result is that the patch regresses access latency forever by 
> 13.9% when the local node is fragmented because it is accessing remote thp 
> vs local pages of the native page size, and regresses fault latency of 
> 40.9% when the system is fully fragmented.  The only time that fault 
> latency is improved is when remote memory is not fully fragmented, but 
> then you must incur the remote access latency.

You get THP however which will reduce the TLB miss cost and maximize
TLB usage, so it depends on the app if that 13.9% cost is actually
offset by the THP benefit or not.

It entirely depends if large part of the workload mostly fits in
in-socket CPU cache. The more the in-socket/node CPU cache pays off,
the more remote-THP also pays off. There would be definitely workloads
that would run faster, not slower, with the remote THP instead of
local PAGE_SIZEd memory. The benefit of THP is also larger for the
guest loads than for host loads, so it depends on that too.

We agree about the latency issue with a ton of RAM and thousands of
nodes, but again that can be mitigated with a NUMA-aware
COMPACT_DEFERRED logic. Even without such
NUMA-aware-compact_deferred logic improvement, the worst case of the
remote compaction behavior still doesn't look nearly as bad as this
bug by thinking about it. And it only is a concern for extremely large
NUMA systems (which may run the risk of running into other scalability
issues in other places if random workloads are applied to it and all
nodes are low on memory and fully fragmented which is far from common
scenario on those large systems), while the bug we fixed was hurting
badly all very common 2 nodes installs with workloads that ar

Re: [PATCH v4.19-rc7] treewide: Replace more open-coded allocation size multiplications

2018-10-05 Thread Joel Fernandes

On Fri, Oct 05, 2018 at 05:22:35PM -0700, Greg KH wrote:
> On Fri, Oct 05, 2018 at 05:04:16PM -0700, Kees Cook wrote:
> > On Fri, Oct 5, 2018 at 4:51 PM, Greg KH  wrote:
> > > On Fri, Oct 05, 2018 at 04:35:59PM -0700, Kees Cook wrote:
> > >> As done treewide earlier, this catches several more open-coded
> > >> allocation size calculations that were added to the kernel during the
> > >> merge window. This performs the following mechanical transformations
> > >> using Coccinelle:
> > >>
> > >>   kvmalloc(a * b, ...) -> kvmalloc_array(a, b, ...)
> > >>   kvzalloc(a * b, ...) -> kvcalloc(a, b, ...)
> > >>   devm_kzalloc(..., a * b, ...) -> devm_kcalloc(..., a, b, ...)
> > >>
> > >> Signed-off-by: Kees Cook 
> > >
> > > Has this had any testing in linux-next?
> > 
> > No; they're mechanical transformations (though I did build test them).
> > If you want I could add this to linux-next for a week?
> 
> That would be good, thanks.
> 
> > > And when was "earlier"?
> > 
> > v4.18, when all of these were originally eliminated:
> > 
> > 026f05079b00 treewide: Use array_size() in f2fs_kzalloc()
> > c86065938aab treewide: Use array_size() in f2fs_kmalloc()
> > 76e43e37a407 treewide: Use array_size() in sock_kmalloc()
> > 84ca176bf54a treewide: Use array_size() in kvzalloc_node()
> > fd7becedb1f0 treewide: Use array_size() in vzalloc_node()
> > fad953ce0b22 treewide: Use array_size() in vzalloc()
> > 42bc47b35320 treewide: Use array_size() in vmalloc()
> > a86854d0c599 treewide: devm_kzalloc() -> devm_kcalloc()
> > 3c4211ba8ad8 treewide: devm_kmalloc() -> devm_kmalloc_array()
> > 778e1cdd81bb treewide: kvzalloc() -> kvcalloc()
> > 344476e16acb treewide: kvmalloc() -> kvmalloc_array()
> > 590b5b7d8671 treewide: kzalloc_node() -> kcalloc_node()
> > 6396bb221514 treewide: kzalloc() -> kcalloc()
> > 6da2ec56059c treewide: kmalloc() -> kmalloc_array()
> > 
> > The new patch is catching new open-coded multiplications introduced in 
> > v4.19.
> 
> As this is getting smaller, why not just break it up and do it through
> all of the different subsystems instead of one large patch?
> 
> And do we have a way to add a rule to 0-day to catch these so that they
> get a warning when they are added again?

They could just be added to scripts/coccinelle and 0-day will report them?

For example, 0-day ran scripts/coccinelle/api/platform_no_drv_owner.cocci on
a recently submitted patch and reported it here:
https://lore.kernel.org/lkml/201808301856.vmnjerss%25fengguang...@intel.com/

But I'm not sure if 0-day runs make coccicheck on specific semantic patches,
or runs all of them (CC'd Fengguang).

thanks,

 - Joel

[PATCH v3 0/3] get_user_pages*() and RDMA: first steps

2018-10-05 Thread john . hubbard

From: John Hubbard 

Changes since v2:

-- Absorbed more dirty page handling logic into the put_user_page*(), and
   handled some page releasing loops in infiniband more thoroughly, as per
   Jason Gunthorpe's feedback.

-- Fixed a bug in the put_user_pages*() routines' loops (thanks to
   Ralph Campbell for spotting it).

Changes since v1:

-- Renamed release_user_pages*() to put_user_pages*(), from Jan's feedback.

-- Removed the goldfish.c changes, and instead, only included a single
   user (infiniband) of the new functions. That is because goldfish.c no
   longer has a name collision (it has a release_user_pages() routine), and
   also because infiniband exercises both the put_user_page() and
   put_user_pages*() paths.

-- Updated links to discussions and plans, so as to be sure to include
   bounce buffers, thanks to Jerome's feedback.

Also:

-- Dennis, thanks for your earlier review, and I have not yet added your
   Reviewed-by tag, because this revision changes the things that you had
   previously reviewed, thus potentially requiring another look.

This short series prepares for eventually fixing the problem described
in [1], and is following a plan listed in [2], [3], [4].

Patch 1, although not technically critical to do now, is still nice to
have, because it's already been reviewed by Jan, and it's just one more
thing on the long TODO list here, that is ready to be checked off.

Patch 2 is required in order to allow me (and others, if I'm lucky) to
start submitting changes to convert all of the callsites of
get_user_pages*() and put_page().  I think this will work a lot better
than trying to maintain a massive patchset and submitting all at once.

Patch 3 converts infiniband drivers: put_page() --> put_user_page(), and
also exercises put_user_pages_dirty_locked().

Once these are all in, then the floodgates can open up to convert the large
number of get_user_pages*() callsites.

[1] https://lwn.net/Articles/753027/ : "The Trouble with get_user_pages()"

[2] https://lkml.kernel.org/r/20180709080554.21931-1-jhubb...@nvidia.com
Proposed steps for fixing get_user_pages() + DMA problems.

[3] https://lkml.kernel.org/r/20180710082100.mkdwngdv5kkrc...@quack2.suse.cz
Bounce buffers (otherwise [2] is not really viable).

[4] https://lkml.kernel.org/r/20181003162115.gg24...@quack2.suse.cz
Follow-up discussions.

CC: Matthew Wilcox 
CC: Michal Hocko 
CC: Christopher Lameter 
CC: Jason Gunthorpe 
CC: Dan Williams 
CC: Jan Kara 
CC: Al Viro 
CC: Jerome Glisse 
CC: Christoph Hellwig 
CC: Ralph Campbell 

John Hubbard (3):
  mm: get_user_pages: consolidate error handling
  mm: introduce put_user_page*(), placeholder versions
  infiniband/mm: convert put_page() to put_user_page*()

 drivers/infiniband/core/umem.c  |  7 +--
 drivers/infiniband/core/umem_odp.c  |  2 +-
 drivers/infiniband/hw/hfi1/user_pages.c | 11 ++---
 drivers/infiniband/hw/mthca/mthca_memfree.c |  6 +--
 drivers/infiniband/hw/qib/qib_user_pages.c  | 11 ++---
 drivers/infiniband/hw/qib/qib_user_sdma.c   |  8 ++--
 drivers/infiniband/hw/usnic/usnic_uiom.c|  7 +--
 include/linux/mm.h  | 48 -
 mm/gup.c| 37 +---
 9 files changed, 92 insertions(+), 45 deletions(-)

-- 
2.19.0

[PATCH v3 1/3] mm: get_user_pages: consolidate error handling

2018-10-05 Thread john . hubbard

From: John Hubbard 

An upcoming patch requires a way to operate on each page that
any of the get_user_pages_*() variants returns.

In preparation for that, consolidate the error handling for
__get_user_pages(). This provides a single location (the "out:" label)
for operating on the collected set of pages that are about to be returned.

As long as every use of the "ret" variable is being edited, rename
"ret" --> "err", so that its name matches its true role.
This also gets rid of two shadowed variable declarations, as a
tiny beneficial side effect.

Reviewed-by: Jan Kara 
Signed-off-by: John Hubbard 
---
 mm/gup.c | 37 ++---
 1 file changed, 22 insertions(+), 15 deletions(-)

diff --git a/mm/gup.c b/mm/gup.c
index 1abc8b4afff6..05ee7c18e59a 100644
--- a/mm/gup.c
+++ b/mm/gup.c
@@ -660,6 +660,7 @@ static long __get_user_pages(struct task_struct *tsk, 
struct mm_struct *mm,
struct vm_area_struct **vmas, int *nonblocking)
 {
long i = 0;
+   int err = 0;
unsigned int page_mask;
struct vm_area_struct *vma = NULL;
 
@@ -685,18 +686,19 @@ static long __get_user_pages(struct task_struct *tsk, 
struct mm_struct *mm,
if (!vma || start >= vma->vm_end) {
vma = find_extend_vma(mm, start);
if (!vma && in_gate_area(mm, start)) {
-   int ret;
-   ret = get_gate_page(mm, start & PAGE_MASK,
+   err = get_gate_page(mm, start & PAGE_MASK,
gup_flags, &vma,
pages ? &pages[i] : NULL);
-   if (ret)
-   return i ? : ret;
+   if (err)
+   goto out;
page_mask = 0;
goto next_page;
}
 
-   if (!vma || check_vma_flags(vma, gup_flags))
-   return i ? : -EFAULT;
+   if (!vma || check_vma_flags(vma, gup_flags)) {
+   err = -EFAULT;
+   goto out;
+   }
if (is_vm_hugetlb_page(vma)) {
i = follow_hugetlb_page(mm, vma, pages, vmas,
&start, &nr_pages, i,
@@ -709,23 +711,25 @@ static long __get_user_pages(struct task_struct *tsk, 
struct mm_struct *mm,
 * If we have a pending SIGKILL, don't keep faulting pages and
 * potentially allocating memory.
 */
-   if (unlikely(fatal_signal_pending(current)))
-   return i ? i : -ERESTARTSYS;
+   if (unlikely(fatal_signal_pending(current))) {
+   err = -ERESTARTSYS;
+   goto out;
+   }
cond_resched();
page = follow_page_mask(vma, start, foll_flags, &page_mask);
if (!page) {
-   int ret;
-   ret = faultin_page(tsk, vma, start, &foll_flags,
+   err = faultin_page(tsk, vma, start, &foll_flags,
nonblocking);
-   switch (ret) {
+   switch (err) {
case 0:
goto retry;
case -EFAULT:
case -ENOMEM:
case -EHWPOISON:
-   return i ? i : ret;
+   goto out;
case -EBUSY:
-   return i;
+   err = 0;
+   goto out;
case -ENOENT:
goto next_page;
}
@@ -737,7 +741,8 @@ static long __get_user_pages(struct task_struct *tsk, 
struct mm_struct *mm,
 */
goto next_page;
} else if (IS_ERR(page)) {
-   return i ? i : PTR_ERR(page);
+   err = PTR_ERR(page);
+   goto out;
}
if (pages) {
pages[i] = page;
@@ -757,7 +762,9 @@ static long __get_user_pages(struct task_struct *tsk, 
struct mm_struct *mm,
start += page_increm * PAGE_SIZE;
nr_pages -= page_increm;
} while (nr_pages);
-   return i;
+
+out:
+   return i ? i : err;
 }
 
 static bool vma_permits_fault(struct vm_area_struct *vma,
-- 
2.19.0

[PATCH v3 2/3] mm: introduce put_user_page*(), placeholder versions

2018-10-05 Thread john . hubbard

From: John Hubbard 

Introduces put_user_page(), which simply calls put_page().
This provides a way to update all get_user_pages*() callers,
so that they call put_user_page(), instead of put_page().

Also introduces put_user_pages(), and a few dirty/locked variations,
as a replacement for release_pages(), and also as a replacement
for open-coded loops that release multiple pages.
These may be used for subsequent performance improvements,
via batching of pages to be released.

This prepares for eventually fixing the problem described
in [1], and is following a plan listed in [2], [3], [4].

[1] https://lwn.net/Articles/753027/ : "The Trouble with get_user_pages()"

[2] https://lkml.kernel.org/r/20180709080554.21931-1-jhubb...@nvidia.com
Proposed steps for fixing get_user_pages() + DMA problems.

[3] https://lkml.kernel.org/r/20180710082100.mkdwngdv5kkrc...@quack2.suse.cz
Bounce buffers (otherwise [2] is not really viable).

[4] https://lkml.kernel.org/r/20181003162115.gg24...@quack2.suse.cz
Follow-up discussions.

CC: Matthew Wilcox 
CC: Michal Hocko 
CC: Christopher Lameter 
CC: Jason Gunthorpe 
CC: Dan Williams 
CC: Jan Kara 
CC: Al Viro 
CC: Jerome Glisse 
CC: Christoph Hellwig 
CC: Ralph Campbell 
Signed-off-by: John Hubbard 
---
 include/linux/mm.h | 48 --
 1 file changed, 46 insertions(+), 2 deletions(-)

diff --git a/include/linux/mm.h b/include/linux/mm.h
index 0416a7204be3..305b206e6851 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -137,6 +137,8 @@ extern int overcommit_ratio_handler(struct ctl_table *, 
int, void __user *,
size_t *, loff_t *);
 extern int overcommit_kbytes_handler(struct ctl_table *, int, void __user *,
size_t *, loff_t *);
+int set_page_dirty(struct page *page);
+int set_page_dirty_lock(struct page *page);
 
 #define nth_page(page,n) pfn_to_page(page_to_pfn((page)) + (n))
 
@@ -943,6 +945,50 @@ static inline void put_page(struct page *page)
__put_page(page);
 }
 
+/* Pages that were pinned via get_user_pages*() should be released via
+ * either put_user_page(), or one of the put_user_pages*() routines
+ * below.
+ */
+static inline void put_user_page(struct page *page)
+{
+   put_page(page);
+}
+
+static inline void put_user_pages_dirty(struct page **pages,
+   unsigned long npages)
+{
+   unsigned long index;
+
+   for (index = 0; index < npages; index++) {
+   if (!PageDirty(pages[index]))
+   set_page_dirty(pages[index]);
+
+   put_user_page(pages[index]);
+   }
+}
+
+static inline void put_user_pages_dirty_lock(struct page **pages,
+unsigned long npages)
+{
+   unsigned long index;
+
+   for (index = 0; index < npages; index++) {
+   if (!PageDirty(pages[index]))
+   set_page_dirty_lock(pages[index]);
+
+   put_user_page(pages[index]);
+   }
+}
+
+static inline void put_user_pages(struct page **pages,
+ unsigned long npages)
+{
+   unsigned long index;
+
+   for (index = 0; index < npages; index++)
+   put_user_page(pages[index]);
+}
+
 #if defined(CONFIG_SPARSEMEM) && !defined(CONFIG_SPARSEMEM_VMEMMAP)
 #define SECTION_IN_PAGE_FLAGS
 #endif
@@ -1534,8 +1580,6 @@ int redirty_page_for_writepage(struct writeback_control 
*wbc,
 void account_page_dirtied(struct page *page, struct address_space *mapping);
 void account_page_cleaned(struct page *page, struct address_space *mapping,
  struct bdi_writeback *wb);
-int set_page_dirty(struct page *page);
-int set_page_dirty_lock(struct page *page);
 void __cancel_dirty_page(struct page *page);
 static inline void cancel_dirty_page(struct page *page)
 {
-- 
2.19.0

[PATCH v3 3/3] infiniband/mm: convert put_page() to put_user_page*()

2018-10-05 Thread john . hubbard

From: John Hubbard 

For code that retains pages via get_user_pages*(),
release those pages via the new put_user_page(), or
put_user_pages*(), instead of put_page().

This prepares for eventually fixing the problem described
in [1], and is following a plan listed in [2], [3], [4].

[1] https://lwn.net/Articles/753027/ : "The Trouble with get_user_pages()"

[2] https://lkml.kernel.org/r/20180709080554.21931-1-jhubb...@nvidia.com
Proposed steps for fixing get_user_pages() + DMA problems.

[3] https://lkml.kernel.org/r/20180710082100.mkdwngdv5kkrc...@quack2.suse.cz
Bounce buffers (otherwise [2] is not really viable).

[4] https://lkml.kernel.org/r/20181003162115.gg24...@quack2.suse.cz
Follow-up discussions.

CC: Doug Ledford 
CC: Jason Gunthorpe 
CC: Mike Marciniszyn 
CC: Dennis Dalessandro 
CC: Christian Benvenuti 

CC: linux-r...@vger.kernel.org
CC: linux-kernel@vger.kernel.org
CC: linux...@kvack.org
Signed-off-by: John Hubbard 
---
 drivers/infiniband/core/umem.c  |  7 ---
 drivers/infiniband/core/umem_odp.c  |  2 +-
 drivers/infiniband/hw/hfi1/user_pages.c | 11 ---
 drivers/infiniband/hw/mthca/mthca_memfree.c |  6 +++---
 drivers/infiniband/hw/qib/qib_user_pages.c  | 11 ---
 drivers/infiniband/hw/qib/qib_user_sdma.c   |  8 
 drivers/infiniband/hw/usnic/usnic_uiom.c|  7 ---
 7 files changed, 24 insertions(+), 28 deletions(-)

diff --git a/drivers/infiniband/core/umem.c b/drivers/infiniband/core/umem.c
index a41792dbae1f..7ab7a3a35eb4 100644
--- a/drivers/infiniband/core/umem.c
+++ b/drivers/infiniband/core/umem.c
@@ -58,9 +58,10 @@ static void __ib_umem_release(struct ib_device *dev, struct 
ib_umem *umem, int d
for_each_sg(umem->sg_head.sgl, sg, umem->npages, i) {
 
page = sg_page(sg);
-   if (!PageDirty(page) && umem->writable && dirty)
-   set_page_dirty_lock(page);
-   put_page(page);
+   if (umem->writable && dirty)
+   put_user_pages_dirty_lock(&page, 1);
+   else
+   put_user_page(page);
}
 
sg_free_table(&umem->sg_head);
diff --git a/drivers/infiniband/core/umem_odp.c 
b/drivers/infiniband/core/umem_odp.c
index 6ec748eccff7..6227b89cf05c 100644
--- a/drivers/infiniband/core/umem_odp.c
+++ b/drivers/infiniband/core/umem_odp.c
@@ -717,7 +717,7 @@ int ib_umem_odp_map_dma_pages(struct ib_umem *umem, u64 
user_virt, u64 bcnt,
ret = -EFAULT;
break;
}
-   put_page(local_page_list[j]);
+   put_user_page(local_page_list[j]);
continue;
}
 
diff --git a/drivers/infiniband/hw/hfi1/user_pages.c 
b/drivers/infiniband/hw/hfi1/user_pages.c
index e341e6dcc388..99ccc0483711 100644
--- a/drivers/infiniband/hw/hfi1/user_pages.c
+++ b/drivers/infiniband/hw/hfi1/user_pages.c
@@ -121,13 +121,10 @@ int hfi1_acquire_user_pages(struct mm_struct *mm, 
unsigned long vaddr, size_t np
 void hfi1_release_user_pages(struct mm_struct *mm, struct page **p,
 size_t npages, bool dirty)
 {
-   size_t i;
-
-   for (i = 0; i < npages; i++) {
-   if (dirty)
-   set_page_dirty_lock(p[i]);
-   put_page(p[i]);
-   }
+   if (dirty)
+   put_user_pages_dirty_lock(p, npages);
+   else
+   put_user_pages(p, npages);
 
if (mm) { /* during close after signal, mm can be NULL */
down_write(&mm->mmap_sem);
diff --git a/drivers/infiniband/hw/mthca/mthca_memfree.c 
b/drivers/infiniband/hw/mthca/mthca_memfree.c
index cc9c0c8ccba3..b8b12effd009 100644
--- a/drivers/infiniband/hw/mthca/mthca_memfree.c
+++ b/drivers/infiniband/hw/mthca/mthca_memfree.c
@@ -481,7 +481,7 @@ int mthca_map_user_db(struct mthca_dev *dev, struct 
mthca_uar *uar,
 
ret = pci_map_sg(dev->pdev, &db_tab->page[i].mem, 1, PCI_DMA_TODEVICE);
if (ret < 0) {
-   put_page(pages[0]);
+   put_user_page(pages[0]);
goto out;
}
 
@@ -489,7 +489,7 @@ int mthca_map_user_db(struct mthca_dev *dev, struct 
mthca_uar *uar,
 mthca_uarc_virt(dev, uar, i));
if (ret) {
pci_unmap_sg(dev->pdev, &db_tab->page[i].mem, 1, 
PCI_DMA_TODEVICE);
-   put_page(sg_page(&db_tab->page[i].mem));
+   put_user_page(sg_page(&db_tab->page[i].mem));
goto out;
}
 
@@ -555,7 +555,7 @@ void mthca_cleanup_user_db_tab(struct mthca_dev *dev, 
struct mthca_uar *uar,
if (db_tab->page[i].uvirt) {
mthca_UNMAP_ICM(dev, mthca_uarc_virt(dev, uar, i), 1);
pci_unmap_sg(dev->pdev, &db_tab->page[i].mem, 1, 
PCI_DMA_TODEVICE);
-

Re: [PATCH] kvm/x86 : avoid shifting signed 32-bit value by 31 bits

2018-10-05 Thread Wei Yang

On Thu, Oct 04, 2018 at 01:47:18PM -0400, Peng Hao wrote:
>
>From: Peng Hao 
>
>  modify AVIC_LOGICAL_ID_ENTRY_VALID_MASK to unsigned
>
>Signed-off-by: Peng Hao 
>---
> arch/x86/kvm/svm.c | 2 +-
> 1 file changed, 1 insertion(+), 1 deletion(-)
>
>diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
>index d96092b..bf1ded4 100644
>--- a/arch/x86/kvm/svm.c
>+++ b/arch/x86/kvm/svm.c
>@@ -262,7 +262,7 @@ struct amd_svm_iommu_ir {
> };
> 
> #define AVIC_LOGICAL_ID_ENTRY_GUEST_PHYSICAL_ID_MASK  (0xFF)
>-#define AVIC_LOGICAL_ID_ENTRY_VALID_MASK  (1 << 31)
>+#define AVIC_LOGICAL_ID_ENTRY_VALID_MASK  (1UL << 31)

It is reasonable to change this to unsigned, but is it really necessary
to make it unsigned long?

> 
> #define AVIC_PHYSICAL_ID_ENTRY_HOST_PHYSICAL_ID_MASK  (0xFFULL)
> #define AVIC_PHYSICAL_ID_ENTRY_BACKING_PAGE_MASK  (0xFFULL << 12)
>-- 
>1.8.3.1
>

-- 
Wei Yang
Help you, Help me

Re: [PATCH 2/3] namei: implement AT_THIS_ROOT chroot-like path resolution

2018-10-05 Thread Aleksa Sarai

On 2018-10-05, Jann Horn  wrote:
> > What if we took rename_lock (call it nd->r_seq) at the start of the
> > resolution, and then only tried the __d_path-style check
> >
> >   if (read_seqretry(&rename_lock, nd->r_seq) ||
> >   read_seqretry(&mount_lock, nd->m_seq))
> >   /* do the __d_path lookup. */
> >
> > That way you would only hit the slow path if there were concurrent
> > renames or mounts *and* you are doing a path resolution with
> > AT_THIS_ROOT or AT_BENEATH. I've attached a modified patch that does
> > this (and after some testing it also appears to work).
> 
> Yeah, I think that might do the job.

*phew* I was all out of other ideas. :P

> > ---
> >  fs/namei.c | 49 ++---
> >  1 file changed, 46 insertions(+), 3 deletions(-)
> >
> > diff --git a/fs/namei.c b/fs/namei.c
> > index 6f995e6de6b1..12c9be175cb4 100644
> > --- a/fs/namei.c
> > +++ b/fs/namei.c
> > @@ -493,7 +493,7 @@ struct nameidata {
> > struct path root;
> > struct inode*inode; /* path.dentry.d_inode */
> > unsigned intflags;
> > -   unsignedseq, m_seq;
> > +   unsignedseq, m_seq, r_seq;
> > int last_type;
> > unsigneddepth;
> > int total_link_count;
> > @@ -1375,6 +1375,27 @@ static int follow_dotdot_rcu(struct nameidata *nd)
> > return -EXDEV;
> > break;
> > }
> > +   if (unlikely((nd->flags & (LOOKUP_BENEATH | LOOKUP_CHROOT)) 
> > &&
> > +(read_seqretry(&rename_lock, nd->r_seq) ||
> > + read_seqretry(&mount_lock, nd->m_seq)))) {
> > +   char *pathbuf, *pathptr;
> > +
> > +   nd->r_seq = read_seqbegin(&rename_lock);
> > +   /* Cannot take m_seq here. */
> > +
> > +   pathbuf = kmalloc(PATH_MAX, GFP_ATOMIC);
> > +   if (!pathbuf)
> > +   return -ECHILD;
> > +   pathptr = __d_path(&nd->path, &nd->root, pathbuf, 
> > PATH_MAX);
> > +   kfree(pathbuf);
> 
> You're doing this check before actually looking up the parent, right?
> So as long as I don't trigger the "path_equal(&nd->path, &nd->root)"
> check that you do for O_BENEATH, escaping up by one level is possible,
> right? You should probably move this check so that it happens after
> following "..".

Yup, you're right. I'll do that.

> (Also: I assume that you're going to get rid of that memory allocation
> in a future version.)

Sure. Would you prefer adding some scratch space in nameidata, or that I
change __d_path so it accepts NULL as the buffer (and thus it doesn't
actually do any string operations)?

> > if (nd->path.dentry != nd->path.mnt->mnt_root) {
> > int ret = path_parent_directory(&nd->path);
> > if (ret)
> > @@ -2269,6 +2311,9 @@ static const char *path_init(struct nameidata *nd, 
> > unsigned flags)
> > nd->last_type = LAST_ROOT; /* if there are only slashes... */
> > nd->flags = flags | LOOKUP_JUMPED | LOOKUP_PARENT;
> > nd->depth = 0;
> > +   nd->m_seq = read_seqbegin(&mount_lock);
> > +   nd->r_seq = read_seqbegin(&rename_lock);
> 
> This means that now, attempting to perform a lookup while something is
> holding the rename_lock will spin on the lock. I don't know whether
> that's a problem in practice though. Does anyone on this thread know
> whether this is problematic?

I could make it so that we only take &rename_lock
  if (nd->flags & (LOOKUP_BENEATH | LOOKUP_CHROOT)),
since it's not used outside of that path.

-- 
Aleksa Sarai
Senior Software Engineer (Containers)
SUSE Linux GmbH



signature.asc
Description: PGP signature

Re: [POC][RFC][PATCH 1/2] jump_function: Addition of new feature "jump_function"

2018-10-05 Thread Steven Rostedt

On Fri, 05 Oct 2018 21:51:11 -0400
Steven Rostedt  wrote:

> +#ifndef PARAMS
> +#define PARAMS(x...) x
> +#endif
> +
> +#ifndef ARGS
> +#define ARGS(x...) x
> +#endif
> +

This is also leftover from the first attempt and can be nuked.

Yeah, yeah, I should have reviewed my patches better before sending
them. But I was so excited that I got it working I just wanted to share
the joy!

-- Steve

linux-next: Signed-off-by missing for commit in the usb-gadget tree

2018-10-05 Thread Stephen Rothwell

Hi Felipe,

Commit

  89969a842e72 ("usb: gadget: uvc: configfs: Sort frame intervals upon writing")

is missing a Signed-off-by from its committer.

-- 
Cheers,
Stephen Rothwell


pgpfRrjX7qdXu.pgp
Description: OpenPGP digital signature

Re: [POC][RFC][PATCH 1/2] jump_function: Addition of new feature "jump_function"

2018-10-05 Thread Steven Rostedt

On Fri, 05 Oct 2018 21:51:11 -0400
Steven Rostedt  wrote:

> +#define arch_dynfunc_trampoline(name, def)   \
> + asm volatile (  \
> + ".globl dynfunc_" #name "; \n\t"\
> + "dynfunc_" #name ": \n\t"   \
> + "jmp " #def " \n\t" \
> + ".balign 8 \n \t"   \
> + : : : "memory" )
> +

Note, the assembler can easily put in a two byte jump here. The .balign
was supposed to also have some padding (nop) in case that happens. It's
fine, because we can just replace it with a 5 byte jump, as long as we
have 3 bytes afterward if it is a two byte jump.

-- Steve

Re: [POC][RFC][PATCH 1/2] jump_function: Addition of new feature "jump_function"

2018-10-05 Thread Steven Rostedt

On Fri, 05 Oct 2018 21:51:11 -0400
Steven Rostedt  wrote:

> From: "Steven Rostedt (VMware)" 
> 
> Signed-off-by: Steven Rostedt (VMware) 
> ---
>  include/asm-generic/vmlinux.lds.h |   4 +
>  include/linux/jump_function.h |  93 
>  kernel/Makefile   |   2 +-
>  kernel/jump_function.c| 368 ++
>  4 files changed, 466 insertions(+), 1 deletion(-)
>  create mode 100644 include/linux/jump_function.h
>  create mode 100644 kernel/jump_function.c
> 
> diff --git a/include/asm-generic/vmlinux.lds.h 
> b/include/asm-generic/vmlinux.lds.h
> index 7b75ff6e2fce..0e205069ff36 100644
> --- a/include/asm-generic/vmlinux.lds.h
> +++ b/include/asm-generic/vmlinux.lds.h
> @@ -257,6 +257,10 @@
>   __start___jump_table = .;   \
>   KEEP(*(__jump_table))   \
>   __stop___jump_table = .;\
> + . = ALIGN(8);   \
> + __start___dynfunc_table = .;\
> + KEEP(*(__dynfunc_table))\
> + __stop___dynfunc_table = .; \
>   . = ALIGN(8);   \
>   __start___verbose = .;  \
>   KEEP(*(__verbose))  \
>

BAH, this is leftover from my first attempt. It's not needed and can be
nuked.

-- Steve

linux-next: Signed-off-by missing for commit in the integrity tree

2018-10-05 Thread Stephen Rothwell

Hi all,

Commit

  3dcee2d9c069 ("ima: fix showing large 'violations' or 
'runtime_measurements_count'")

is missing a Signed-off-by from its committer.

-- 
Cheers,
Stephen Rothwell


pgp4cZbLWM5sN.pgp
Description: OpenPGP digital signature

[POC][RFC][PATCH 0/2] PROOF OF CONCEPT: Dynamic Functions (jump functions)

2018-10-05 Thread Steven Rostedt



This is just a Proof Of Concept (POC), as I have done some "no no"s like
having x86 asm code in generic code paths, and it also needs a way of
working when an arch does not support this feature. Not to mention, I didn't
add proper change logs (that will come later).

Background:

 During David Woodhouse's presentation on Spectre and Meltdown at Kernel
Recipes he talked about how retpolines are implemented. I haven't had time
to look at the details so I haven't given it much thought. But as he
demonstrated that it has a measurable overhead on indirect calls, I realized
how much this can affect tracepoints. Tracepoints are implemented with
indirect calls, where the code iterates over an array calling each callback
that has registered with the tracepoint.

I ran a test to see how much overhead this entails.

With RETPOLINE disabled (CONFIG_RETPOLINE=n):

# trace-cmd start -e all
# perf stat -r 10 /work/c/hackbench 50
Time: 29.369
Time: 28.998
Time: 28.816
Time: 28.734
Time: 29.034
Time: 28.631
Time: 28.594
Time: 28.762
Time: 28.915
Time: 28.741

 Performance counter stats for '/work/c/hackbench 50' (10 runs):

 232926.801609  task-clock (msec) #7.465 CPUs utilized  
  ( +-  0.26% )
 3,175,526  context-switches  #0.014 M/sec  
  ( +-  0.50% )
   394,920  cpu-migrations#0.002 M/sec  
  ( +-  1.71% )
44,273  page-faults   #0.190 K/sec  
  ( +-  1.06% )
   859,904,212,284  cycles#3.692 GHz
  ( +-  0.26% )
   526,010,328,375  stalled-cycles-frontend   #   61.17% frontend cycles 
idle ( +-  0.26% )
   799,414,387,443  instructions  #0.93  insn per cycle
  #0.66  stalled cycles per 
insn  ( +-  0.25% )
   157,516,396,866  branches  #  676.248 M/sec  
  ( +-  0.25% )
   445,888,666  branch-misses #0.28% of all branches
  ( +-  0.19% )

  31.201263687 seconds time elapsed 
 ( +-  0.24% )

With RETPOLINE enabled (CONFIG_RETPOLINE=y)

# trace-cmd start -e all
# perf stat -r 10 /work/c/hackbench 50
Time: 31.087
Time: 31.180
Time: 31.250
Time: 30.905
Time: 31.024
Time: 32.056
Time: 31.312
Time: 31.409
Time: 31.451
Time: 31.275

 Performance counter stats for '/work/c/hackbench 50' (10 runs):

 252893.216212  task-clock (msec) #7.444 CPUs utilized  
  ( +-  0.31% )
 3,218,524  context-switches  #0.013 M/sec  
  ( +-  0.45% )
   427,129  cpu-migrations#0.002 M/sec  
  ( +-  1.52% )
43,666  page-faults   #0.173 K/sec  
  ( +-  0.92% )
   933,615,337,142  cycles#3.692 GHz
  ( +-  0.31% )
   593,141,521,286  stalled-cycles-frontend   #   63.53% frontend cycles 
idle ( +-  0.32% )
   806,848,677,318  instructions  #0.86  insn per cycle
  #0.74  stalled cycles per 
insn  ( +-  0.30% )
   161,289,933,342  branches  #  637.779 M/sec  
  ( +-  0.29% )
 2,070,719,044  branch-misses #1.28% of all branches
  ( +-  0.25% )

  33.971942318 seconds time elapsed 
 ( +-  0.28% )


What the above shows is that running "hackbench 50" with all trace events
enabled went from 31.201263687 seconds to 33.971942318 seconds, which is an
8.9% increase!

So I thought about how to solve this, and came up with "jump_functions".
These are similar to jump_labels, but instead of having a static branch, we
would have a dynamic function. A function "dynfunc_X()" that can be assigned
any other function, just as if it was a variable, and have it call the new
function. Talking with other kernel developers at Kernel Recipes, I was told
that this feature would be useful for other subsystems in the kernel and not
just for tracing.

The first attempt created a call in inline assembly, and did macro tricks to
create the parameters, but this was overly complex, especially when one of
the trace events has 12 parameters!

Then I decided to simplify it to have the dynfunc_X() call a trampoline,
that does a direct jump. It's similar to what a retpoline does, but a
retpoline does an indirect jump. A direct jump is much more efficient.

When changing what function a dynamic function should call, text_poke_bp()
is used to modify the trampoline to call the new function.

The first "no change log" patch implements the dynamic function (poorly, as
it's just a proof of concept), and the second "no change log" patch
implements a way that tracepoints can take advantage of it.

The tracepoints c

[POC][RFC][PATCH 1/2] jump_function: Addition of new feature "jump_function"

2018-10-05 Thread Steven Rostedt

From: "Steven Rostedt (VMware)" 

Signed-off-by: Steven Rostedt (VMware) 
---
 include/asm-generic/vmlinux.lds.h |   4 +
 include/linux/jump_function.h |  93 
 kernel/Makefile   |   2 +-
 kernel/jump_function.c| 368 ++
 4 files changed, 466 insertions(+), 1 deletion(-)
 create mode 100644 include/linux/jump_function.h
 create mode 100644 kernel/jump_function.c

diff --git a/include/asm-generic/vmlinux.lds.h 
b/include/asm-generic/vmlinux.lds.h
index 7b75ff6e2fce..0e205069ff36 100644
--- a/include/asm-generic/vmlinux.lds.h
+++ b/include/asm-generic/vmlinux.lds.h
@@ -257,6 +257,10 @@
__start___jump_table = .;   \
KEEP(*(__jump_table))   \
__stop___jump_table = .;\
+   . = ALIGN(8);   \
+   __start___dynfunc_table = .;\
+   KEEP(*(__dynfunc_table))\
+   __stop___dynfunc_table = .; \
. = ALIGN(8);   \
__start___verbose = .;  \
KEEP(*(__verbose))  \
diff --git a/include/linux/jump_function.h b/include/linux/jump_function.h
new file mode 100644
index ..8c6b0bab5f10
--- /dev/null
+++ b/include/linux/jump_function.h
@@ -0,0 +1,93 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _LINUX_JUMP_FUNCTION_H
+#define _LINUX_JUMP_FUNCTION_H
+
+
+ This all should be in arch/x86/include/asm
+
+typedef long dynfunc_t;
+
+struct dynfunc_struct;
+
+#define arch_dynfunc_trampoline(name, def) \
+   asm volatile (  \
+   ".globl dynfunc_" #name "; \n\t"\
+   "dynfunc_" #name ": \n\t"   \
+   "jmp " #def " \n\t" \
+   ".balign 8 \n \t"   \
+   : : : "memory" )
+
+int arch_assign_dynamic_function(const struct dynfunc_struct *dynfunc, void 
*func);
+
+ The below should be in include/linux
+
+#ifndef PARAMS
+#define PARAMS(x...) x
+#endif
+
+#ifndef ARGS
+#define ARGS(x...) x
+#endif
+
+struct dynfunc_struct {
+   const void  *dynfunc;
+   void*func;
+};
+
+int assign_dynamic_function(const struct dynfunc_struct *dynfunc, void *func);
+
+/*
+ * DECLARE_DYNAMIC_FUNCTION - Declaration to create a dynamic function call
+ * @name: The name of the function call to create
+ * @proto: The proto-type of the function (up to 4 args)
+ * @args: The arguments used by @proto
+ *
+ * This macro creates the function that can be used to create a dynamic
+ * function call later. It also creates the function to modify what is
+ * called:
+ *
+ *   dynfunc_[name](args);
+ *
+ * This is placed in the code where the dynamic function should be called
+ * from.
+ *
+ *   assign_dynamic_function_[name](func);
+ *
+ * This is used to make the dynfunc_[name]() call a different function.
+ * It will then call (func) instead.
+ *
+ * This must be added in a header for users of the above two functions.
+ */
+#define DECLARE_DYNAMIC_FUNCTION(name, proto, args)\
+   extern struct dynfunc_struct ___dyn_func__##name;   \
+   static inline int assign_dynamic_function_##name(int(*func)(proto)) { \
+   return assign_dynamic_function(&___dyn_func__##name, func); \
+   }   \
+   extern int dynfunc_##name(proto)
+
+/*
+ * DEFINE_DYNAMIC_FUNCTION - Define the dynamic function and default
+ * @name: The name of the function call to create
+ * @def: The default function to call
+ * @proto: The proto-type of the function (up to 4 args)
+ *
+ * Must be placed in a C file.
+ *
+ * This sets up the dynamic function that other places may call
+ * dynfunc_[name]().
+ *
+ * It defines the default function that the dynamic function will start
+ * out calling at boot up.
+ */
+#define DEFINE_DYNAMIC_FUNCTION(name, def, proto)  \
+   static void __used __dyn_func_trampoline_##name(void)   \
+   {   \
+   arch_dynfunc_trampoline(name, def); \
+   unreachable();  \
+   }   \
+   struct dynfunc_struct ___dyn_func__##name __used = {\
+   .dynfunc= (void *)dynfunc_##name,   \
+   .func   = def,  \
+   }
+
+#endif /*  _LINUX_JUMP_FUNCTION_H */
diff --git a/kernel/Makefile b/kernel/Makefile
index 7a63d567fdb5..c647c7f153

[POC][RFC][PATCH 2/2] tracepoints: Implement it with dynamic functions

2018-10-05 Thread Steven Rostedt

From: "Steven Rostedt (VMware)" 

Signed-off-by: Steven Rostedt (VMware) 
---
 include/linux/tracepoint-defs.h |  3 ++
 include/linux/tracepoint.h  | 65 ++---
 include/trace/define_trace.h| 14 +++
 kernel/tracepoint.c | 29 +--
 4 files changed, 79 insertions(+), 32 deletions(-)

diff --git a/include/linux/tracepoint-defs.h b/include/linux/tracepoint-defs.h
index 22c5a46e9693..a9d267be98de 100644
--- a/include/linux/tracepoint-defs.h
+++ b/include/linux/tracepoint-defs.h
@@ -11,6 +11,8 @@
 #include 
 #include 
 
+struct dynfunc_struct;
+
 struct trace_print_flags {
unsigned long   mask;
const char  *name;
@@ -30,6 +32,7 @@ struct tracepoint_func {
 struct tracepoint {
const char *name;   /* Tracepoint name */
struct static_key key;
+   struct dynfunc_struct *dynfunc;
int (*regfunc)(void);
void (*unregfunc)(void);
struct tracepoint_func __rcu *funcs;
diff --git a/include/linux/tracepoint.h b/include/linux/tracepoint.h
index 041f7e56a289..800c1b025e1f 100644
--- a/include/linux/tracepoint.h
+++ b/include/linux/tracepoint.h
@@ -21,6 +21,7 @@
 #include 
 #include 
 #include 
+#include 
 
 struct module;
 struct tracepoint;
@@ -94,7 +95,9 @@ extern int syscall_regfunc(void);
 extern void syscall_unregfunc(void);
 #endif /* CONFIG_HAVE_SYSCALL_TRACEPOINTS */
 
+#ifndef PARAMS
 #define PARAMS(args...) args
+#endif
 
 #define TRACE_DEFINE_ENUM(x)
 #define TRACE_DEFINE_SIZEOF(x)
@@ -138,12 +141,11 @@ extern void syscall_unregfunc(void);
  * as "(void *, void)". The DECLARE_TRACE_NOARGS() will pass in just
  * "void *data", where as the DECLARE_TRACE() will pass in "void *data, proto".
  */
-#define __DO_TRACE(tp, proto, args, cond, rcuidle) \
+#define __DO_TRACE(name, proto, args, cond, rcuidle)   \
do {\
struct tracepoint_func *it_func_ptr;\
-   void *it_func;  \
-   void *__data;   \
int __maybe_unused idx = 0; \
+   void *__data;   \
\
if (!(cond))\
return; \
@@ -163,14 +165,11 @@ extern void syscall_unregfunc(void);
rcu_irq_enter_irqson(); \
}   \
\
-   it_func_ptr = rcu_dereference_raw((tp)->funcs); \
-   \
+   it_func_ptr =   \
+   rcu_dereference_raw((&__tracepoint_##name)->funcs); \
if (it_func_ptr) {  \
-   do {\
-   it_func = (it_func_ptr)->func;  \
-   __data = (it_func_ptr)->data;   \
-   ((void(*)(proto))(it_func))(args);  \
-   } while ((++it_func_ptr)->func);\
+   __data = (it_func_ptr)->data;   \
+   dynfunc_tp_func_##name(args);   \
}   \
\
if (rcuidle) {  \
@@ -186,7 +185,7 @@ extern void syscall_unregfunc(void);
static inline void trace_##name##_rcuidle(proto)\
{   \
if (static_key_false(&__tracepoint_##name.key)) \
-   __DO_TRACE(&__tracepoint_##name,\
+   __DO_TRACE(name,\
TP_PROTO(data_proto),   \
TP_ARGS(data_args), \
TP_CONDITION(cond), 1); \
@@ -208,11 +207,13 @@ extern void syscall_unregfunc(void);
  * poking RCU a bit.
  */
 #define __DECLARE_TRACE(name, proto, args, cond, data_proto, data_args) \
+   DECLARE_DYNAMIC_FUNCTION(tp_func_##name, PARAMS(data_proto),\
+PARAMS(data_args));\
extern struct tracepoint __tracepoint_##name;

Re: [PATCH v9 04/10] x86: refcount: prevent gcc distortions

2018-10-05 Thread Rasmus Villemoes

On 2018-10-04 21:33, H. Peter Anvin wrote:

> Here is the horrible code I mentioned yesterday.  This is about
> implementing the immediate-patching framework that Linus and others have
> discussed (it helps both performance and kernel hardening):

Heh, I did a POC in userspace some years ago for loading an "eventually
constant" value into a register - the idea being to avoid a load in
cases like kmemcache_alloc(foo_cachep) or kmemcache_free(foo_cachep, p),
and I assume this is something along the same lines? I didn't do
anything with it since I had no idea if the performance gain would be
worth it, and at the time (before __ro_after_init) there was no good way
to know that the values would really be constant eventually. Also, I had
hoped to come up with a way to avoid having to annotate the loads.

I just tried expanding this to deal with some of the hash tables sized
at init time which I can see was also mentioned on LKML some time ago.
I'm probably missing something fundamental, but there's some sorta
working code at https://github.com/villemoes/rai which is not too
horrible (IMO). Attaching gcc at various times and doing disassembly
shows that the patching does take place. Can I get you to take a look at
raimacros.S and tell me why that won't work?

Thanks,
Rasmus

[PATCH] staging: iio: ad2s1210: fix 'assignment operator' style checks

2018-10-05 Thread Matheus Tavares Bernardino

This patch fixes all "Assignment operator '=' should be on the previous
line" checks found in ad2s1210.c by checkpatch.pl.

Signed-off-by: Matheus Tavares 
---
 drivers/staging/iio/resolver/ad2s1210.c | 8 
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/drivers/staging/iio/resolver/ad2s1210.c
b/drivers/staging/iio/resolver/ad2s1210.c
index ac13b99bd9cb..d4b1c2c010f2 100644
--- a/drivers/staging/iio/resolver/ad2s1210.c
+++ b/drivers/staging/iio/resolver/ad2s1210.c
@@ -301,8 +301,8 @@ static ssize_t ad2s1210_store_control(struct device *dev,
 "ad2s1210: write control register fail\n");
 goto error_ret;
 }
-st->resolution
-= ad2s1210_resolution_value[data & AD2S1210_SET_RESOLUTION];
+st->resolution =
+ad2s1210_resolution_value[data & AD2S1210_SET_RESOLUTION];
 if (st->pdata->gpioin) {
 data = ad2s1210_read_resolution_pin(st);
 if (data != st->resolution)
@@ -363,8 +363,8 @@ static ssize_t ad2s1210_store_resolution(struct device *dev,
 dev_err(dev, "ad2s1210: setting resolution fail\n");
 goto error_ret;
 }
-st->resolution
-= ad2s1210_resolution_value[data & AD2S1210_SET_RESOLUTION];
+st->resolution =
+ad2s1210_resolution_value[data & AD2S1210_SET_RESOLUTION];
 if (st->pdata->gpioin) {
 data = ad2s1210_read_resolution_pin(st);
 if (data != st->resolution)
-- 
2.18.0

Re: [PATCH] staging/rtlwifi: Fixing formatting warnings from checkpatch.pl.

2018-10-05 Thread Scott Tracy

On Fri, Oct 5, 2018 at 6:37 PM Joe Perches  wrote:
>
> On Fri, 2018-10-05 at 16:58 -0600, Scott Tracy wrote:
> > Fixing formatting warnings in rtlwifi found by checkpatch.pl
> > Changes include breaking up functions calls into multi line calls.
> > No functional/logical changes.
>
> I believe the code is better before most of these changes.
>
> There are various tradeoffs to source code formatting.
>
> Here you are changing some alignment to open parentheses
> and converting to < 80 columns.
>
>

Joe,
Thanks for the criticism. Fair comments all. I was just working
through the KernelNewbies.org tutorial looking for low value changes
to get my feet wet and assumed that a "check" was better than a
"warning". I also didn't feel confident enough to refactor the code to
get under the 80 character limit. Maybe I will spend a little more
time refactoring something and then submitting that. Thanks again,

Scott Tracy

Re: [PATCH] staging/rtlwifi: Fixing formatting warnings from checkpatch.pl.

2018-10-05 Thread Joe Perches

On Fri, 2018-10-05 at 16:58 -0600, Scott Tracy wrote:
> Fixing formatting warnings in rtlwifi found by checkpatch.pl
> Changes include breaking up functions calls into multi line calls.
> No functional/logical changes. 

I believe the code is better before most of these changes.

There are various tradeoffs to source code formatting.

Here you are changing some alignment to open parentheses
and converting to < 80 columns.

Re: [PATCH] fs/cifs: fix uninitialised variable warnings

2018-10-05 Thread Steve French

merged into cifs-2.6.git for-next
On Thu, Oct 4, 2018 at 3:16 AM Aurélien Aptel  wrote:
>
> Reviewed-by: Aurelien Aptel 
> --
> Aurélien Aptel / SUSE Labs Samba Team
> GPG: 1839 CB5F 9F5B FB9B AA97  8C99 03C8 A49B 521B D5D3
> SUSE Linux GmbH, Maxfeldstraße 5, 90409 Nürnberg, Germany
> GF: Felix Imendörffer, Jane Smithard, Graham Norton, HRB 21284 (AG Nürnberg)



-- 
Thanks,

Steve

Re: [PATCH v4.19-rc7] treewide: Replace more open-coded allocation size multiplications

2018-10-05 Thread Greg KH

On Fri, Oct 05, 2018 at 05:04:16PM -0700, Kees Cook wrote:
> On Fri, Oct 5, 2018 at 4:51 PM, Greg KH  wrote:
> > On Fri, Oct 05, 2018 at 04:35:59PM -0700, Kees Cook wrote:
> >> As done treewide earlier, this catches several more open-coded
> >> allocation size calculations that were added to the kernel during the
> >> merge window. This performs the following mechanical transformations
> >> using Coccinelle:
> >>
> >>   kvmalloc(a * b, ...) -> kvmalloc_array(a, b, ...)
> >>   kvzalloc(a * b, ...) -> kvcalloc(a, b, ...)
> >>   devm_kzalloc(..., a * b, ...) -> devm_kcalloc(..., a, b, ...)
> >>
> >> Signed-off-by: Kees Cook 
> >
> > Has this had any testing in linux-next?
> 
> No; they're mechanical transformations (though I did build test them).
> If you want I could add this to linux-next for a week?

That would be good, thanks.

> > And when was "earlier"?
> 
> v4.18, when all of these were originally eliminated:
> 
> 026f05079b00 treewide: Use array_size() in f2fs_kzalloc()
> c86065938aab treewide: Use array_size() in f2fs_kmalloc()
> 76e43e37a407 treewide: Use array_size() in sock_kmalloc()
> 84ca176bf54a treewide: Use array_size() in kvzalloc_node()
> fd7becedb1f0 treewide: Use array_size() in vzalloc_node()
> fad953ce0b22 treewide: Use array_size() in vzalloc()
> 42bc47b35320 treewide: Use array_size() in vmalloc()
> a86854d0c599 treewide: devm_kzalloc() -> devm_kcalloc()
> 3c4211ba8ad8 treewide: devm_kmalloc() -> devm_kmalloc_array()
> 778e1cdd81bb treewide: kvzalloc() -> kvcalloc()
> 344476e16acb treewide: kvmalloc() -> kvmalloc_array()
> 590b5b7d8671 treewide: kzalloc_node() -> kcalloc_node()
> 6396bb221514 treewide: kzalloc() -> kcalloc()
> 6da2ec56059c treewide: kmalloc() -> kmalloc_array()
> 
> The new patch is catching new open-coded multiplications introduced in v4.19.

As this is getting smaller, why not just break it up and do it through
all of the different subsystems instead of one large patch?

And do we have a way to add a rule to 0-day to catch these so that they
get a warning when they are added again?

thanks,

greg k-h

[PATCH v1 1/2] x86/cpufeature: Add facility to match microcode revisions

2018-10-05 Thread Andi Kleen

From: Andi Kleen 

For bug workarounds or checks it is useful to check for specific
microcode versions. Add a new table format to check for steppings
with min/max microcode revisions.

This does not change the existing x86_cpu_id because it's an ABI
shared with modutils, and also has quite different requirements,
as in no wildcards, but everything has to be matched exactly.

Signed-off-by: Andi Kleen 
---
 arch/x86/include/asm/cpu_device_id.h | 22 ++
 arch/x86/kernel/cpu/match.c  | 43 
 2 files changed, 65 insertions(+)

diff --git a/arch/x86/include/asm/cpu_device_id.h 
b/arch/x86/include/asm/cpu_device_id.h
index baeba0567126..bfd5438c 100644
--- a/arch/x86/include/asm/cpu_device_id.h
+++ b/arch/x86/include/asm/cpu_device_id.h
@@ -11,4 +11,26 @@
 
 extern const struct x86_cpu_id *x86_match_cpu(const struct x86_cpu_id *match);
 
+/*
+ * Match specific microcodes or steppings.
+ *
+ * vendor/family/model/stepping must be all set.
+ * min_ucode/max_ucode/driver_data are optional and can be 0.
+ */
+
+struct x86_ucode_id {
+   __u16 vendor;
+   __u16 family;
+   __u16 model;
+   __u16 stepping;
+   __u32 min_ucode;
+   __u32 max_ucode;
+   kernel_ulong_t driver_data;
+};
+
+extern const struct x86_ucode_id *
+x86_match_ucode_cpu(int cpu, const struct x86_ucode_id *match);
+extern const struct x86_ucode_id *
+x86_match_ucode_all(const struct x86_ucode_id *match);
+
 #endif
diff --git a/arch/x86/kernel/cpu/match.c b/arch/x86/kernel/cpu/match.c
index 3fed38812eea..f29a21b2809c 100644
--- a/arch/x86/kernel/cpu/match.c
+++ b/arch/x86/kernel/cpu/match.c
@@ -48,3 +48,46 @@ const struct x86_cpu_id *x86_match_cpu(const struct 
x86_cpu_id *match)
return NULL;
 }
 EXPORT_SYMBOL(x86_match_cpu);
+
+const struct x86_ucode_id *x86_match_ucode_cpu(int cpu,
+  const struct x86_ucode_id *match)
+{
+   const struct x86_ucode_id *m;
+   struct cpuinfo_x86 *c = &cpu_data(cpu);
+
+   for (m = match; m->vendor | m->family | m->model; m++) {
+   if (c->x86_vendor != m->vendor)
+   continue;
+   if (c->x86 != m->family)
+   continue;
+   if (c->x86_model != m->model)
+   continue;
+   if (c->x86_stepping != m->stepping)
+   continue;
+   if (m->min_ucode && c->microcode < m->min_ucode)
+   continue;
+   if (m->max_ucode && c->microcode > m->max_ucode)
+   continue;
+   return m;
+   }
+   return NULL;
+}
+
+/* Check all CPUs */
+const struct x86_ucode_id *x86_match_ucode_all(const struct x86_ucode_id 
*match)
+{
+   int cpu;
+   const struct x86_ucode_id *all_m = NULL;
+   bool first = true;
+
+   for_each_online_cpu(cpu) {
+   const struct x86_ucode_id *m = x86_match_ucode_cpu(cpu, match);
+
+   if (first)
+   all_m = m;
+   else if (m != all_m)
+   return NULL;
+   first = false;
+   }
+   return all_m;
+}
-- 
2.17.1

Re: [PATCH v4.19-rc7] treewide: Replace more open-coded allocation size multiplications

2018-10-05 Thread Kees Cook

On Fri, Oct 5, 2018 at 4:51 PM, Greg KH  wrote:
> On Fri, Oct 05, 2018 at 04:35:59PM -0700, Kees Cook wrote:
>> As done treewide earlier, this catches several more open-coded
>> allocation size calculations that were added to the kernel during the
>> merge window. This performs the following mechanical transformations
>> using Coccinelle:
>>
>>   kvmalloc(a * b, ...) -> kvmalloc_array(a, b, ...)
>>   kvzalloc(a * b, ...) -> kvcalloc(a, b, ...)
>>   devm_kzalloc(..., a * b, ...) -> devm_kcalloc(..., a, b, ...)
>>
>> Signed-off-by: Kees Cook 
>
> Has this had any testing in linux-next?

No; they're mechanical transformations (though I did build test them).
If you want I could add this to linux-next for a week?

> And when was "earlier"?

v4.18, when all of these were originally eliminated:

026f05079b00 treewide: Use array_size() in f2fs_kzalloc()
c86065938aab treewide: Use array_size() in f2fs_kmalloc()
76e43e37a407 treewide: Use array_size() in sock_kmalloc()
84ca176bf54a treewide: Use array_size() in kvzalloc_node()
fd7becedb1f0 treewide: Use array_size() in vzalloc_node()
fad953ce0b22 treewide: Use array_size() in vzalloc()
42bc47b35320 treewide: Use array_size() in vmalloc()
a86854d0c599 treewide: devm_kzalloc() -> devm_kcalloc()
3c4211ba8ad8 treewide: devm_kmalloc() -> devm_kmalloc_array()
778e1cdd81bb treewide: kvzalloc() -> kvcalloc()
344476e16acb treewide: kvmalloc() -> kvmalloc_array()
590b5b7d8671 treewide: kzalloc_node() -> kcalloc_node()
6396bb221514 treewide: kzalloc() -> kcalloc()
6da2ec56059c treewide: kmalloc() -> kmalloc_array()

The new patch is catching new open-coded multiplications introduced in v4.19.

-Kees

-- 
Kees Cook
Pixel Security

Re: [PATCH v2 2/3] mm: introduce put_user_page[s](), placeholder versions

2018-10-05 Thread John Hubbard

On 10/5/18 2:48 PM, Jason Gunthorpe wrote:
> On Fri, Oct 05, 2018 at 12:49:06PM -0700, John Hubbard wrote:
>> On 10/5/18 8:17 AM, Jason Gunthorpe wrote:
>>> On Thu, Oct 04, 2018 at 09:02:24PM -0700, john.hubb...@gmail.com wrote:
 From: John Hubbard 

 Introduces put_user_page(), which simply calls put_page().
 This provides a way to update all get_user_pages*() callers,
 so that they call put_user_page(), instead of put_page().

 Also introduces put_user_pages(), and a few dirty/locked variations,
 as a replacement for release_pages(), for the same reasons.
 These may be used for subsequent performance improvements,
 via batching of pages to be released.

 This prepares for eventually fixing the problem described
 in [1], and is following a plan listed in [2], [3], [4].

 [1] https://lwn.net/Articles/753027/ : "The Trouble with get_user_pages()"

 [2] https://lkml.kernel.org/r/20180709080554.21931-1-jhubb...@nvidia.com
 Proposed steps for fixing get_user_pages() + DMA problems.

 [3]https://lkml.kernel.org/r/20180710082100.mkdwngdv5kkrc...@quack2.suse.cz
 Bounce buffers (otherwise [2] is not really viable).

 [4] https://lkml.kernel.org/r/20181003162115.gg24...@quack2.suse.cz
 Follow-up discussions.

>> [...]

 +/* Placeholder version, until all get_user_pages*() callers are updated. 
 */
 +static inline void put_user_page(struct page *page)
 +{
 +  put_page(page);
 +}
 +
 +/* For get_user_pages*()-pinned pages, use these variants instead of
 + * release_pages():
 + */
 +static inline void put_user_pages_dirty(struct page **pages,
 +  unsigned long npages)
 +{
 +  while (npages) {
 +  set_page_dirty(pages[npages]);
 +  put_user_page(pages[npages]);
 +  --npages;
 +  }
 +}
>>>
>>> Shouldn't these do the !PageDirty(page) thing?
>>>
>>
>> Well, not yet. This is the "placeholder" patch, in which I planned to keep
>> the behavior the same, while I go to all the get_user_pages call sites and 
>> change 
>> put_page() and release_pages() over to use these new routines.
> 
> Hmm.. Well, if it is the right thing to do here, why not include it and
> take it out of callers when doing the conversion?
> 
> If it is the wrong thing, then let us still take it out of callers
> when doing the conversion :)
> 
> Just seems like things will be in a better place to make future
> changes if all the call sites are de-duplicated and correct.
> 

OK, yes. Let me send out a v3 with that included, then.

thanks,
-- 
John Hubbard
NVIDIA

Re: [PATCH v4.19-rc7] treewide: Replace more open-coded allocation size multiplications

2018-10-05 Thread Greg KH

On Fri, Oct 05, 2018 at 04:35:59PM -0700, Kees Cook wrote:
> As done treewide earlier, this catches several more open-coded
> allocation size calculations that were added to the kernel during the
> merge window. This performs the following mechanical transformations
> using Coccinelle:
> 
>   kvmalloc(a * b, ...) -> kvmalloc_array(a, b, ...)
>   kvzalloc(a * b, ...) -> kvcalloc(a, b, ...)
>   devm_kzalloc(..., a * b, ...) -> devm_kcalloc(..., a, b, ...)
> 
> Signed-off-by: Kees Cook 

Has this had any testing in linux-next?

And when was "earlier"?

thanks,

greg k-h

Re: [PATCH v2 05/11] arch/x86: Introduce a new config parameter PLATFORM_QOS

2018-10-05 Thread Fenghua Yu

On Fri, Oct 05, 2018 at 08:55:52PM +, Moger, Babu wrote:
> Introduces a new config parameter PLATFORM_QOS.
> 
> This will be used as a common config parameter for both Intel and AMD.
> Each vendor will have their own config parameter to enable RDT feature.
> One for Intel(INTEL_RDT) and one for AMD(AMD_QOS). It can be enabled or
> disabled separately. The new parameter PLATFORM_QOS will be dependent
> on INTEL_RDT or AMD_QOS.
> 
> Signed-off-by: Babu Moger 
> ---
>  arch/x86/Kconfig | 4 
>  1 file changed, 4 insertions(+)
> 
> diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
> index 1a0be022f91d..7f2da780a327 100644
> --- a/arch/x86/Kconfig
> +++ b/arch/x86/Kconfig
> @@ -458,6 +458,10 @@ config INTEL_RDT
>  
> Say N if unsure.
>  
> +config PLATFORM_QOS
> + def_bool y
> + depends on X86 && INTEL_RDT
> +

Can you change "PLATFORM_QOS" to a more neutral name "RESCTRL"?

Thanks.

-Fenghua

Re: [PATCH] writeback: fix range_cyclic writeback vs writepages deadlock

2018-10-05 Thread Dave Chinner

On Fri, Oct 05, 2018 at 12:46:40PM -0700, Andrew Morton wrote:
> On Fri,  5 Oct 2018 15:45:26 +1000 Dave Chinner  wrote:
> 
> > From: Dave Chinner 
> > 
> > We've recently seen a workload on XFS filesystems with a repeatable
> > deadlock between background writeback and a multi-process
> > application doing concurrent writes and fsyncs to a small range of a
> > file.
> > 
> > ...
> > 
> > Signed-off-by: Dave Chinner 
> > Reviewed-by: Jan Kara 
> 
> Not a serious enough problem for a -stable backport?

Don't have enough evidence to say one way or another. The reported
incident was from a RHEL 7 kernel, so the bug has been there for
years in one form or another, but it's only ever been triggered by
this one-off custom workload.

I haven't done any analysis on older kernels, nor have I looked to see
if there's any gotchas that a stable backport might encounter. And I
tend not to change stuff in a path that is critical to data integrity
without at least doing enough due diligence to suggest a stable
backport would be fine.

You can mark it for stable backports if you want, but I'm not
prepared to because I haven't done the work necessary to ensure it's
safe to do so.

Cheers,

Dave.
-- 
Dave Chinner
da...@fromorbit.com

[PATCH v4.19-rc7] treewide: Replace more open-coded allocation size multiplications

2018-10-05 Thread Kees Cook

As done treewide earlier, this catches several more open-coded
allocation size calculations that were added to the kernel during the
merge window. This performs the following mechanical transformations
using Coccinelle:

kvmalloc(a * b, ...) -> kvmalloc_array(a, b, ...)
kvzalloc(a * b, ...) -> kvcalloc(a, b, ...)
devm_kzalloc(..., a * b, ...) -> devm_kcalloc(..., a, b, ...)

Signed-off-by: Kees Cook 
---
 drivers/bluetooth/hci_qca.c |  2 +-
 drivers/crypto/inside-secure/safexcel.c |  8 +---
 drivers/gpu/drm/mediatek/mtk_drm_crtc.c |  2 +-
 drivers/gpu/drm/msm/disp/dpu1/dpu_io_util.c |  4 ++--
 drivers/hwmon/npcm750-pwm-fan.c |  2 +-
 drivers/md/dm-integrity.c   |  3 ++-
 drivers/net/wireless/mediatek/mt76/usb.c| 10 +-
 drivers/pci/controller/pcie-cadence.c   |  4 ++--
 drivers/tty/serial/qcom_geni_serial.c   |  4 ++--
 net/sched/sch_cake.c|  2 +-
 10 files changed, 22 insertions(+), 19 deletions(-)

diff --git a/drivers/bluetooth/hci_qca.c b/drivers/bluetooth/hci_qca.c
index e182f6019f68..2fee65886d50 100644
--- a/drivers/bluetooth/hci_qca.c
+++ b/drivers/bluetooth/hci_qca.c
@@ -1322,7 +1322,7 @@ static int qca_init_regulators(struct qca_power *qca,
 {
int i;
 
-   qca->vreg_bulk = devm_kzalloc(qca->dev, num_vregs *
+   qca->vreg_bulk = devm_kcalloc(qca->dev, num_vregs,
  sizeof(struct regulator_bulk_data),
  GFP_KERNEL);
if (!qca->vreg_bulk)
diff --git a/drivers/crypto/inside-secure/safexcel.c 
b/drivers/crypto/inside-secure/safexcel.c
index 7e71043457a6..86c699c14f84 100644
--- a/drivers/crypto/inside-secure/safexcel.c
+++ b/drivers/crypto/inside-secure/safexcel.c
@@ -1044,7 +1044,8 @@ static int safexcel_probe(struct platform_device *pdev)
 
safexcel_configure(priv);
 
-   priv->ring = devm_kzalloc(dev, priv->config.rings * sizeof(*priv->ring),
+   priv->ring = devm_kcalloc(dev, priv->config.rings,
+ sizeof(*priv->ring),
  GFP_KERNEL);
if (!priv->ring) {
ret = -ENOMEM;
@@ -1063,8 +1064,9 @@ static int safexcel_probe(struct platform_device *pdev)
if (ret)
goto err_reg_clk;
 
-   priv->ring[i].rdr_req = devm_kzalloc(dev,
-   sizeof(priv->ring[i].rdr_req) * 
EIP197_DEFAULT_RING_SIZE,
+   priv->ring[i].rdr_req = devm_kcalloc(dev,
+   EIP197_DEFAULT_RING_SIZE,
+   sizeof(priv->ring[i].rdr_req),
GFP_KERNEL);
if (!priv->ring[i].rdr_req) {
ret = -ENOMEM;
diff --git a/drivers/gpu/drm/mediatek/mtk_drm_crtc.c 
b/drivers/gpu/drm/mediatek/mtk_drm_crtc.c
index 0b976dfd04df..92ecb9bf982c 100644
--- a/drivers/gpu/drm/mediatek/mtk_drm_crtc.c
+++ b/drivers/gpu/drm/mediatek/mtk_drm_crtc.c
@@ -600,7 +600,7 @@ int mtk_drm_crtc_create(struct drm_device *drm_dev,
}
 
mtk_crtc->layer_nr = mtk_ddp_comp_layer_nr(mtk_crtc->ddp_comp[0]);
-   mtk_crtc->planes = devm_kzalloc(dev, mtk_crtc->layer_nr *
+   mtk_crtc->planes = devm_kcalloc(dev, mtk_crtc->layer_nr,
sizeof(struct drm_plane),
GFP_KERNEL);
 
diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_io_util.c 
b/drivers/gpu/drm/msm/disp/dpu1/dpu_io_util.c
index 790d39f816dc..b557687b1964 100644
--- a/drivers/gpu/drm/msm/disp/dpu1/dpu_io_util.c
+++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_io_util.c
@@ -153,8 +153,8 @@ int msm_dss_parse_clock(struct platform_device *pdev,
return 0;
}
 
-   mp->clk_config = devm_kzalloc(&pdev->dev,
- sizeof(struct dss_clk) * num_clk,
+   mp->clk_config = devm_kcalloc(&pdev->dev,
+ num_clk, sizeof(struct dss_clk),
  GFP_KERNEL);
if (!mp->clk_config)
return -ENOMEM;
diff --git a/drivers/hwmon/npcm750-pwm-fan.c b/drivers/hwmon/npcm750-pwm-fan.c
index 8474d601aa63..b998f9fbed41 100644
--- a/drivers/hwmon/npcm750-pwm-fan.c
+++ b/drivers/hwmon/npcm750-pwm-fan.c
@@ -908,7 +908,7 @@ static int npcm7xx_en_pwm_fan(struct device *dev,
if (fan_cnt < 1)
return -EINVAL;
 
-   fan_ch = devm_kzalloc(dev, sizeof(*fan_ch) * fan_cnt, GFP_KERNEL);
+   fan_ch = devm_kcalloc(dev, fan_cnt, sizeof(*fan_ch), GFP_KERNEL);
if (!fan_ch)
return -ENOMEM;
 
diff --git a/drivers/md/dm-integrity.c b/drivers/md/dm-integrity.c
index 89ccb64342de..e1fa6baf4e8e 100644
--- a/drivers/md/dm-integrity.c
+++ b/drivers/md/dm-integrity.c
@@ -3462,7 +3462,8 @@ static int dm_integrity_ctr(struct dm_target *ti, 
unsigned argc, char **argv)
r = -ENOMEM;

Re: [GIT PULL] PCI fixes for v4.19

2018-10-05 Thread Greg Kroah-Hartman

On Fri, Oct 05, 2018 at 03:47:30PM -0500, Bjorn Helgaas wrote:
> PCI fixes:
> 
>   - Reprogram bridge prefetch registers to fix NVIDIA and Radeon issues
> after suspend/resume (Daniel Drake)
> 
>   - Fix mvebu I/O mapping creation sequence (Thomas Petazzoni)
> 
>   - Fix minor MAINTAINERS file match issue (Bjorn Helgaas)
> 

Now merged, thanks.

greg k-h

Re: [GIT PULL] GPIO fix for v4.19

2018-10-05 Thread Greg KH

On Fri, Oct 05, 2018 at 01:05:59PM +0200, Linus Walleij wrote:
> Hi Greg,
> 
> here is a single and hopefully final GPIO fix for the v4.19 series.
> Details in the signed tag.
> 
> Please pull it in!

Now merged, thanks.

greg k-h

Re: [GIT PULL] Power management fix for v4.19-rc7

2018-10-05 Thread Greg Kroah-Hartman

On Fri, Oct 05, 2018 at 11:59:09AM +0200, Rafael J. Wysocki wrote:
> Hi Greg,
> 
> Please pull from the tag
> 
>  git://git.kernel.org/pub/scm/linux/kernel/git/rafael/linux-pm.git \
>  pm-4.19-rc7

Now merged, thanks.

greg k-h

Re: [GIT PULL] perf fixes

2018-10-05 Thread Greg Kroah-Hartman

On Fri, Oct 05, 2018 at 11:55:24AM +0200, Ingo Molnar wrote:
> 
> * Ingo Molnar  wrote:
> 
> > Linus,
> 
> ... and Greg as well!! ;-)

Heh, not a big deal :)

Now merged, thanks.

greg k-h

Re: [PATCH 1/2] mm: thp: relax __GFP_THISNODE for MADV_HUGEPAGE mappings

2018-10-05 Thread Andrea Arcangeli

Hi,

On Fri, Oct 05, 2018 at 01:35:15PM -0700, David Rientjes wrote:
> Why is it ever appropriate to do heavy reclaim and swap activity to 
> allocate a transparent hugepage?  This is exactly what the __GFP_NORETRY 
> check for high-order allocations is attempting to avoid, and it explicitly 
> states that it is for thp faults.  The fact that we lost __GFP_NORETRY for 
> thp allocations for all settings, including the default setting, other 
> than yours (setting of "always") is what I'm focusing on.  There is no 
> guarantee that this activity will free an entire pageblock or that it is 
> even worthwhile.

I tried to add just __GFP_NORETRY but it changes nothing. Try it
yourself if you think that can resolve the swap storm and excessive
reclaim CPU overhead... and see if it works. I didn't intend to
reinvent the wheel with __GFP_COMPACT_ONLY, if __GFP_NORETRY would
have worked. I tried adding __GFP_NORETRY first of course.

Reason why it doesn't help is: compaction fails because not enough
free RAM, reclaim is invoked, compaction succeeds, THP is allocated to
your lib user, compaction fails because not enough free RAM, reclaim
is invoked etc.. compact_result is not COMPACT_DEFERRED, but
COMPACT_SKIPPED.

See the part "reclaim is invoked" (with __GFP_THISNODE), is enough to
still create the same heavy swap storm and unfairly penalize all apps
with memory allocated in the local node like if your library had
actually the kernel privilege to run mbind or mlock, which is not ok.

Only __GFP_COMPACT_ONLY truly can avoid reclaim, the moment reclaim
can run with __GFP_THISNODE set, all bets are off and we're back to
square one, no difference (at best marginal difference) with
__GFP_NORETRY being set.

> That aside, removing __GFP_THISNODE can make the fault latency much worse 
> if remote nodes are fragmented and/or reclaim has the inability to free 
> contiguous memory, which it likely cannot.  This is where I measured over 
> 40% fault latency regression from Linus's tree with this patch on a 
> fragmented system where order-9 memory is neither available from node 0 
> or node 1 on Haswell.

Discussing the drawbacks of removing __GFP_THISNODE is an orthogonal
topic. __GFP_COMPACT_ONLY approach didn't have any of those drawbacks
about the remote latency because __GFP_THISNODE was still set at all
times, just as you like it. You seem to think __GFP_NORETRY will work
as well as __GFP_COMPACT_ONLY but it doesn't.

Calling compaction (and only compaction!) with __GFP_THISNODE set
doesn't break anything and that was what __GFP_COMPACT_ONLY was about.

> The behavior that MADV_HUGEPAGE specifies is certainly not clearly 
> defined, unfortunately.  The way that an application writer may read it, 
> as we have, is that it will make a stronger attempt at allocating a 
> hugepage at fault.  This actually works quite well when the allocation 
> correctly has __GFP_NORETRY, as it's supposed to, and compaction is 
> MIGRATE_ASYNC.

Like Mel said, your app just happens to fit in a local node, if the
user of the lib is slightly different and allocates 16G on a system
where each node is 4G, the post-fix MADV_HUGEPAGE will perform
extremely better also for the lib user.

And you know, if the lib user fits in one node, it can use mbind and
it won't hit OOM... and you'd need some capability giving the app
privilege anyway to keep MADV_HUGEPAGE as deep and unfair to the rest
of the processes running the local node (like mbind and mlock require
too).

Could you just run a test with the special lib and allocate 4 times
the size of a node, and see how the lib performs with upstream and
upstream+fix? Feel free to add __GFP_NORETRY anywhere you like in the
test of the upstream without fix.

The only constraint I would ask for the test (if the app using the lib
is not a massively multithreaded app, like qemu is, and you just
intend to run malloc(SIZEOFNODE*4); memset) is to run the app-lib
under "taskset -c 0". Otherwise NUMA balancing could move the CPU
next to the last memory touched, which couldn't be done if each thread
accesses all ram at random from all 4 nodes at the same time (which is
a totally legitimate workload too and must not hit the "pathological
THP allocation performance").

> removed in a thp allocation.  I don't think anybody in this thread wants 
> 14% remote access latency regression if we allocate remotely or 40% fault 
> latency regression when remote nodes are fragmented as well.

Did you try the __GFP_COMPACT_ONLY patch? That won't have the 40%
fault latency already.

Also you're underestimating the benefit of THP given from remote nodes
for virt a bit, the 40% fault latency is not an issue when the
allocation is long lived, which is what MADV_HUGEPAGE is telling the
kernel, and the benefit of THP for guest is multiplied. It's more a
feature than a bug that 40% fault latency with MADV_HUGEPAGE set at
least for all long lived allocations (but if the allocations aren't
long lived, why should MADV_HUGEPAGE have

Re: [GIT PULL] scheduler fixes

2018-10-05 Thread Greg Kroah-Hartman

On Fri, Oct 05, 2018 at 11:50:17AM +0200, Ingo Molnar wrote:
> Greg,
> 
> Please pull the latest sched-urgent-for-linus git tree from:
> 
>git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git 
> sched-urgent-for-linus

Now merged, thanks.

greg k-h

Re: [GIT PULL] x86 fixes

2018-10-05 Thread Greg Kroah-Hartman

On Fri, Oct 05, 2018 at 11:53:54AM +0200, Ingo Molnar wrote:
> Greg,
> 
> Please pull the latest x86-urgent-for-linus git tree from:
> 
>git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git 
> x86-urgent-for-linus

Now merged, thanks.

greg k-h

Re: [GIT PULL] sound fixes for 4.19-rc7

2018-10-05 Thread Greg Kroah-Hartman

On Fri, Oct 05, 2018 at 11:51:17AM +0200, Takashi Iwai wrote:
> Greg,
> 
> please pull sound fixes for v4.19-rc7 from:
> 
>   git://git.kernel.org/pub/scm/linux/kernel/git/tiwai/sound.git 
> tags/sound-4.19-rc7

Now merged, thanks.

greg k-h

Re: [GIT PULL] locking fixes

2018-10-05 Thread Greg Kroah-Hartman

On Fri, Oct 05, 2018 at 11:36:47AM +0200, Ingo Molnar wrote:
> Greg,
> 
> Please pull the latest locking-urgent-for-linus git tree from:
> 
>git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git 
> locking-urgent-for-linus

Now merged, thanks.

greg k-h

[PATCH] staging/rtlwifi: Fixing formatting warnings from checkpatch.pl.

2018-10-05 Thread Scott Tracy

Fixing formatting warnings in rtlwifi found by checkpatch.pl
Changes include breaking up function calls into multi line calls.
No functional/logical changes. 

Signed-off-by: Scott Tracy 
---
 drivers/staging/rtlwifi/core.c  | 5 +++--
 drivers/staging/rtlwifi/efuse.c | 5 +++--
 2 files changed, 6 insertions(+), 4 deletions(-)

diff --git a/drivers/staging/rtlwifi/core.c b/drivers/staging/rtlwifi/core.c
index ca37f7511c4d..a36cb44a5388 100644
--- a/drivers/staging/rtlwifi/core.c
+++ b/drivers/staging/rtlwifi/core.c
@@ -1109,7 +1109,7 @@ static void rtl_op_bss_info_changed(struct ieee80211_hw 
*hw,
if (rtlpriv->dm.supp_phymode_switch) {
if (sta->ht_cap.ht_supported)
rtl_send_smps_action(hw, sta,
-
IEEE80211_SMPS_STATIC);
+   IEEE80211_SMPS_STATIC);
}
 
if (rtlhal->current_bandtype == BAND_ON_5G) {
@@ -1882,7 +1882,8 @@ bool rtl_hal_pwrseqcmdparsing(struct rtl_priv *rtlpriv, 
u8 cut_version,
return true;
default:
WARN_ONCE(true,
- "rtlwifi: %s(): Unknown CMD!!\n", 
__func__);
+"rtlwifi: %s(): Unknown CMD!!\n",
+ __func__);
break;
}
}
diff --git a/drivers/staging/rtlwifi/efuse.c b/drivers/staging/rtlwifi/efuse.c
index 1dc71455f270..5b8afdb3e0fe 100644
--- a/drivers/staging/rtlwifi/efuse.c
+++ b/drivers/staging/rtlwifi/efuse.c
@@ -245,7 +245,8 @@ void read_efuse(struct ieee80211_hw *hw, u16 _offset, u16 
_size_byte, u8 *pbuf)
if (!efuse_word)
goto out;
for (i = 0; i < EFUSE_MAX_WORD_UNIT; i++) {
-   efuse_word[i] = kcalloc(efuse_max_section, sizeof(u16), 
GFP_ATOMIC);
+   efuse_word[i] = kcalloc(efuse_max_section,
+   sizeof(u16), GFP_ATOMIC);
if (!efuse_word[i])
goto done;
}
@@ -375,7 +376,7 @@ bool efuse_shadow_update_chk(struct ieee80211_hw *hw)
for (i = 0; i < 8; i = i + 2) {
if ((rtlefuse->efuse_map[EFUSE_INIT_MAP][base + i] !=
 rtlefuse->efuse_map[EFUSE_MODIFY_MAP][base + i]) ||
-   (rtlefuse->efuse_map[EFUSE_INIT_MAP][base + i + 1] 
!=
+  (rtlefuse->efuse_map[EFUSE_INIT_MAP][base + i + 1] !=
 rtlefuse->efuse_map[EFUSE_MODIFY_MAP][base + i +
   1])) {
words_need++;
-- 
2.17.1

Re: [PATCH RFC] mm: Add an fs-write seal to memfd

2018-10-05 Thread Joel Fernandes

On Fri, Oct 5, 2018 at 3:28 PM, Greg KH  wrote:
> On Fri, Oct 05, 2018 at 02:10:58PM -0700, Joel Fernandes wrote:
>> On Fri, Oct 05, 2018 at 12:53:39PM -0700, Andrew Morton wrote:
>> > On Fri,  5 Oct 2018 12:27:27 -0700 "Joel Fernandes (Google)" 
>> >  wrote:
>> >
>> > > To support the usecase, this patch adds a new F_SEAL_FS_WRITE seal which
>> > > prevents any future mmap and write syscalls from succeeding while
>> > > keeping the existing mmap active. The following program shows the seal
>> > > working in action:
>> >
>> > Please be prepared to create a manpage patch for this one.
>>
>> Sure, I will do that. thanks,
>
> And a test case to the in-kernel memfd tests would be appreciated.


Sure, I will add to those self-tests.

thanks,

 - Joel

Re: [PATCH] staging/rtlwifi: Fixing formatting warnings.

2018-10-05 Thread Greg Kroah-Hartman

On Fri, Oct 05, 2018 at 02:58:15PM -0600, Scott Tracy wrote:
> Signed-off-by: Scott Tracy 
> ---
>  drivers/staging/rtlwifi/core.c  | 5 +++--
>  drivers/staging/rtlwifi/efuse.c | 5 +++--
>  2 files changed, 6 insertions(+), 4 deletions(-)

Hi,

This is the friendly patch-bot of Greg Kroah-Hartman.  You have sent him
a patch that has triggered this response.  He used to manually respond
to these common problems, but in order to save his sanity (he kept
writing the same thing over and over, yet to different people), I was
created.  Hopefully you will not take offence and will fix the problem
in your patch and resubmit it so that it can be accepted into the Linux
kernel tree.

You are receiving this message because of the following common error(s)
as indicated below:

- Your patch did many different things all at once, making it difficult
  to review.  All Linux kernel patches need to only do one thing at a
  time.  If you need to do multiple things (such as clean up all coding
  style issues in a file/driver), do it in a sequence of patches, each
  one doing only one thing.  This will make it easier to review the
  patches to ensure that they are correct, and to help alleviate any
  merge issues that larger patches can cause.

- You did not specify a description of why the patch is needed, or
  possibly, any description at all, in the email body.  Please read the
  section entitled "The canonical patch format" in the kernel file,
  Documentation/SubmittingPatches for what is needed in order to
  properly describe the change.

- You did not write a descriptive Subject: for the patch, allowing Greg,
  and everyone else, to know what this patch is all about.  Please read
  the section entitled "The canonical patch format" in the kernel file,
  Documentation/SubmittingPatches for what a proper Subject: line should
  look like.

If you wish to discuss this problem further, or you have questions about
how to resolve this issue, please feel free to respond to this email and
Greg will reply once he has dug out from the pending patches received
from other developers.

thanks,

greg k-h's patch email bot

Re: [PATCH RFC] mm: Add an fs-write seal to memfd

2018-10-05 Thread Greg KH

On Fri, Oct 05, 2018 at 02:10:58PM -0700, Joel Fernandes wrote:
> On Fri, Oct 05, 2018 at 12:53:39PM -0700, Andrew Morton wrote:
> > On Fri,  5 Oct 2018 12:27:27 -0700 "Joel Fernandes (Google)" 
> >  wrote:
> > 
> > > To support the usecase, this patch adds a new F_SEAL_FS_WRITE seal which
> > > prevents any future mmap and write syscalls from succeeding while
> > > keeping the existing mmap active. The following program shows the seal
> > > working in action:
> > 
> > Please be prepared to create a manpage patch for this one.
> 
> Sure, I will do that. thanks,

And a test case to the in-kernel memfd tests would be appreciated.

thanks,

greg k-h

Re: [PATCH] KVM: X86: Add missing KVM_AMD dependency

2018-10-05 Thread Paolo Bonzini

On 06/10/2018 00:03, Guenter Roeck wrote:
>> This should be handled by
>>
>> config KVM_AMD_SEV
>> def_bool y
>> bool "AMD Secure Encrypted Virtualization (SEV) support"
>> depends on KVM_AMD && X86_64
>> depends on CRYPTO_DEV_SP_PSP && !(KVM_AMD=y && CRYPTO_DEV_CCP_DD=m)
>> ---help---
>> Provides support for launching Encrypted VMs on AMD processors.
>>
> Unfortunately it doesn't. It disables KVM_AMD_SEV, but that doesn't prevent
> the calls.

Yes, exactly - that's why I mentioned the sev_guest patch that should
cull all the SEV code from a !KVM_AMD_SEV build.

>> Maybe this works as well?  I haven't tested it yet:
>>
> I am sure there are many possible solutions. I would personally prefer one
> that enforces KVM_AMD=m with CRYPTO_DEV_CCP_DD=m, but that is just me.

Well, KVM_AMD=y is a relatively unusual choice to begin with.  The
question is whether then you want to disable this choice completely when
CRYPTO_DEV_CCP_DD=m, or just disable SEV.

My patch is a good idea anyway, if I may say so :), because it culls a
lot of code from a !KVM_AMD_SEV build.  But if it is not enough, we
certainly have to do something else about the failure you're reporting.

Paolo

Re: [RFC] x86/cpu_entry_area: move part of it back to fixmap

2018-10-05 Thread Nadav Amit

at 3:10 PM, Andy Lutomirski  wrote:

> On Fri, Oct 5, 2018 at 3:08 PM Nadav Amit  wrote:
>> at 10:02 AM, Andy Lutomirski  wrote:
>> 
>>> On Thu, Oct 4, 2018 at 9:31 AM Nadav Amit  wrote:
 at 7:11 AM, Andy Lutomirski  wrote:
 
> On Oct 3, 2018, at 9:59 PM, Nadav Amit  wrote:
> 
>> This RFC proposes to return part of the entry-area back to the fixmap to
>> improve system-call performance. Currently, since the entry-area is
>> mapped far (more than 2GB) away from the kernel text, an indirect branch
>> is needed to jump from the trampoline into the kernel. Due to Spectre
>> v2, vulnerable CPUs need to use a retpoline, which introduces an
>> overhead of >20 cycles.
> 
> That retpoline is gone in -tip. Can you see how your code stacks up 
> against -tip?  If it’s enough of a win to justify the added complexity, 
> we can try it.
> 
> You can see some pros and cons in the changelog:
> 
> https://na01.safelinks.protection.outlook.com/?url=https%3A%2F%2Fgit.kernel.org%2Ftip%2Fbf904d2762ee6fc1e4acfcb0772bbfb4a27ad8a6&data=02%7C01%7Cnamit%40vmware.com%7C481a83f5323242399efd08d62b0f69ba%7Cb39138ca3cee4b4aa4d6cd83d9dd62f0%7C1%7C0%7C636743742543114742&sdata=uI5X3PITzEVeXHyafSGNV6oVNklpHbmhhRbtyoIurkk%3D&reserved=0
 
 Err.. That’s what I get for not following lkml. Very nice discussion.
 Based on it, I may be able to do an additional micro-optimizations or
 two. Let me give it a try.
>>> 
>>> I think you should at least try to benchmark your code against mine,
>>> since you more or less implemented the alternative I suggested. :)
>> 
>> That’s what I meant. So I made a couple of tweaks in my implementation to
>> make it as performant as possible. Eventually, there is a 2ns benefit for the
>> trampoline over the unified entry-path on average on my Haswell VM (254ns vs
>> 256ns), yet there is some variance (1.2 & 1.5ns stdev correspondingly).
>> 
>> I don’t know whether such a difference should make one option to be preferred
>> over the other. I think it boils down to whether:
>> 
>> 1. KASLR is needed.
> 
> Why?  KASLR is basically worthless on any existing CPU against
> attackers who can run local code.
> 
>> 2. Can you specialize the code-paths of trampoline/non-trampoline to gain
>> better performance. For example, by removing the ALTERNATIVE from
>> SWITCH_TO_KERNEL_CR3 and not reload CR3 on the non-trampoline path, you can
>> avoid an unconditional jmp on machines which are not vulnerable to Meltdown.
>> 
>> So I can guess what you’d prefer. Let’s see if I’m right.
> 
> 2 ns isn't bad, at least on a non-PTI system.  Which, I suppose, means
> that you should benchmark on AMD :)
> 
> If the code is reasonably clean, I could get on board.

Fair enough. I’ll clean it and resend.

Thanks,
Nadav

Re: [RFC] x86/cpu_entry_area: move part of it back to fixmap

2018-10-05 Thread Andy Lutomirski

On Fri, Oct 5, 2018 at 3:08 PM Nadav Amit  wrote:
>
> at 10:02 AM, Andy Lutomirski  wrote:
>
> > On Thu, Oct 4, 2018 at 9:31 AM Nadav Amit  wrote:
> >> at 7:11 AM, Andy Lutomirski  wrote:
> >>
> >>> On Oct 3, 2018, at 9:59 PM, Nadav Amit  wrote:
> >>>
>  This RFC proposes to return part of the entry-area back to the fixmap to
>  improve system-call performance. Currently, since the entry-area is
>  mapped far (more than 2GB) away from the kernel text, an indirect branch
>  is needed to jump from the trampoline into the kernel. Due to Spectre
>  v2, vulnerable CPUs need to use a retpoline, which introduces an
>  overhead of >20 cycles.
> >>>
> >>> That retpoline is gone in -tip. Can you see how your code stacks up 
> >>> against -tip?  If it’s enough of a win to justify the added complexity, 
> >>> we can try it.
> >>>
> >>> You can see some pros and cons in the changelog:
> >>>
> >>> https://na01.safelinks.protection.outlook.com/?url=https%3A%2F%2Fgit.kernel.org%2Ftip%2Fbf904d2762ee6fc1e4acfcb0772bbfb4a27ad8a6&data=02%7C01%7Cnamit%40vmware.com%7C9996b2dd6f1745dce10b08d62a1b3f3e%7Cb39138ca3cee4b4aa4d6cd83d9dd62f0%7C1%7C0%7C636742693864878787&sdata=NW0R%2Fv5OahZlTbbNgnFk20sF4Wt1W0MDjtv9g1k%2BWdg%3D&reserved=0
> >>
> >> Err.. That’s what I get for not following lkml. Very nice discussion.
> >> Based on it, I may be able to do an additional micro-optimizations or
> >> two. Let me give it a try.
> >
> > I think you should at least try to benchmark your code against mine,
> > since you more or less implemented the alternative I suggested. :)
>
> That’s what I meant. So I made a couple of tweaks in my implementation to
> make it as performant as possible. Eventually, there is a 2ns benefit for the
> trampoline over the unified entry-path on average on my Haswell VM (254ns vs
> 256ns), yet there is some variance (1.2 & 1.5ns stdev correspondingly).
>
> I don’t know whether such a difference should make one option to be preferred
> over the other. I think it boils down to whether:
>
> 1. KASLR is needed.

Why?  KASLR is basically worthless on any existing CPU against
attackers who can run local code.

>
> 2. Can you specialize the code-paths of trampoline/non-trampoline to gain
> better performance. For example, by removing the ALTERNATIVE from
> SWITCH_TO_KERNEL_CR3 and not reload CR3 on the non-trampoline path, you can
> avoid an unconditional jmp on machines which are not vulnerable to Meltdown.
>
> So I can guess what you’d prefer. Let’s see if I’m right.
>

2 ns isn't bad, at least on a non-PTI system.  Which, I suppose, means
that you should benchmark on AMD :)

If the code is reasonably clean, I could get on board.

Re: [PATCH] KVM: X86: Add missing KVM_AMD dependency

2018-10-05 Thread Guenter Roeck

On Fri, Oct 05, 2018 at 10:41:55PM +0200, Paolo Bonzini wrote:
> On 05/10/2018 20:46, Guenter Roeck wrote:
> > Analysis shows that commit 59414c9892208 ("KVM: SVM: Add support for
> > KVM_SEV_LAUNCH_START command") added a dependency of KVM_AMD on
> > CRYPTO_DEV_CCP_DD if CRYPTO_DEV_SP_PSP is enabled: If CRYPTO_DEV_CCP_DD
> > is built as module, KVM_AMD must be built as module as well.
> > 
> > Fixes: 59414c9892208 ("KVM: SVM: Add support for KVM_SEV_LAUNCH_START 
> > command")
> > Cc: Brijesh Singh 
> > Cc: Borislav Petkov 
> > Signed-off-by: Guenter Roeck 
> 
> This should be handled by
> 
> config KVM_AMD_SEV
> def_bool y
> bool "AMD Secure Encrypted Virtualization (SEV) support"
> depends on KVM_AMD && X86_64
> depends on CRYPTO_DEV_SP_PSP && !(KVM_AMD=y && CRYPTO_DEV_CCP_DD=m)
> ---help---
> Provides support for launching Encrypted VMs on AMD processors.
> 

Unfortunately it doesn't. It disables KVM_AMD_SEV, but that doesn't prevent
the calls.

> Maybe this works as well?  I haven't tested it yet:
> 

I am sure there are many possible solutions. I would personally prefer one
that enforces KVM_AMD=m with CRYPTO_DEV_CCP_DD=m, but that is just me.

Thanks,
Guenter

> diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
> index 89c4c5aa15f1..55f10b17d044 100644
> --- a/arch/x86/kvm/svm.c
> +++ b/arch/x86/kvm/svm.c
> @@ -441,9 +441,13 @@ static inline bool svm_sev_enabled(void)
> 
>  static inline bool sev_guest(struct kvm *kvm)
>  {
> +#ifdef CONFIG_KVM_AMD_SEV
>   struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
> 
>   return sev->active;
> +#else
> + return false;
> +#endif
>  }
> 
>  static inline int sev_get_asid(struct kvm *kvm)
> 
> Thanks,
> 
> Paolo

Re: hung task in 4.14 (syzbot bug from 2018 April 17)

2018-10-05 Thread Sebastian Kuzminsky

Another instance of the same problem (also on 4.14.67), this time with
the hung task timeout restored to its 120 second default:

[  980.954129] INFO: task systemd:1 blocked for more than 120 seconds.
[  980.954132]   Tainted: G   OE   4.14.67-solidfire1 #1
[  980.954133] "echo 0 > /proc/sys/kernel/hung_task_timeout_secs"
disables this message.
[  980.954135] systemd D0 1  0 0x
[  980.954137] Call Trace:
[  980.954150]  ? __schedule+0x27f/0x870
[  980.954154]  ? check_preempt_wakeup+0x102/0x230
[  980.954156]  schedule+0x28/0x80
[  980.954159]  schedule_timeout+0x1e7/0x340
[  980.954163]  ? radix_tree_node_ctor+0x20/0x20
[  980.954165]  ? delete_node+0x1b8/0x1f0
[  980.954167]  ? wait_for_completion+0xb0/0x120
[  980.954169]  wait_for_completion+0xb0/0x120
[  980.954172]  ? wake_up_q+0x70/0x70
[  980.954176]  flush_work+0x10d/0x1c0
[  980.954179]  ? worker_detach_from_pool+0xa0/0xa0
[  980.954183]  fsnotify_destroy_group+0x34/0xa0
[  980.954185]  inotify_release+0x1a/0x50
[  980.954189]  __fput+0xd8/0x220
[  980.954194]  task_work_run+0x8a/0xb0
[  980.954198]  exit_to_usermode_loop+0xb9/0xc0
[  980.954200]  do_syscall_64+0x10b/0x120
[  980.954203]  entry_SYSCALL_64_after_hwframe+0x3d/0xa2
[  980.954206] RIP: 0033:0x7f031a552900
[  980.954208] RSP: 002b:7fff93fd18b0 EFLAGS: 0293 ORIG_RAX:
0003
[  980.954210] RAX:  RBX: 0010 RCX: 7f031a552900
[  980.954211] RDX:  RSI:  RDI: 0010
[  980.954212] RBP: 7f031beba088 R08: 0020 R09: 55ac5a21e2ad
[  980.954213] R10:  R11: 0293 R12: 
[  980.954214] R13: 55ac5a488b20 R14: 55ac5c0907f0 R15: 
[  980.954724] INFO: task kworker/u114:4:41909 blocked for more than
120 seconds.
[  980.954725]   Tainted: G   OE   4.14.67-solidfire1 #1
[  980.954726] "echo 0 > /proc/sys/kernel/hung_task_timeout_secs"
disables this message.
[  980.954727] kworker/u114:4  D0 41909  2 0x8000
[  980.954733] Workqueue: events_unbound fsnotify_mark_destroy_workfn
[  980.954734] Call Trace:
[  980.954739]  ? __schedule+0x27f/0x870
[  980.954741]  schedule+0x28/0x80
[  980.954743]  schedule_timeout+0x1e7/0x340
[  980.954746]  ? __switch_to_asm+0x40/0x70
[  980.954747]  ? update_curr+0xe1/0x1a0
[  980.954750]  ? wait_for_completion+0xb0/0x120
[  980.954751]  wait_for_completion+0xb0/0x120
[  980.954753]  ? wake_up_q+0x70/0x70
[  980.954758]  __synchronize_srcu.part.13+0x76/0x90
[  980.954761]  ? trace_raw_output_rcu_utilization+0x40/0x40
[  980.954762]  ? try_to_wake_up+0x44/0x460
[  980.954765]  ? fsnotify_mark_destroy_workfn+0x67/0xb0
[  980.954767]  fsnotify_mark_destroy_workfn+0x67/0xb0
[  980.954769]  process_one_work+0x1da/0x3d0
[  980.954772]  worker_thread+0x21f/0x3f0
[  980.954775]  ? process_one_work+0x3d0/0x3d0
[  980.954777]  kthread+0x119/0x130
[  980.954779]  ? kthread_create_on_node+0x40/0x40
[  980.954781]  ret_from_fork+0x35/0x40

crash> print reaper_work
$3 = {
  work = {
data = {
  counter = -117148584927483
},
entry = {
  next = 0x95743772b770,
  prev = 0xa2aa800a3df8
},
func = 0xa8249df0
  },
  timer = {
entry = {
  next = 0xdead0200,
  pprev = 0x0
},
expires = 4295020559,
function = 0xa80778c0,
data = 18446744072250514272,
flags = 69206020
  },
  wq = 0x95443f011400,
  cpu = 128
}

Any input is welcome.

-- 
Sebastian Kuzminsky

[PATCH][next] ath10k: fix out of bound read on array ath10k_rates

2018-10-05 Thread Colin King

From: Colin Ian King 

An out-of-bounds read on array ath10k_rates is occurring because
the maximum number of elements is currently based on the size of
the array and not the number of elements in the array. Fix this
by using ARRAY_SIZE instead of sizeof.

Detected by CoverityScan, CID#1473918 ("Out-of-bounds read")

Fixes: f279294e9ee2 ("ath10k: add support for configuring management packet 
rate")
Signed-off-by: Colin Ian King 
---
 drivers/net/wireless/ath/ath10k/mac.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/wireless/ath/ath10k/mac.c 
b/drivers/net/wireless/ath/ath10k/mac.c
index 3933dd96da55..3564676e74e3 100644
--- a/drivers/net/wireless/ath/ath10k/mac.c
+++ b/drivers/net/wireless/ath/ath10k/mac.c
@@ -164,7 +164,7 @@ static int ath10k_mac_get_rate_hw_value(int bitrate)
if (ath10k_mac_bitrate_is_cck(bitrate))
hw_value_prefix = WMI_RATE_PREAMBLE_CCK << 6;
 
-   for (i = 0; i < sizeof(ath10k_rates); i++) {
+   for (i = 0; i < ARRAY_SIZE(ath10k_rates); i++) {
if (ath10k_rates[i].bitrate == bitrate)
return hw_value_prefix | ath10k_rates[i].hw_value;
}
-- 
2.17.1

Re: [PATCH] apparmor: add #ifdef checks for secmark filtering

2018-10-05 Thread John Johansen

On 10/05/2018 09:11 AM, Arnd Bergmann wrote:
> The newly added code fails to build when either SECMARK or
> NETFILTER are disabled:
> 
> security/apparmor/lsm.c: In function 'apparmor_socket_sock_rcv_skb':
> security/apparmor/lsm.c:1138:12: error: 'struct sk_buff' has no member named 
> 'secmark'; did you mean 'mark'?
> 
> security/apparmor/lsm.c:1671:21: error: 'struct nf_hook_state' declared 
> inside parameter list will not be visible outside of this definition or 
> declaration [-Werror]
> 
> Add a set of #ifdef checks around it to only enable the code that
> we can compile and that makes sense in that configuration.
> 
> Fixes: ab9f2115081a ("apparmor: Allow filtering based on secmark policy")
> Signed-off-by: Arnd Bergmann 

Thanks Arnd, I have pulled this into apparmor-next


> ---
>  security/apparmor/lsm.c | 10 ++
>  security/apparmor/net.c |  2 ++
>  2 files changed, 12 insertions(+)
> 
> diff --git a/security/apparmor/lsm.c b/security/apparmor/lsm.c
> index 53201013c40e..b74b724d3e84 100644
> --- a/security/apparmor/lsm.c
> +++ b/security/apparmor/lsm.c
> @@ -1123,6 +1123,7 @@ static int apparmor_socket_shutdown(struct socket 
> *sock, int how)
>   return aa_sock_perm(OP_SHUTDOWN, AA_MAY_SHUTDOWN, sock);
>  }
>  
> +#ifdef CONFIG_NETWORK_SECMARK
>  /**
>   * apparmor_socket_sock_recv_skb - check perms before associating skb to sk
>   *
> @@ -1141,6 +1142,7 @@ static int apparmor_socket_sock_rcv_skb(struct sock 
> *sk, struct sk_buff *skb)
>   return apparmor_secmark_check(ctx->label, OP_RECVMSG, AA_MAY_RECEIVE,
> skb->secmark, sk);
>  }
> +#endif
>  
>  
>  static struct aa_label *sk_peer_label(struct sock *sk)
> @@ -1235,6 +1237,7 @@ static void apparmor_sock_graft(struct sock *sk, struct 
> socket *parent)
>   ctx->label = aa_get_current_label();
>  }
>  
> +#ifdef CONFIG_NETWORK_SECMARK
>  static int apparmor_inet_conn_request(struct sock *sk, struct sk_buff *skb,
> struct request_sock *req)
>  {
> @@ -1246,6 +1249,7 @@ static int apparmor_inet_conn_request(struct sock *sk, 
> struct sk_buff *skb,
>   return apparmor_secmark_check(ctx->label, OP_CONNECT, AA_MAY_CONNECT,
> skb->secmark, sk);
>  }
> +#endif
>  
>  static struct security_hook_list apparmor_hooks[] __lsm_ro_after_init = {
>   LSM_HOOK_INIT(ptrace_access_check, apparmor_ptrace_access_check),
> @@ -1304,13 +1308,17 @@ static struct security_hook_list apparmor_hooks[] 
> __lsm_ro_after_init = {
>   LSM_HOOK_INIT(socket_getsockopt, apparmor_socket_getsockopt),
>   LSM_HOOK_INIT(socket_setsockopt, apparmor_socket_setsockopt),
>   LSM_HOOK_INIT(socket_shutdown, apparmor_socket_shutdown),
> +#ifdef CONFIG_NETWORK_SECMARK
>   LSM_HOOK_INIT(socket_sock_rcv_skb, apparmor_socket_sock_rcv_skb),
> +#endif
>   LSM_HOOK_INIT(socket_getpeersec_stream,
> apparmor_socket_getpeersec_stream),
>   LSM_HOOK_INIT(socket_getpeersec_dgram,
> apparmor_socket_getpeersec_dgram),
>   LSM_HOOK_INIT(sock_graft, apparmor_sock_graft),
> +#ifdef CONFIG_NETWORK_SECMARK
>   LSM_HOOK_INIT(inet_conn_request, apparmor_inet_conn_request),
> +#endif
>  
>   LSM_HOOK_INIT(cred_alloc_blank, apparmor_cred_alloc_blank),
>   LSM_HOOK_INIT(cred_free, apparmor_cred_free),
> @@ -1666,6 +1674,7 @@ static inline int apparmor_init_sysctl(void)
>  }
>  #endif /* CONFIG_SYSCTL */
>  
> +#if defined(CONFIG_NETFILTER) && defined(CONFIG_NETWORK_SECMARK)
>  static unsigned int apparmor_ip_postroute(void *priv,
> struct sk_buff *skb,
> const struct nf_hook_state *state)
> @@ -1754,6 +1763,7 @@ static int __init apparmor_nf_ip_init(void)
>   return 0;
>  }
>  __initcall(apparmor_nf_ip_init);
> +#endif
>  
>  static int __init apparmor_init(void)
>  {
> diff --git a/security/apparmor/net.c b/security/apparmor/net.c
> index f9a678ce994f..c07fde444792 100644
> --- a/security/apparmor/net.c
> +++ b/security/apparmor/net.c
> @@ -190,6 +190,7 @@ int aa_sock_file_perm(struct aa_label *label, const char 
> *op, u32 request,
>   return aa_label_sk_perm(label, op, request, sock->sk);
>  }
>  
> +#ifdef CONFIG_NETWORK_SECMARK
>  static int apparmor_secmark_init(struct aa_secmark *secmark)
>  {
>   struct aa_label *label;
> @@ -254,3 +255,4 @@ int apparmor_secmark_check(struct aa_label *label, char 
> *op, u32 request,
>   aa_secmark_perm(profile, request, secid,
>   &sa, sk));
>  }
> +#endif
>

Re: [PATCH v2 2/3] mm: introduce put_user_page[s](), placeholder versions

2018-10-05 Thread Jason Gunthorpe

On Fri, Oct 05, 2018 at 12:49:06PM -0700, John Hubbard wrote:
> On 10/5/18 8:17 AM, Jason Gunthorpe wrote:
> > On Thu, Oct 04, 2018 at 09:02:24PM -0700, john.hubb...@gmail.com wrote:
> >> From: John Hubbard 
> >>
> >> Introduces put_user_page(), which simply calls put_page().
> >> This provides a way to update all get_user_pages*() callers,
> >> so that they call put_user_page(), instead of put_page().
> >>
> >> Also introduces put_user_pages(), and a few dirty/locked variations,
> >> as a replacement for release_pages(), for the same reasons.
> >> These may be used for subsequent performance improvements,
> >> via batching of pages to be released.
> >>
> >> This prepares for eventually fixing the problem described
> >> in [1], and is following a plan listed in [2], [3], [4].
> >>
> >> [1] https://lwn.net/Articles/753027/ : "The Trouble with get_user_pages()"
> >>
> >> [2] https://lkml.kernel.org/r/20180709080554.21931-1-jhubb...@nvidia.com
> >> Proposed steps for fixing get_user_pages() + DMA problems.
> >>
> >> [3]https://lkml.kernel.org/r/20180710082100.mkdwngdv5kkrc...@quack2.suse.cz
> >> Bounce buffers (otherwise [2] is not really viable).
> >>
> >> [4] https://lkml.kernel.org/r/20181003162115.gg24...@quack2.suse.cz
> >> Follow-up discussions.
> >>
> [...]
> >>  
> >> +/* Placeholder version, until all get_user_pages*() callers are updated. 
> >> */
> >> +static inline void put_user_page(struct page *page)
> >> +{
> >> +  put_page(page);
> >> +}
> >> +
> >> +/* For get_user_pages*()-pinned pages, use these variants instead of
> >> + * release_pages():
> >> + */
> >> +static inline void put_user_pages_dirty(struct page **pages,
> >> +  unsigned long npages)
> >> +{
> >> +  while (npages) {
> >> +  set_page_dirty(pages[npages]);
> >> +  put_user_page(pages[npages]);
> >> +  --npages;
> >> +  }
> >> +}
> > 
> > Shouldn't these do the !PageDirty(page) thing?
> > 
> 
> Well, not yet. This is the "placeholder" patch, in which I planned to keep
> the behavior the same, while I go to all the get_user_pages call sites and 
> change 
> put_page() and release_pages() over to use these new routines.

Hmm.. Well, if it is the right thing to do here, why not include it and
take it out of callers when doing the conversion?

If it is the wrong thing, then let us still take it out of callers
when doing the conversion :)

Just seems like things will be in a better place to make future
changes if all the call sites are de-duplicated and correct.

Jason

Re: [RFC/PATCH 2/5] device property: introduce notion of subnodes for legacy boards

2018-10-05 Thread Dmitry Torokhov

Hi Heikki,

On Tue, Sep 25, 2018 at 03:19:27PM +0300, Heikki Krogerus wrote:
> On Mon, Sep 24, 2018 at 11:45:43AM -0700, Dmitry Torokhov wrote:
> > I think we are talking about totally different use cases and that is why
> > we are having hard time coming to a mutually agreeable solution. Could
> > you please describe in more detail what you would like to achieve,
> > and preferably show how it is described now with DT and/or ACPI, so that
> > I have a better frame of reference.
> 
> Yes, of course. Sorry.
> 
> USB ports are devices that usually the USB controller drivers register
> (or actually the USB core code). They are represented in both ACPI and
> DT as child nodes of the controller device node. The USB connector OF
> node is defined in file
> Documentation/devicetree/bindings/connector/usb-connector.txt
> 
> In short, the controller drivers will request handle to a child node
> that represents a port, and only after that register the actual port
> device.
> 
> The drivers I'm looking at currently are the USB Type-C port
> controller drivers and the port manager (in Greg's usb-next or
> linux-next):
> 
> drivers/usb/typec/tcpm/tcpci.c
> drivers/usb/typec/tcpm/fusb302.c
> drivers/usb/typec/tcpm/tcpm.c
> 
> The goal is simply to get rid of the platform data as usual, and
> ideally so that we don't need any extra code in order to support the
> "legacy" platforms.

Are these actually used on any of the "legacy" platforms? I fetched
linux-next today, but I do not actually see anything in
drivers/usb/typec touching platform data...

In the context of the connector, even before we descend to child nodes
details, how do you propose implementing references between fwnodes?
Especially since the other node (in case you are complementing existing
topology) may be ACPI or DT instance?

I want to say that "You aren't gonna need it" for what you are asking
here and we can actually split it apart if and when we actually want to
separate creation of pset-backed device nodes and instantiating
corresponding device structures.

Thanks.

-- 
Dmitry

[PATCH i2c-next v7 0/5] i2c: aspeed: Add bus idle waiting logic for multi-master use cases

2018-10-05 Thread Jae Hyun Yoo

In multi-master environment, this driver's master cannot know
exactly when peer master sends data to this driver's slave so a
case can happen where this master tries to send data through
the master_xfer function but slave data from peer master is still
being processed by this driver. To prevent state corruption in the
case, this patch adds checking code if any slave operation is
ongoing and it waits up to the bus timeout duration before starting
a master_xfer operation.

To support this change, it introduces changes on i2c-core-base to
make that able to read the bus timeout and master transfer retries
count values from device tree properties.

Please review this patch set.

Thanks,

-Jae

Changes since v6:
- Changed the 'timeout-ms' property name to 'bus-timeout-ms'.

Changes since v5:
- Changed using of property reading API to device_property_read_u32.

Changes since v4:
- Moved the property reading code into i2c-base-core and changed the
  property name to 'timeout-ms'. Also, added '#retries' property reading
  code.
- Changed bus busy checking logic to make that check slave_state instead
  of 'Transfer Mode State Machine' reg value.

Changes since v3:
- Changed the property name to 'timeout' and made it use the
  default setting in i2c-core when not specified.

Changes since v2:
- Changed the property name to 'aspeed,timeout' and made it to
  update the adapter's timeout configuration.

Changes since v1:
- Changed define names of timeout related.

Jae Hyun Yoo (5):
  dt-bindings: i2c: Add 'bus-timeout-ms' and '#retries' properties as
common optional
  i2c: core: Add support reading of 'bus-timeout-ms' and '#retries'
properties
  dt-bindings: i2c: aspeed: Add 'bus-timeout-ms' property as an optional
property
  i2c: aspeed: Remove hard-coded bus timeout value setting
  i2c: aspeed: Add bus idle waiting logic for multi-master use cases

 .../devicetree/bindings/i2c/i2c-aspeed.txt|  3 ++
 Documentation/devicetree/bindings/i2c/i2c.txt |  6 +++
 drivers/i2c/busses/i2c-aspeed.c   | 54 +--
 drivers/i2c/i2c-core-base.c   | 12 -
 4 files changed, 57 insertions(+), 18 deletions(-)

-- 
2.19.0

[PATCH i2c-next v7 2/5] i2c: core: Add support reading of 'bus-timeout-ms' and '#retries' properties

2018-10-05 Thread Jae Hyun Yoo

This commit adds support for 'bus-timeout-ms' and '#retries'
properties to set 'timeout' and 'retries' values in
'struct i2c_adapter' in case an adapter node has the properties.
Still the values can be set by I2C_TIMEOUT and I2C_RETRIES ioctls
on cdev at runtime too.

These properties may not be supported by all drivers. However, if
a driver wants to support one of them, it should adapt the
bindings in the dt-bindings document.

Signed-off-by: Jae Hyun Yoo 
---
 drivers/i2c/i2c-core-base.c | 12 ++--
 1 file changed, 10 insertions(+), 2 deletions(-)

diff --git a/drivers/i2c/i2c-core-base.c b/drivers/i2c/i2c-core-base.c
index 799776c6d421..aa2a365d374a 100644
--- a/drivers/i2c/i2c-core-base.c
+++ b/drivers/i2c/i2c-core-base.c
@@ -1214,6 +1214,7 @@ EXPORT_SYMBOL_GPL(i2c_handle_smbus_host_notify);
 static int i2c_register_adapter(struct i2c_adapter *adap)
 {
int res = -EINVAL;
+   u32 bus_timeout_ms = 0;
 
/* Can't register until after driver model init */
if (WARN_ON(!is_registered)) {
@@ -1239,8 +1240,15 @@ static int i2c_register_adapter(struct i2c_adapter *adap)
INIT_LIST_HEAD(&adap->userspace_clients);
 
/* Set default timeout to 1 second if not already set */
-   if (adap->timeout == 0)
-   adap->timeout = HZ;
+   if (adap->timeout == 0) {
+   device_property_read_u32(&adap->dev, "bus-timeout-ms",
+&bus_timeout_ms);
+   adap->timeout = bus_timeout_ms ?
+   msecs_to_jiffies(bus_timeout_ms) : HZ;
+   }
+
+   /* Set retries count if it has the property setting */
+   device_property_read_u32(&adap->dev, "#retries", &adap->retries);
 
/* register soft irqs for Host Notify */
res = i2c_setup_host_notify_irq_domain(adap);
-- 
2.19.0

[PATCH i2c-next v7 4/5] i2c: aspeed: Remove hard-coded bus timeout value setting

2018-10-05 Thread Jae Hyun Yoo

This commit removes hard-coded bus timeout value setting so that
it can be set by i2c-core-base.

Signed-off-by: Jae Hyun Yoo 
Reviewed-by: Joel Stanley 
---
 drivers/i2c/busses/i2c-aspeed.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/drivers/i2c/busses/i2c-aspeed.c b/drivers/i2c/busses/i2c-aspeed.c
index 8dc9161ced38..833b6b6a4c7e 100644
--- a/drivers/i2c/busses/i2c-aspeed.c
+++ b/drivers/i2c/busses/i2c-aspeed.c
@@ -930,7 +930,6 @@ static int aspeed_i2c_probe_bus(struct platform_device 
*pdev)
init_completion(&bus->cmd_complete);
bus->adap.owner = THIS_MODULE;
bus->adap.retries = 0;
-   bus->adap.timeout = 5 * HZ;
bus->adap.algo = &aspeed_i2c_algo;
bus->adap.dev.parent = &pdev->dev;
bus->adap.dev.of_node = pdev->dev.of_node;
-- 
2.19.0

[PATCH i2c-next v7 5/5] i2c: aspeed: Add bus idle waiting logic for multi-master use cases

2018-10-05 Thread Jae Hyun Yoo

In multi-master environment, this driver's master cannot know
exactly when peer master sends data to this driver's slave so a
case can happen where this master tries to send data through
the master_xfer function but slave data from peer master is still
being processed by this driver.

To prevent any state corruption in the case, this patch adds
checking code if any slave operation is ongoing and it waits up to
the bus timeout duration before starting a master_xfer operation.

Signed-off-by: Jae Hyun Yoo 
---
 drivers/i2c/busses/i2c-aspeed.c | 53 +++--
 1 file changed, 38 insertions(+), 15 deletions(-)

diff --git a/drivers/i2c/busses/i2c-aspeed.c b/drivers/i2c/busses/i2c-aspeed.c
index 833b6b6a4c7e..8d60d7e5b323 100644
--- a/drivers/i2c/busses/i2c-aspeed.c
+++ b/drivers/i2c/busses/i2c-aspeed.c
@@ -12,6 +12,7 @@
 
 #include 
 #include 
+#include 
 #include 
 #include 
 #include 
@@ -115,6 +116,9 @@
 /* 0x18 : I2CD Slave Device Address Register   */
 #define ASPEED_I2CD_DEV_ADDR_MASK  GENMASK(6, 0)
 
+/* Busy checking */
+#define ASPEED_I2C_BUS_BUSY_CHECK_INTERVAL_US  (10 * 1000)
+
 enum aspeed_i2c_master_state {
ASPEED_I2C_MASTER_INACTIVE,
ASPEED_I2C_MASTER_START,
@@ -156,6 +160,8 @@ struct aspeed_i2c_bus {
int cmd_err;
/* Protected only by i2c_lock_bus */
int master_xfer_result;
+   /* Multi-master */
+   boolmulti_master;
 #if IS_ENABLED(CONFIG_I2C_SLAVE)
struct i2c_client   *slave;
enum aspeed_i2c_slave_state slave_state;
@@ -596,27 +602,42 @@ static irqreturn_t aspeed_i2c_bus_irq(int irq, void 
*dev_id)
return irq_remaining ? IRQ_NONE : IRQ_HANDLED;
 }
 
+static int aspeed_i2c_check_bus_busy(struct aspeed_i2c_bus *bus)
+{
+   ktime_t timeout;
+
+   if (bus->multi_master) {
+   might_sleep();
+   timeout = ktime_add_ms(ktime_get(),
+  jiffies_to_msecs(bus->adap.timeout));
+   }
+
+   for (;;) {
+   if (!(readl(bus->base + ASPEED_I2C_CMD_REG) &
+ ASPEED_I2CD_BUS_BUSY_STS) &&
+   bus->slave_state == ASPEED_I2C_SLAVE_STOP)
+   return 0;
+   if (!bus->multi_master)
+   break;
+   if (ktime_compare(ktime_get(), timeout) > 0)
+   break;
+   usleep_range((ASPEED_I2C_BUS_BUSY_CHECK_INTERVAL_US >> 2) + 1,
+ASPEED_I2C_BUS_BUSY_CHECK_INTERVAL_US);
+   }
+
+   return aspeed_i2c_recover_bus(bus);
+}
+
 static int aspeed_i2c_master_xfer(struct i2c_adapter *adap,
  struct i2c_msg *msgs, int num)
 {
struct aspeed_i2c_bus *bus = i2c_get_adapdata(adap);
unsigned long time_left, flags;
-   int ret = 0;
 
-   spin_lock_irqsave(&bus->lock, flags);
-   bus->cmd_err = 0;
-
-   /* If bus is busy, attempt recovery. We assume a single master
-* environment.
-*/
-   if (readl(bus->base + ASPEED_I2C_CMD_REG) & ASPEED_I2CD_BUS_BUSY_STS) {
-   spin_unlock_irqrestore(&bus->lock, flags);
-   ret = aspeed_i2c_recover_bus(bus);
-   if (ret)
-   return ret;
-   spin_lock_irqsave(&bus->lock, flags);
-   }
+   if (aspeed_i2c_check_bus_busy(bus))
+   return -EAGAIN;
 
+   spin_lock_irqsave(&bus->lock, flags);
bus->cmd_err = 0;
bus->msgs = msgs;
bus->msgs_index = 0;
@@ -827,7 +848,9 @@ static int aspeed_i2c_init(struct aspeed_i2c_bus *bus,
if (ret < 0)
return ret;
 
-   if (!of_property_read_bool(pdev->dev.of_node, "multi-master"))
+   if (of_property_read_bool(pdev->dev.of_node, "multi-master"))
+   bus->multi_master = true;
+   else
fun_ctrl_reg |= ASPEED_I2CD_MULTI_MASTER_DIS;
 
/* Enable Master Mode */
-- 
2.19.0

[PATCH i2c-next v7 1/5] dt-bindings: i2c: Add 'bus-timeout-ms' and '#retries' properties as common optional

2018-10-05 Thread Jae Hyun Yoo

This commit adds 'bus-timeout-ms' and '#retries' properties as
common optional properties that can be used for setting 'timeout'
and 'retries' values of 'struct i2c_adapter'. With this patch, the
bus timeout value and the master transfer retries count can be set
through these properties at the registration time of an adapter.
Still the values can be set by I2C_TIMEOUT and I2C_RETRIES ioctls
on cdev at runtime too.

These properties may not be supported by all drivers. However, if
a driver wants to support one of them, it should adapt the
bindings in this document.

Signed-off-by: Jae Hyun Yoo 
---
 Documentation/devicetree/bindings/i2c/i2c.txt | 6 ++
 1 file changed, 6 insertions(+)

diff --git a/Documentation/devicetree/bindings/i2c/i2c.txt 
b/Documentation/devicetree/bindings/i2c/i2c.txt
index 11263982470e..bdead91f82a4 100644
--- a/Documentation/devicetree/bindings/i2c/i2c.txt
+++ b/Documentation/devicetree/bindings/i2c/i2c.txt
@@ -80,6 +80,12 @@ wants to support one of the below features, it should adapt 
the bindings below.
Names of map programmable addresses.
It can contain any map needing another address than default one.
 
+- bus-timeout-ms
+   Bus timeout in milliseconds.
+
+- #retries
+   Number of retries for master transfer.
+
 Binding may contain optional "interrupts" property, describing interrupts
 used by the device. I2C core will assign "irq" interrupt (or the very first
 interrupt if not using interrupt names) as primary interrupt for the slave.
-- 
2.19.0

[PATCH i2c-next v7 3/5] dt-bindings: i2c: aspeed: Add 'bus-timeout-ms' property as an optional property

2018-10-05 Thread Jae Hyun Yoo

This commit adds 'bus-timeout-ms' property as an optional property
which can be used for setting the bus timeout value of an adapter.
With this patch, the bus timeout value can be set through this
property at the probing time of this module. Still the bus timeout
value can be set by an I2C_TIMEOUT ioctl on cdev at runtime too.

Signed-off-by: Jae Hyun Yoo 
---
 Documentation/devicetree/bindings/i2c/i2c-aspeed.txt | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/Documentation/devicetree/bindings/i2c/i2c-aspeed.txt 
b/Documentation/devicetree/bindings/i2c/i2c-aspeed.txt
index 8fbd8633a387..ce1f07620368 100644
--- a/Documentation/devicetree/bindings/i2c/i2c-aspeed.txt
+++ b/Documentation/devicetree/bindings/i2c/i2c-aspeed.txt
@@ -17,6 +17,9 @@ Optional Properties:
  specified
 - multi-master : states that there is another master active on this bus.
 
+- bus-timeout-ms: bus timeout in milliseconds; defaults to 1 second when not
+ specified.
+
 Example:
 
 i2c {
-- 
2.19.0

Re: [PATCH v3] tools/lib/traceevent: Replace str_error_r() with an open coded implementation

2018-10-05 Thread Colin McCabe

On Fri, Oct 5, 2018, at 12:47, Arnaldo Carvalho de Melo wrote:
> Em Fri, Oct 05, 2018 at 09:27:16AM -0700, Colin McCabe escreveu:
> > Hmm.  Did you consider setting the ifdefs you can set to always get the 
> > POSIX version of strerror_r?
> 
> Yes, didn't work for tools/perf, that uses _GNU_SOURCE, so we would have
> the headers with that and the .c with an explicit undef _GNU_SOURCE,
> that didn't fly, at least in my experiments, and that may be the case
> with just some odd distros, working with others.

Yeah, I believe "#undef _GNU_SOURCE" is needed.  I was including that as part 
of "setting [up] the ifdefs."  Sorry, that was probably confusing.

I've done the "separate .c file with POSIX strerror_r" thing before as well.  
It's probably the sanest way to do things if you want to avoid the deprecation 
warning.

It's kind of sad that we're still having problems with strerror_r in 2018.  
Someone should just use thread-local storage in glibc to fix the original 
strerror to work like everyone expects it to.  I have a vague memory that some 
OS did this (Solaris?)  The people who wouldn't want the extra 100 bytes per 
thread are probably embedded people using a custom libc, or people like the 
Golang authors who try to avoid libc altogether.

I think this also kind of happened with readdir_r.  
https://lwn.net/Articles/696474/
Although in that case, it wasn't even theoretically possible to correctly use 
the _r version, so they had to just fix the regular one.

best,
Colin

> 
> Having a separate .c file that first thing it does is to undef
> _GNU_SOURCE, then include the necessary headers, then use strerror_r was
> what worked across the 67 environments in my containers:
> 
>1 alpine:3.4: Ok   gcc (Alpine 5.3.0) 5.3.0
>2 alpine:3.5: Ok   gcc (Alpine 6.2.1) 6.2.1 
> 20160822
>3 alpine:3.6: Ok   gcc (Alpine 6.3.0) 6.3.0
>4 alpine:3.7: Ok   gcc (Alpine 6.4.0) 6.4.0
>5 alpine:3.8: Ok   gcc (Alpine 6.4.0) 6.4.0
>6 alpine:edge   : Ok   gcc (Alpine 6.4.0) 6.4.0
>7 amazonlinux:1 : Ok   gcc (GCC) 4.8.5 20150623 (Red 
> Hat 4.8.5-28)
>8 amazonlinux:2 : Ok   gcc (GCC) 7.3.1 20180303 (Red 
> Hat 7.3.1-5)
>9 android-ndk:r12b-arm  : Ok   arm-linux-androideabi-gcc 
> (GCC) 4.9.x 20150123 (prerelease)
>   10 android-ndk:r15c-arm  : Ok   arm-linux-androideabi-gcc 
> (GCC) 4.9.x 20150123 (prerelease)
>   11 centos:5  : Ok   gcc (GCC) 4.1.2 20080704 (Red 
> Hat 4.1.2-55)
>   12 centos:6  : Ok   gcc (GCC) 4.4.7 20120313 (Red 
> Hat 4.4.7-23)
>   13 centos:7  : Ok   gcc (GCC) 4.8.5 20150623 (Red 
> Hat 4.8.5-28)
>   14 clearlinux:latest : Ok   gcc (Clear Linux OS for Intel 
> Architecture) 8.2.1 20180502
>   15 debian:7  : Ok   gcc (Debian 4.7.2-5) 4.7.2
>   16 debian:8  : Ok   gcc (Debian 4.9.2-10+deb8u1) 
> 4.9.2
>   17 debian:9  : Ok   gcc (Debian 6.3.0-18+deb9u1) 
> 6.3.0 20170516
>   18 debian:experimental   : Ok   gcc (Debian 8.2.0-7) 8.2.0
>   19 debian:experimental-x-arm64   : Ok   aarch64-linux-gnu-gcc (Debian 
> 8.2.0-4) 8.2.0
>   20 debian:experimental-x-mips: Ok   mips-linux-gnu-gcc (Debian 
> 8.1.0-12) 8.1.0
>   21 debian:experimental-x-mips64  : Ok   mips64-linux-gnuabi64-gcc 
> (Debian 8.1.0-12) 8.1.0
>   22 debian:experimental-x-mipsel  : Ok   mipsel-linux-gnu-gcc (Debian 
> 8.1.0-12) 8.1.0
>   23 fedora:20 : Ok   gcc (GCC) 4.8.3 20140911 (Red 
> Hat 4.8.3-7)
>   24 fedora:21 : Ok   gcc (GCC) 4.9.2 20150212 (Red 
> Hat 4.9.2-6)
>   25 fedora:22 : Ok   gcc (GCC) 5.3.1 20160406 (Red 
> Hat 5.3.1-6)
>   26 fedora:23 : Ok   gcc (GCC) 5.3.1 20160406 (Red 
> Hat 5.3.1-6)
>   27 fedora:24 : Ok   gcc (GCC) 6.3.1 20161221 (Red 
> Hat 6.3.1-1)
>   28 fedora:24-x-ARC-uClibc: Ok   arc-linux-gcc (ARCompact ISA 
> Linux uClibc toolchain 2017.09-rc2) 7.1.1 20170710
>   29 fedora:25 : Ok   gcc (GCC) 6.4.1 20170727 (Red 
> Hat 6.4.1-1)
>   30 fedora:26 : Ok   gcc (GCC) 7.3.1 20180130 (Red 
> Hat 7.3.1-2)
>   31 fedora:27 : Ok   gcc (GCC) 7.3.1 20180712 (Red 
> Hat 7.3.1-6)
>   32 fedora:28 : Ok   gcc (GCC) 8.1.1 20180712 (Red 
> Hat 8.1.1-5)
>   33 fedora:rawhide: Ok   gcc (GCC) 8.2.1 20180905 (Red 
> Hat 8.2.1-3)
>   34 gentoo-stage3-amd64:latest: Ok   gcc (Gentoo 7.3.0-r3 p1.4) 
> 7.3.0
>   35 mageia:5  : Ok   gcc (GCC) 4.9.2
>   36 mageia:6  : Ok   gcc (Mageia 5.5.0-1.mga6) 
> 5.5.0
>   37 opensuse:13.2 : Ok   gcc (SUSE Linux) 4.8.3 
> 20140627 [gcc-4_8-branch revision 212064]
>   38 opensuse:42.1 : Ok   gcc

Re: [PATCH v2 10/11] arch/x86: Add AMD feature bit X86_FEATURE_MBA in cpuid bits array

2018-10-05 Thread Borislav Petkov

On Fri, Oct 05, 2018 at 08:56:09PM +, Moger, Babu wrote:
> From: Sherry Hurwitz 
> 
> The feature bit X86_FEATURE_MBA is detected via CPUID leaf 0x80000008
> EBX Bit 06. This bit indicates the support of AMD's MBA feature.
> 
> This feature is supported by both Intel and AMD, but it is detected via
> different CPUID leaves.
> 
> Signed-off-by: Babu Moger 
> Signed-off-by: Sherry Hurwitz 

This SOB chain should be the other way around - first Sherry, then you.

> ---
>  arch/x86/kernel/cpu/scattered.c | 7 ++-
>  1 file changed, 6 insertions(+), 1 deletion(-)
> 
> diff --git a/arch/x86/kernel/cpu/scattered.c b/arch/x86/kernel/cpu/scattered.c
> index 772c219b6889..bd7853334b27 100644
> --- a/arch/x86/kernel/cpu/scattered.c
> +++ b/arch/x86/kernel/cpu/scattered.c
> @@ -17,7 +17,11 @@ struct cpuid_bit {
>   u32 sub_leaf;
>  };
>  
> -/* Please keep the leaf sorted by cpuid_bit.level for faster search. */
> +/*
> + * Please keep the leaf sorted by cpuid_bit.level for faster search.
> + * X86_FEATURE_MBA supported by both Intel and AMD. But the cpuid
> + * levels are different. Add a separate entry for each.
> + */
>  static const struct cpuid_bit cpuid_bits[] = {
>   { X86_FEATURE_APERFMPERF,   CPUID_ECX,  0, 0x0006, 0 },
>   { X86_FEATURE_EPB,  CPUID_ECX,  3, 0x0006, 0 },
> @@ -29,6 +33,7 @@ static const struct cpuid_bit cpuid_bits[] = {
>   { X86_FEATURE_HW_PSTATE,CPUID_EDX,  7, 0x8007, 0 },
>   { X86_FEATURE_CPB,  CPUID_EDX,  9, 0x8007, 0 },
>   { X86_FEATURE_PROC_FEEDBACK,CPUID_EDX, 11, 0x8007, 0 },
> + { X86_FEATURE_MBA,  CPUID_EBX,  6, 0x8008, 0 },
>   { X86_FEATURE_SME,  CPUID_EAX,  0, 0x801f, 0 },
>   { X86_FEATURE_SEV,  CPUID_EAX,  1, 0x801f, 0 },
>   { 0, 0, 0, 0, 0 }
> -- 

With that fixed:

Reviewed-by: Borislav Petkov 

-- 
Regards/Gruss,
Boris.

Good mailing practices for 400: avoid top-posting and trim the reply.

Re: [PATCH 0/2] tools/perf: Python 3 + clang build fixes

2018-10-05 Thread Arnaldo Carvalho de Melo

Em Fri, Oct 05, 2018 at 05:40:56PM -0300, Eduardo Habkost escreveu:
> This series contains a couple fixes to make it possible to build
> perf with Python 3 and clang.
> 
> Eduardo Habkost (2):
>   perf: Make clang_has_option() work on Python 3
>   perf: More portable way to make CFLAGS work with clang

Thanks! That was fast!

Applying and testing,

- Arnaldo

Re: [patch] mm, page_alloc: set num_movable in move_freepages()

2018-10-05 Thread Andrew Morton

On Fri, 5 Oct 2018 13:56:39 -0700 (PDT) David Rientjes  
wrote:

> If move_freepages() returns 0 because zone_spans_pfn(), *num_movable can

 move_free_pages_block()?   !zone_spans_pfn()?

> hold the value from the stack because it does not get initialized in
> move_freepages().
> 
> Move the initialization to move_freepages_block() to guarantee the value
> actually makes sense.
> 
> This currently doesn't affect its only caller where num_movable != NULL,
> so no bug fix, but just more robust.
> 
> ...

Re: [PATCH] mmc: meson-mx-sdio: mark expected switch fall-through

2018-10-05 Thread Gustavo A. R. Silva

Hi Martin,

On 10/5/18 11:10 PM, Martin Blumenstingl wrote:
> On Fri, Oct 5, 2018 at 12:15 PM Gustavo A. R. Silva
>  wrote:
>>
>> In preparation to enabling -Wimplicit-fallthrough, mark switch cases
>> where we are expecting to fall through.
>>
>> Notice that in this particular case, I replaced the "fall-through:"
>> comment with a proper "fall through", which is what GCC is
>> expecting to find.
>>
>> Addresses-Coverity-ID: 1373880 ("Missing break in switch")
>> Signed-off-by: Gustavo A. R. Silva 
> Acked-by: Martin Blumenstingl 
> 
> thank you for taking care of this!
> 

Glad to help. :)

Thanks
--
Gustavo

Re: [PATCH] staging/rtlwifi: Fixing formatting warnings.

2018-10-05 Thread Gustavo A. R. Silva




On 10/5/18 10:58 PM, Scott Tracy wrote:
> Signed-off-by: Scott Tracy 
> ---
>  drivers/staging/rtlwifi/core.c  | 5 +++--
>  drivers/staging/rtlwifi/efuse.c | 5 +++--
>  2 files changed, 6 insertions(+), 4 deletions(-)
> 
> diff --git a/drivers/staging/rtlwifi/core.c b/drivers/staging/rtlwifi/core.c
> index ca37f7511c4d..a36cb44a5388 100644
> --- a/drivers/staging/rtlwifi/core.c
> +++ b/drivers/staging/rtlwifi/core.c
> @@ -1109,7 +1109,7 @@ static void rtl_op_bss_info_changed(struct ieee80211_hw 
> *hw,
>   if (rtlpriv->dm.supp_phymode_switch) {
>   if (sta->ht_cap.ht_supported)
>   rtl_send_smps_action(hw, sta,
> -  
> IEEE80211_SMPS_STATIC);
> + IEEE80211_SMPS_STATIC);
>   }
>  
>   if (rtlhal->current_bandtype == BAND_ON_5G) {
> @@ -1882,7 +1882,8 @@ bool rtl_hal_pwrseqcmdparsing(struct rtl_priv *rtlpriv, 
> u8 cut_version,
>   return true;
>   default:
>   WARN_ONCE(true,
> -   "rtlwifi: %s(): Unknown CMD!!\n", 
> __func__);
> +  "rtlwifi: %s(): Unknown CMD!!\n",
> +   __func__);
>   break;
>   }
>   }
> diff --git a/drivers/staging/rtlwifi/efuse.c b/drivers/staging/rtlwifi/efuse.c
> index 1dc71455f270..5b8afdb3e0fe 100644
> --- a/drivers/staging/rtlwifi/efuse.c
> +++ b/drivers/staging/rtlwifi/efuse.c
> @@ -245,7 +245,8 @@ void read_efuse(struct ieee80211_hw *hw, u16 _offset, u16 
> _size_byte, u8 *pbuf)
>   if (!efuse_word)
>   goto out;
>   for (i = 0; i < EFUSE_MAX_WORD_UNIT; i++) {
> - efuse_word[i] = kcalloc(efuse_max_section, sizeof(u16), 
> GFP_ATOMIC);
> + efuse_word[i] = kcalloc(efuse_max_section,
> + sizeof(u16), GFP_ATOMIC);
>   if (!efuse_word[i])
>   goto done;
>   }
> @@ -375,7 +376,7 @@ bool efuse_shadow_update_chk(struct ieee80211_hw *hw)
>   for (i = 0; i < 8; i = i + 2) {
>   if ((rtlefuse->efuse_map[EFUSE_INIT_MAP][base + i] !=
>rtlefuse->efuse_map[EFUSE_MODIFY_MAP][base + i]) ||
> - (rtlefuse->efuse_map[EFUSE_INIT_MAP][base + i + 1] 
> !=
> +(rtlefuse->efuse_map[EFUSE_INIT_MAP][base + i + 1] !=
>rtlefuse->efuse_map[EFUSE_MODIFY_MAP][base + i +
>  1])) {
>   words_need++;
> 

Hi,

Please always add a commit log explaining what exactly you are trying to fix.

Thanks
--
Gustavo

Re: [PATCH RFC] mm: Add an fs-write seal to memfd

2018-10-05 Thread Joel Fernandes

On Fri, Oct 05, 2018 at 12:53:39PM -0700, Andrew Morton wrote:
> On Fri,  5 Oct 2018 12:27:27 -0700 "Joel Fernandes (Google)" 
>  wrote:
> 
> > To support the usecase, this patch adds a new F_SEAL_FS_WRITE seal which
> > prevents any future mmap and write syscalls from succeeding while
> > keeping the existing mmap active. The following program shows the seal
> > working in action:
> 
> Please be prepared to create a manpage patch for this one.

Sure, I will do that. thanks,

 - Joel

Re: [PATCH] mmc: meson-mx-sdio: mark expected switch fall-through

2018-10-05 Thread Martin Blumenstingl

On Fri, Oct 5, 2018 at 12:15 PM Gustavo A. R. Silva
 wrote:
>
> In preparation to enabling -Wimplicit-fallthrough, mark switch cases
> where we are expecting to fall through.
>
> Notice that in this particular case, I replaced the "fall-through:"
> comment with a proper "fall through", which is what GCC is
> expecting to find.
>
> Addresses-Coverity-ID: 1373880 ("Missing break in switch")
> Signed-off-by: Gustavo A. R. Silva 
Acked-by: Martin Blumenstingl 

thank you for taking care of this!


Regards
Martin

Re: [PATCH] staging: emxx_udc: Remove unused device_desc declaration

2018-10-05 Thread Nick Desaulniers

On Wed, Oct 3, 2018 at 10:56 PM Nathan Chancellor
 wrote:
>
> Clang warns:
>
> drivers/staging/emxx_udc/emxx_udc.c:1373:37: warning: variable
> 'device_desc' is not needed and will not be emitted
> [-Wunneeded-internal-declaration]
> static struct usb_device_descriptor device_desc = {
> ^
> 1 warning generated.
>
> This definition hasn't been attached to anything since the driver was
> introduced in commit 33aa8d45a4fe ("staging: emxx_udc: Add Emma Mobile
> USB Gadget driver") and neither GCC nor Clang emit any reference to the
> variable in the final assembly. The only reason GCC doesn't warn about
> this variable being unused is the sizeof function.
>
> Reported-by: Nick Desaulniers 
> Signed-off-by: Nathan Chancellor 
> ---
>
> This seems kind of wrong given this is a USB driver but there isn't an
> instance of a platform_driver in the kernel tree having a usb device
> descriptor declaration so I'm unsure of how to handle this warning aside
> from just removing the definition but I'm certainly open to suggestions.

In drivers under drivers/usb/gadget/legacy/{ether|mass_storage|hid}.c,
it seems that addresses of instances of `struct usb_device_descriptor`
are stored in instances of `struct usb_composite_driver eth_driver`
that are passed to module_usb_composite_driver().

drivers/staging/emxx_udc/emxx_udc.c doesn't mention anything about
being a composite driver, and I don't know if there are multiple
devices to warrant a composite driver?  Composite seems to imply "more
than one gadget" while the path to drivers using this interface under
drivers/usb/gadget/legacy/ seem to imply there's a modern (non-legacy)
usb gadget interface that could potentially be used instead.

If this was never intended to be a composite usb driver, or there's
some reason why it doesn't make sense for it to be one, then this code
is likely dead and your fix is correct.  If it's not, maybe folks who
know more about the USB interfaces have another solution to make this
a composite usb driver?

>
>  drivers/staging/emxx_udc/emxx_udc.c | 19 ---
>  1 file changed, 19 deletions(-)
>
> diff --git a/drivers/staging/emxx_udc/emxx_udc.c 
> b/drivers/staging/emxx_udc/emxx_udc.c
> index 0e8d3f232fe9..65cc3d9af972 100644
> --- a/drivers/staging/emxx_udc/emxx_udc.c
> +++ b/drivers/staging/emxx_udc/emxx_udc.c
> @@ -1368,25 +1368,6 @@ static void _nbu2ss_set_endpoint_stall(
> }
>  }
>
> -/*-*/
> -/* Device Descriptor */
> -static struct usb_device_descriptor device_desc = {
> -   .bLength  = sizeof(device_desc),
> -   .bDescriptorType  = USB_DT_DEVICE,
> -   .bcdUSB   = cpu_to_le16(0x0200),
> -   .bDeviceClass = USB_CLASS_VENDOR_SPEC,
> -   .bDeviceSubClass  = 0x00,
> -   .bDeviceProtocol  = 0x00,
> -   .bMaxPacketSize0  = 64,
> -   .idVendor = cpu_to_le16(0x0409),
> -   .idProduct= cpu_to_le16(0xfff0),
> -   .bcdDevice= 0x,
> -   .iManufacturer= 0x00,
> -   .iProduct = 0x00,
> -   .iSerialNumber= 0x00,
> -   .bNumConfigurations   = 0x01,
> -};
> -
>  /*-*/
>  static void _nbu2ss_set_test_mode(struct nbu2ss_udc *udc, u32 mode)
>  {
> --
> 2.19.0
>

-- 
Thanks,
~Nick Desaulniers

[PATCH] staging/rtlwifi: Fixing formatting warnings.

2018-10-05 Thread Scott Tracy

Signed-off-by: Scott Tracy 
---
 drivers/staging/rtlwifi/core.c  | 5 +++--
 drivers/staging/rtlwifi/efuse.c | 5 +++--
 2 files changed, 6 insertions(+), 4 deletions(-)

diff --git a/drivers/staging/rtlwifi/core.c b/drivers/staging/rtlwifi/core.c
index ca37f7511c4d..a36cb44a5388 100644
--- a/drivers/staging/rtlwifi/core.c
+++ b/drivers/staging/rtlwifi/core.c
@@ -1109,7 +1109,7 @@ static void rtl_op_bss_info_changed(struct ieee80211_hw 
*hw,
if (rtlpriv->dm.supp_phymode_switch) {
if (sta->ht_cap.ht_supported)
rtl_send_smps_action(hw, sta,
-
IEEE80211_SMPS_STATIC);
+   IEEE80211_SMPS_STATIC);
}
 
if (rtlhal->current_bandtype == BAND_ON_5G) {
@@ -1882,7 +1882,8 @@ bool rtl_hal_pwrseqcmdparsing(struct rtl_priv *rtlpriv, 
u8 cut_version,
return true;
default:
WARN_ONCE(true,
- "rtlwifi: %s(): Unknown CMD!!\n", 
__func__);
+"rtlwifi: %s(): Unknown CMD!!\n",
+ __func__);
break;
}
}
diff --git a/drivers/staging/rtlwifi/efuse.c b/drivers/staging/rtlwifi/efuse.c
index 1dc71455f270..5b8afdb3e0fe 100644
--- a/drivers/staging/rtlwifi/efuse.c
+++ b/drivers/staging/rtlwifi/efuse.c
@@ -245,7 +245,8 @@ void read_efuse(struct ieee80211_hw *hw, u16 _offset, u16 
_size_byte, u8 *pbuf)
if (!efuse_word)
goto out;
for (i = 0; i < EFUSE_MAX_WORD_UNIT; i++) {
-   efuse_word[i] = kcalloc(efuse_max_section, sizeof(u16), 
GFP_ATOMIC);
+   efuse_word[i] = kcalloc(efuse_max_section,
+   sizeof(u16), GFP_ATOMIC);
if (!efuse_word[i])
goto done;
}
@@ -375,7 +376,7 @@ bool efuse_shadow_update_chk(struct ieee80211_hw *hw)
for (i = 0; i < 8; i = i + 2) {
if ((rtlefuse->efuse_map[EFUSE_INIT_MAP][base + i] !=
 rtlefuse->efuse_map[EFUSE_MODIFY_MAP][base + i]) ||
-   (rtlefuse->efuse_map[EFUSE_INIT_MAP][base + i + 1] 
!=
+  (rtlefuse->efuse_map[EFUSE_INIT_MAP][base + i + 1] !=
 rtlefuse->efuse_map[EFUSE_MODIFY_MAP][base + i +
   1])) {
words_need++;
-- 
2.17.1

[PATCH v2 02/11] arch/x86: Rename the RDT functions and definitions

2018-10-05 Thread Moger, Babu

As AMD is starting to support RDT (or QOS) features, rename
the RDT functions and definitions to more generic names.

Signed-off-by: Babu Moger 
---
 arch/x86/include/asm/rdt_sched.h   | 22 +++---
 arch/x86/kernel/cpu/rdt.c  | 24 
 arch/x86/kernel/cpu/rdt.h  |  8 
 arch/x86/kernel/cpu/rdt_monitor.c  | 10 +-
 arch/x86/kernel/cpu/rdt_rdtgroup.c | 10 +-
 arch/x86/kernel/process_32.c   |  2 +-
 arch/x86/kernel/process_64.c   |  2 +-
 7 files changed, 39 insertions(+), 39 deletions(-)

diff --git a/arch/x86/include/asm/rdt_sched.h b/arch/x86/include/asm/rdt_sched.h
index 9acb06b6f81e..666bf9acb41d 100644
--- a/arch/x86/include/asm/rdt_sched.h
+++ b/arch/x86/include/asm/rdt_sched.h
@@ -1,6 +1,6 @@
 /* SPDX-License-Identifier: GPL-2.0 */
-#ifndef _ASM_X86_INTEL_RDT_SCHED_H
-#define _ASM_X86_INTEL_RDT_SCHED_H
+#ifndef _ASM_X86_RDT_SCHED_H
+#define _ASM_X86_RDT_SCHED_H
 
 #ifdef CONFIG_INTEL_RDT
 
@@ -24,21 +24,21 @@
  * The cache also helps to avoid pointless updates if the value does
  * not change.
  */
-struct intel_pqr_state {
+struct rdt_pqr_state {
u32 cur_rmid;
u32 cur_closid;
u32 default_rmid;
u32 default_closid;
 };
 
-DECLARE_PER_CPU(struct intel_pqr_state, pqr_state);
+DECLARE_PER_CPU(struct rdt_pqr_state, pqr_state);
 
 DECLARE_STATIC_KEY_FALSE(rdt_enable_key);
 DECLARE_STATIC_KEY_FALSE(rdt_alloc_enable_key);
 DECLARE_STATIC_KEY_FALSE(rdt_mon_enable_key);
 
 /*
- * __intel_rdt_sched_in() - Writes the task's CLOSid/RMID to IA32_PQR_MSR
+ * __rdt_sched_in() - Writes the task's CLOSid/RMID to IA32_PQR_MSR
  *
  * Following considerations are made so that this has minimal impact
  * on scheduler hot path:
@@ -51,9 +51,9 @@ DECLARE_STATIC_KEY_FALSE(rdt_mon_enable_key);
  *   simple as possible.
  * Must be called with preemption disabled.
  */
-static void __intel_rdt_sched_in(void)
+static void __rdt_sched_in(void)
 {
-   struct intel_pqr_state *state = this_cpu_ptr(&pqr_state);
+   struct rdt_pqr_state *state = this_cpu_ptr(&pqr_state);
u32 closid = state->default_closid;
u32 rmid = state->default_rmid;
 
@@ -78,16 +78,16 @@ static void __intel_rdt_sched_in(void)
}
 }
 
-static inline void intel_rdt_sched_in(void)
+static inline void rdt_sched_in(void)
 {
if (static_branch_likely(&rdt_enable_key))
-   __intel_rdt_sched_in();
+   __rdt_sched_in();
 }
 
 #else
 
-static inline void intel_rdt_sched_in(void) {}
+static inline void rdt_sched_in(void) {}
 
 #endif /* CONFIG_INTEL_RDT */
 
-#endif /* _ASM_X86_INTEL_RDT_SCHED_H */
+#endif /* _ASM_X86_RDT_SCHED_H */
diff --git a/arch/x86/kernel/cpu/rdt.c b/arch/x86/kernel/cpu/rdt.c
index 28d6cd254ba9..b361c63170d7 100644
--- a/arch/x86/kernel/cpu/rdt.c
+++ b/arch/x86/kernel/cpu/rdt.c
@@ -40,12 +40,12 @@
 DEFINE_MUTEX(rdtgroup_mutex);
 
 /*
- * The cached intel_pqr_state is strictly per CPU and can never be
+ * The cached rdt_pqr_state is strictly per CPU and can never be
  * updated from a remote CPU. Functions which modify the state
  * are called with interrupts disabled and no preemption, which
  * is sufficient for the protection.
  */
-DEFINE_PER_CPU(struct intel_pqr_state, pqr_state);
+DEFINE_PER_CPU(struct rdt_pqr_state, pqr_state);
 
 /*
  * Used to store the max resource name width and max resource data width
@@ -634,7 +634,7 @@ static void domain_remove_cpu(int cpu, struct rdt_resource 
*r)
 
 static void clear_closid_rmid(int cpu)
 {
-   struct intel_pqr_state *state = this_cpu_ptr(&pqr_state);
+   struct rdt_pqr_state *state = this_cpu_ptr(&pqr_state);
 
state->default_closid = 0;
state->default_rmid = 0;
@@ -643,7 +643,7 @@ static void clear_closid_rmid(int cpu)
wrmsr(IA32_PQR_ASSOC, 0, 0);
 }
 
-static int intel_rdt_online_cpu(unsigned int cpu)
+static int rdt_online_cpu(unsigned int cpu)
 {
struct rdt_resource *r;
 
@@ -669,7 +669,7 @@ static void clear_childcpus(struct rdtgroup *r, unsigned 
int cpu)
}
 }
 
-static int intel_rdt_offline_cpu(unsigned int cpu)
+static int rdt_offline_cpu(unsigned int cpu)
 {
struct rdtgroup *rdtgrp;
struct rdt_resource *r;
@@ -861,7 +861,7 @@ static __init bool get_rdt_resources(void)
 
 static enum cpuhp_state rdt_online;
 
-static int __init intel_rdt_late_init(void)
+static int __init rdt_late_init(void)
 {
struct rdt_resource *r;
int state, ret;
@@ -873,7 +873,7 @@ static int __init intel_rdt_late_init(void)
 
state = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN,
  "x86/rdt/cat:online:",
- intel_rdt_online_cpu, intel_rdt_offline_cpu);
+ rdt_online_cpu, rdt_offline_cpu);
if (state < 0)
return state;
 
@@ -885,20 +885,20 @@ static int __init intel_rdt_late_ini

[PATCH v2 03/11] arch/x86: Re-arrange RDT init code

2018-10-05 Thread Moger, Babu

Separate the call sequence for rdt_quirks and MBA feature.
This is in preparation to handle vendor differences in these
call sequences.

Signed-off-by: Babu Moger 
---
 arch/x86/kernel/cpu/rdt.c | 29 +++--
 1 file changed, 23 insertions(+), 6 deletions(-)

diff --git a/arch/x86/kernel/cpu/rdt.c b/arch/x86/kernel/cpu/rdt.c
index b361c63170d7..c3ac7f9a3a0f 100644
--- a/arch/x86/kernel/cpu/rdt.c
+++ b/arch/x86/kernel/cpu/rdt.c
@@ -789,6 +789,16 @@ static bool __init rdt_cpu_has(int flag)
return ret;
 }
 
+static __init bool rdt_mba_config(void)
+{
+   if (rdt_cpu_has(X86_FEATURE_MBA)) {
+   if (rdt_get_mem_config(&rdt_resources_all[RDT_RESOURCE_MBA]))
+   return true;
+   }
+
+   return false;
+}
+
 static __init bool get_rdt_alloc_resources(void)
 {
bool ret = false;
@@ -813,10 +823,9 @@ static __init bool get_rdt_alloc_resources(void)
ret = true;
}
 
-   if (rdt_cpu_has(X86_FEATURE_MBA)) {
-   if (rdt_get_mem_config(&rdt_resources_all[RDT_RESOURCE_MBA]))
-   ret = true;
-   }
+   if (rdt_mba_config())
+   ret = true;
+
return ret;
 }
 
@@ -835,7 +844,7 @@ static __init bool get_rdt_mon_resources(void)
return !rdt_get_mon_l3_config(&rdt_resources_all[RDT_RESOURCE_L3]);
 }
 
-static __init void rdt_quirks(void)
+static __init void rdt_quirks_intel(void)
 {
switch (boot_cpu_data.x86_model) {
case INTEL_FAM6_HASWELL_X:
@@ -850,9 +859,14 @@ static __init void rdt_quirks(void)
}
 }
 
+static __init void rdt_quirks(void)
+{
+   if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL)
+   rdt_quirks_intel();
+}
+
 static __init bool get_rdt_resources(void)
 {
-   rdt_quirks();
rdt_alloc_capable = get_rdt_alloc_resources();
rdt_mon_capable = get_rdt_mon_resources();
 
@@ -866,6 +880,9 @@ static int __init rdt_late_init(void)
struct rdt_resource *r;
int state, ret;
 
+   /* Run quirks first */
+   rdt_quirks();
+
if (!get_rdt_resources())
return -ENODEV;
 
-- 
2.17.1

[patch] mm, page_alloc: set num_movable in move_freepages()

2018-10-05 Thread David Rientjes

If move_freepages_block() returns 0 because !zone_spans_pfn(), *num_movable can
hold the value from the stack because it does not get initialized in
move_freepages().

Move the initialization to move_freepages_block() to guarantee the value
actually makes sense.

This currently doesn't affect its only caller where num_movable != NULL,
so no bug fix, but just more robust.

Signed-off-by: David Rientjes 
---
 mm/page_alloc.c | 7 +++
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/mm/page_alloc.c b/mm/page_alloc.c
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -2015,10 +2015,6 @@ static int move_freepages(struct zone *zone,
  pfn_valid(page_to_pfn(end_page)) &&
  page_zone(start_page) != page_zone(end_page));
 #endif
-
-   if (num_movable)
-   *num_movable = 0;
-
for (page = start_page; page <= end_page;) {
if (!pfn_valid_within(page_to_pfn(page))) {
page++;
@@ -2058,6 +2054,9 @@ int move_freepages_block(struct zone *zone, struct page 
*page,
unsigned long start_pfn, end_pfn;
struct page *start_page, *end_page;
 
+   if (num_movable)
+   *num_movable = 0;
+
start_pfn = page_to_pfn(page);
start_pfn = start_pfn & ~(pageblock_nr_pages-1);
start_page = pfn_to_page(start_pfn);

[PATCH v2 08/11] arch/x86: Bring few more functions into the resource structure

2018-10-05 Thread Moger, Babu

Bring all resource functions that are different between the vendors
into resource structure and initialize them dynamically.
Add _intel suffix to Intel specific functions.

Implement these functions separately for each vendor.
update_mba_bw : Feedback loop bandwidth update functionality is not
needed for AMD.
cbm_validate  : Cache bitmask validate function. AMD allows
non-contiguous masks. So, use separate functions for
Intel and AMD.
Signed-off-by: Babu Moger 
---
 arch/x86/kernel/cpu/rdt.c | 10 +-
 arch/x86/kernel/cpu/rdt.h | 15 +++
 arch/x86/kernel/cpu/rdt_ctrlmondata.c |  4 ++--
 arch/x86/kernel/cpu/rdt_monitor.c | 10 +++---
 4 files changed, 29 insertions(+), 10 deletions(-)

diff --git a/arch/x86/kernel/cpu/rdt.c b/arch/x86/kernel/cpu/rdt.c
index 9680a43d9485..c7c2dbaae7bb 100644
--- a/arch/x86/kernel/cpu/rdt.c
+++ b/arch/x86/kernel/cpu/rdt.c
@@ -874,10 +874,18 @@ static __init void rdt_init_res_defs_intel(void)
struct rdt_resource *r;
 
for_each_rdt_resource(r) {
-   if (r->rid == RDT_RESOURCE_MBA) {
+   if ((r->rid == RDT_RESOURCE_L3) ||
+   (r->rid == RDT_RESOURCE_L3DATA) ||
+   (r->rid == RDT_RESOURCE_L3CODE) ||
+   (r->rid == RDT_RESOURCE_L2) ||
+   (r->rid == RDT_RESOURCE_L2DATA) ||
+   (r->rid == RDT_RESOURCE_L2CODE))
+   r->cbm_validate = cbm_validate_intel;
+   else if (r->rid == RDT_RESOURCE_MBA) {
r->msr_base = IA32_MBA_THRTL_BASE;
r->msr_update = mba_wrmsr_intel;
r->parse_ctrlval = parse_bw_intel;
+   r->update_mba_bw = update_mba_bw_intel;
}
}
 }
diff --git a/arch/x86/kernel/cpu/rdt.h b/arch/x86/kernel/cpu/rdt.h
index 42bf239313a0..cb7e5a4739fc 100644
--- a/arch/x86/kernel/cpu/rdt.h
+++ b/arch/x86/kernel/cpu/rdt.h
@@ -410,10 +410,12 @@ struct rdt_parse_data {
  * @cache: Cache allocation related data
  * @format_str:Per resource format string to show domain value
  * @parse_ctrlval: Per resource function pointer to parse control values
- * @evt_list:  List of monitoring events
- * @num_rmid:  Number of RMIDs available
- * @mon_scale: cqm counter * mon_scale = occupancy in bytes
- * @fflags:flags to choose base and info files
+ * @update_mba_bw: Feedback loop for MBA software controller function
+ * @cbm_validate:  Cache bitmask validate function
+ * @evt_list:  List of monitoring events
+ * @num_rmid:  Number of RMIDs available
+ * @mon_scale: cqm counter * mon_scale = occupancy in bytes
+ * @fflags:flags to choose base and info files
  */
 struct rdt_resource {
int rid;
@@ -436,6 +438,9 @@ struct rdt_resource {
int (*parse_ctrlval)(struct rdt_parse_data *data,
 struct rdt_resource *r,
 struct rdt_domain *d);
+   void (*update_mba_bw)(struct rdtgroup *rgrp,
+ struct rdt_domain *dom_mbm);
+   bool (*cbm_validate)(char *buf, u32 *data, struct rdt_resource *r);
struct list_headevt_list;
int num_rmid;
unsigned intmon_scale;
@@ -576,5 +581,7 @@ void cqm_setup_limbo_handler(struct rdt_domain *dom, 
unsigned long delay_ms);
 void cqm_handle_limbo(struct work_struct *work);
 bool has_busy_rmid(struct rdt_resource *r, struct rdt_domain *d);
 void __check_limbo(struct rdt_domain *d, bool force_free);
+void update_mba_bw_intel(struct rdtgroup *rgrp, struct rdt_domain *dom_mbm);
+bool cbm_validate_intel(char *buf, u32 *data, struct rdt_resource *r);
 
 #endif /* _ASM_X86_RDT_H */
diff --git a/arch/x86/kernel/cpu/rdt_ctrlmondata.c 
b/arch/x86/kernel/cpu/rdt_ctrlmondata.c
index ee3e8389d8d2..af8506003ee8 100644
--- a/arch/x86/kernel/cpu/rdt_ctrlmondata.c
+++ b/arch/x86/kernel/cpu/rdt_ctrlmondata.c
@@ -88,7 +88,7 @@ int parse_bw_intel(struct rdt_parse_data *data, struct 
rdt_resource *r,
  * are allowed (e.g. H, 0FF0H, 003CH, etc.).
  * Additionally Haswell requires at least two bits set.
  */
-static bool cbm_validate(char *buf, u32 *data, struct rdt_resource *r)
+bool cbm_validate_intel(char *buf, u32 *data, struct rdt_resource *r)
 {
unsigned long first_bit, zero_bit, val;
unsigned int cbm_len = r->cache.cbm_len;
@@ -148,7 +148,7 @@ int parse_cbm(struct rdt_parse_data *data, struct 
rdt_resource *r,
return -EINVAL;
}
 
-   if (!cbm_validate(data->buf, &cbm_val, r))
+   if (r->cbm_validate && !r->cbm_validate(data->buf, &cbm_val, r))
return -EINVAL;
 
if ((rdtgrp->mode == RDT_MODE_EXCLUSIVE ||
diff --git a/arch/x86/kernel/cpu/rdt_

[PATCH v2 09/11] arch/x86: Introduce new config parameter AMD_QOS

2018-10-05 Thread Moger, Babu

Introduces the new config parameter AMD_QOS. This parameter will be
used to enable cache and memory bandwidth allocation and monitoring
features on AMD processors. This will enable common config parameter
PLATFORM_QOS if selected.

Signed-off-by: Babu Moger 
---
 arch/x86/Kconfig | 17 -
 1 file changed, 16 insertions(+), 1 deletion(-)

diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 7f2da780a327..0bfb5f4f32f2 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -458,9 +458,24 @@ config INTEL_RDT
 
  Say N if unsure.
 
+config AMD_QOS
+   bool "AMD Quality of Service support"
+   default n
+   depends on X86 && CPU_SUP_AMD
+   select KERNFS
+   help
+ Select to enable cache and memory bandwidth enforcement and monitoring
+ features of AMD processors. These features are intended to provide
+ support for the monitoring of the usage of certain system resources
+ by one or more processors and for the separate allocation and
+ enforcement of limits on the use of certain system resources by one or
+ more processors.
+
+ Say N if unsure.
+
 config PLATFORM_QOS
def_bool y
-   depends on X86 && INTEL_RDT
+   depends on X86 && (INTEL_RDT || AMD_QOS)
 
 if X86_32
 config X86_BIGSMP
-- 
2.17.1

[PATCH v2 06/11] arch/x86: Use new config parameter PLATFORM_QOS for compilation

2018-10-05 Thread Moger, Babu

Use the newly added config parameter PLATFORM_QOS to compile the sources.
This parameter is common to both Intel and AMD.

Signed-off-by: Babu Moger 
---
 arch/x86/include/asm/rdt_sched.h | 4 ++--
 arch/x86/kernel/cpu/Makefile | 4 ++--
 include/linux/sched.h| 2 +-
 3 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/arch/x86/include/asm/rdt_sched.h b/arch/x86/include/asm/rdt_sched.h
index 666bf9acb41d..6018a362d1cf 100644
--- a/arch/x86/include/asm/rdt_sched.h
+++ b/arch/x86/include/asm/rdt_sched.h
@@ -2,7 +2,7 @@
 #ifndef _ASM_X86_RDT_SCHED_H
 #define _ASM_X86_RDT_SCHED_H
 
-#ifdef CONFIG_INTEL_RDT
+#ifdef CONFIG_PLATFORM_QOS
 
 #include 
 #include 
@@ -88,6 +88,6 @@ static inline void rdt_sched_in(void)
 
 static inline void rdt_sched_in(void) {}
 
-#endif /* CONFIG_INTEL_RDT */
+#endif /* CONFIG_PLATFORM_QOS */
 
 #endif /* _ASM_X86_RDT_SCHED_H */
diff --git a/arch/x86/kernel/cpu/Makefile b/arch/x86/kernel/cpu/Makefile
index 6c35d89f174f..8655adc84f11 100644
--- a/arch/x86/kernel/cpu/Makefile
+++ b/arch/x86/kernel/cpu/Makefile
@@ -35,8 +35,8 @@ obj-$(CONFIG_CPU_SUP_CENTAUR) += centaur.o
 obj-$(CONFIG_CPU_SUP_TRANSMETA_32) += transmeta.o
 obj-$(CONFIG_CPU_SUP_UMC_32)   += umc.o
 
-obj-$(CONFIG_INTEL_RDT)+= rdt.o rdt_rdtgroup.o rdt_monitor.o
-obj-$(CONFIG_INTEL_RDT)+= rdt_ctrlmondata.o rdt_pseudo_lock.o
+obj-$(CONFIG_PLATFORM_QOS) += rdt.o rdt_rdtgroup.o rdt_monitor.o
+obj-$(CONFIG_PLATFORM_QOS) += rdt_ctrlmondata.o rdt_pseudo_lock.o
 CFLAGS_rdt_pseudo_lock.o = -I$(src)
 
 obj-$(CONFIG_X86_MCE)  += mcheck/
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 977cb57d7bc9..1a4d00b7a5b1 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -985,7 +985,7 @@ struct task_struct {
/* cg_list protected by css_set_lock and tsk->alloc_lock: */
struct list_headcg_list;
 #endif
-#ifdef CONFIG_INTEL_RDT
+#ifdef CONFIG_PLATFORM_QOS
u32 closid;
u32 rmid;
 #endif
-- 
2.17.1

[PATCH v2 05/11] arch/x86: Introduce a new config parameter PLATFORM_QOS

2018-10-05 Thread Moger, Babu

Introduce a new config parameter PLATFORM_QOS.

This will be used as a common config parameter for both Intel and AMD.
Each vendor will have its own config parameter to enable the RDT feature:
one for Intel (INTEL_RDT) and one for AMD (AMD_QOS). They can be enabled or
disabled separately. The new parameter PLATFORM_QOS will depend
on INTEL_RDT or AMD_QOS.

Signed-off-by: Babu Moger 
---
 arch/x86/Kconfig | 4 
 1 file changed, 4 insertions(+)

diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 1a0be022f91d..7f2da780a327 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -458,6 +458,10 @@ config INTEL_RDT
 
  Say N if unsure.
 
+config PLATFORM_QOS
+   def_bool y
+   depends on X86 && INTEL_RDT
+
 if X86_32
 config X86_BIGSMP
bool "Support for big SMP systems with more than 8 CPUs"
-- 
2.17.1

[PATCHv4 1/2] arm64: dts: stratix10: Add Stratix10 SMMU support

2018-10-05 Thread thor . thayer

From: Thor Thayer 

Add SMMU support to the Stratix10 Device Tree which
includes adding the SMMU node and adding IOMMU stream
ids to the SMMU peripherals.

Signed-off-by: Thor Thayer 
---
v4  Add clock-name since clk_bulk_get() needs name
for clock.
v3  Remove bindings changes since not adding new structure.
Remove new compatible string - use default "arm,mmu-500"
v2  Add bindings changes and compatible string for SOCFPGA.
---
 arch/arm64/boot/dts/altera/socfpga_stratix10.dtsi | 29 +++
 1 file changed, 29 insertions(+)

diff --git a/arch/arm64/boot/dts/altera/socfpga_stratix10.dtsi 
b/arch/arm64/boot/dts/altera/socfpga_stratix10.dtsi
index d033da401c26..f58f7601ab88 100644
--- a/arch/arm64/boot/dts/altera/socfpga_stratix10.dtsi
+++ b/arch/arm64/boot/dts/altera/socfpga_stratix10.dtsi
@@ -137,6 +137,7 @@
reset-names = "stmmaceth", "stmmaceth-ocp";
clocks = <&clkmgr STRATIX10_EMAC0_CLK>;
clock-names = "stmmaceth";
+   iommus = <&smmu 1>;
status = "disabled";
};
 
@@ -150,6 +151,7 @@
reset-names = "stmmaceth", "stmmaceth-ocp";
clocks = <&clkmgr STRATIX10_EMAC1_CLK>;
clock-names = "stmmaceth";
+   iommus = <&smmu 2>;
status = "disabled";
};
 
@@ -163,6 +165,7 @@
reset-names = "stmmaceth", "stmmaceth-ocp";
clocks = <&clkmgr STRATIX10_EMAC2_CLK>;
clock-names = "stmmaceth";
+   iommus = <&smmu 3>;
status = "disabled";
};
 
@@ -273,6 +276,7 @@
clocks = <&clkmgr STRATIX10_L4_MP_CLK>,
 <&clkmgr STRATIX10_SDMMC_CLK>;
clock-names = "biu", "ciu";
+   iommus = <&smmu 5>;
status = "disabled";
};
 
@@ -307,6 +311,29 @@
altr,modrst-offset = <0x20>;
};
 
+   smmu: iommu@fa00 {
+   compatible = "arm,mmu-500", "arm,smmu-v2";
+   reg = <0xfa00 0x4>;
+   #global-interrupts = <2>;
+   #iommu-cells = <1>;
+   clocks = <&clkmgr STRATIX10_L4_MAIN_CLK>;
+   clock-names = "iommu";
+   interrupt-parent = <&intc>;
+   interrupts = <0 128 4>, /* Global Secure Fault */
+   <0 129 4>, /* Global Non-secure Fault */
+   /* Non-secure Context Interrupts (32) */
+   <0 138 4>, <0 139 4>, <0 140 4>, <0 141 4>,
+   <0 142 4>, <0 143 4>, <0 144 4>, <0 145 4>,
+   <0 146 4>, <0 147 4>, <0 148 4>, <0 149 4>,
+   <0 150 4>, <0 151 4>, <0 152 4>, <0 153 4>,
+   <0 154 4>, <0 155 4>, <0 156 4>, <0 157 4>,
+   <0 158 4>, <0 159 4>, <0 160 4>, <0 161 4>,
+   <0 162 4>, <0 163 4>, <0 164 4>, <0 165 4>,
+   <0 166 4>, <0 167 4>, <0 168 4>, <0 169 4>;
+   stream-match-mask = <0x7ff0>;
+   status = "disabled";
+   };
+
spi0: spi@ffda4000 {
compatible = "snps,dw-apb-ssi";
#address-cells = <1>;
@@ -416,6 +443,7 @@
resets = <&rst USB0_RESET>, <&rst USB0_OCP_RESET>;
reset-names = "dwc2", "dwc2-ecc";
clocks = <&clkmgr STRATIX10_USB_CLK>;
+   iommus = <&smmu 6>;
status = "disabled";
};
 
@@ -428,6 +456,7 @@
resets = <&rst USB1_RESET>, <&rst USB1_OCP_RESET>;
reset-names = "dwc2", "dwc2-ecc";
clocks = <&clkmgr STRATIX10_USB_CLK>;
+   iommus = <&smmu 7>;
status = "disabled";
};
 
-- 
2.7.4

[PATCH v2 07/11] arch/x86: Initialize the resource functions that are different

2018-10-05 Thread Moger, Babu

Initialize the resource functions that are different between the
vendors. Some features are initialized differently between the vendors.
Add _intel suffix to Intel specific functions.

For example, MBA feature varies significantly between Intel and AMD.
Separate the initialization of these resource functions. That way we
can easily add AMD's functions later.

Signed-off-by: Babu Moger 
---
 arch/x86/kernel/cpu/rdt.c | 34 +++
 arch/x86/kernel/cpu/rdt.h |  8 +--
 arch/x86/kernel/cpu/rdt_ctrlmondata.c |  2 +-
 3 files changed, 36 insertions(+), 8 deletions(-)

diff --git a/arch/x86/kernel/cpu/rdt.c b/arch/x86/kernel/cpu/rdt.c
index 87fe073a0571..9680a43d9485 100644
--- a/arch/x86/kernel/cpu/rdt.c
+++ b/arch/x86/kernel/cpu/rdt.c
@@ -57,7 +57,8 @@ int max_name_width, max_data_width;
 bool rdt_alloc_capable;
 
 static void
-mba_wrmsr(struct rdt_domain *d, struct msr_param *m, struct rdt_resource *r);
+mba_wrmsr_intel(struct rdt_domain *d, struct msr_param *m,
+   struct rdt_resource *r);
 static void
 cat_wrmsr(struct rdt_domain *d, struct msr_param *m, struct rdt_resource *r);
 
@@ -171,10 +172,7 @@ struct rdt_resource rdt_resources_all[] = {
.rid= RDT_RESOURCE_MBA,
.name   = "MB",
.domains= domain_init(RDT_RESOURCE_MBA),
-   .msr_base   = IA32_MBA_THRTL_BASE,
-   .msr_update = mba_wrmsr,
.cache_level= 3,
-   .parse_ctrlval  = parse_bw,
.format_str = "%d=%*u",
.fflags = RFTYPE_RES_MB,
},
@@ -356,7 +354,8 @@ u32 delay_bw_map(unsigned long bw, struct rdt_resource *r)
 }
 
 static void
-mba_wrmsr(struct rdt_domain *d, struct msr_param *m, struct rdt_resource *r)
+mba_wrmsr_intel(struct rdt_domain *d, struct msr_param *m,
+   struct rdt_resource *r)
 {
unsigned int i;
 
@@ -870,6 +869,25 @@ static __init bool get_rdt_resources(void)
return (rdt_mon_capable || rdt_alloc_capable);
 }
 
+static __init void rdt_init_res_defs_intel(void)
+{
+   struct rdt_resource *r;
+
+   for_each_rdt_resource(r) {
+   if (r->rid == RDT_RESOURCE_MBA) {
+   r->msr_base = IA32_MBA_THRTL_BASE;
+   r->msr_update = mba_wrmsr_intel;
+   r->parse_ctrlval = parse_bw_intel;
+   }
+   }
+}
+
+static __init void rdt_init_res_defs(void)
+{
+   if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL)
+   rdt_init_res_defs_intel();
+}
+
 static enum cpuhp_state rdt_online;
 
 static int __init rdt_late_init(void)
@@ -877,6 +895,12 @@ static int __init rdt_late_init(void)
struct rdt_resource *r;
int state, ret;
 
+   /*
+* Initialize functions(or definitions) that are different
+* between vendors here.
+*/
+   rdt_init_res_defs();
+
/* Run quirks first */
rdt_quirks();
 
diff --git a/arch/x86/kernel/cpu/rdt.h b/arch/x86/kernel/cpu/rdt.h
index 8431af5c6825..42bf239313a0 100644
--- a/arch/x86/kernel/cpu/rdt.h
+++ b/arch/x86/kernel/cpu/rdt.h
@@ -444,8 +444,8 @@ struct rdt_resource {
 
 int parse_cbm(struct rdt_parse_data *data, struct rdt_resource *r,
  struct rdt_domain *d);
-int parse_bw(struct rdt_parse_data *data, struct rdt_resource *r,
-struct rdt_domain *d);
+int parse_bw_intel(struct rdt_parse_data *data, struct rdt_resource *r,
+  struct rdt_domain *d);
 
 extern struct mutex rdtgroup_mutex;
 
@@ -468,6 +468,10 @@ enum {
RDT_NUM_RESOURCES,
 };
 
+#define for_each_rdt_resource(r) \
+   for (r = rdt_resources_all; r < rdt_resources_all + RDT_NUM_RESOURCES;\
+r++)
+
 #define for_each_capable_rdt_resource(r) \
for (r = rdt_resources_all; r < rdt_resources_all + RDT_NUM_RESOURCES;\
 r++) \
diff --git a/arch/x86/kernel/cpu/rdt_ctrlmondata.c 
b/arch/x86/kernel/cpu/rdt_ctrlmondata.c
index 812cc5c5e39e..ee3e8389d8d2 100644
--- a/arch/x86/kernel/cpu/rdt_ctrlmondata.c
+++ b/arch/x86/kernel/cpu/rdt_ctrlmondata.c
@@ -64,7 +64,7 @@ static bool bw_validate(char *buf, unsigned long *data, 
struct rdt_resource *r)
return true;
 }
 
-int parse_bw(struct rdt_parse_data *data, struct rdt_resource *r,
+int parse_bw_intel(struct rdt_parse_data *data, struct rdt_resource *r,
 struct rdt_domain *d)
 {
unsigned long bw_val;
-- 
2.17.1

[PATCH v2 01/11] arch/x86: Start renaming the rdt files to more generic names

2018-10-05 Thread Moger, Babu

The new generation of AMD processors starts to support RDT (or QoS) features.
With more than one vendor supporting these features, it seems more
appropriate to rename these files.

Signed-off-by: Babu Moger 
---
 arch/x86/include/asm/{intel_rdt_sched.h => rdt_sched.h} | 0
 arch/x86/kernel/cpu/Makefile| 6 +++---
 arch/x86/kernel/cpu/{intel_rdt.c => rdt.c}  | 4 ++--
 arch/x86/kernel/cpu/{intel_rdt.h => rdt.h}  | 0
 .../cpu/{intel_rdt_ctrlmondata.c => rdt_ctrlmondata.c}  | 2 +-
 arch/x86/kernel/cpu/{intel_rdt_monitor.c => rdt_monitor.c}  | 2 +-
 .../cpu/{intel_rdt_pseudo_lock.c => rdt_pseudo_lock.c}  | 6 +++---
 ...ntel_rdt_pseudo_lock_event.h => rdt_pseudo_lock_event.h} | 2 +-
 .../x86/kernel/cpu/{intel_rdt_rdtgroup.c => rdt_rdtgroup.c} | 4 ++--
 arch/x86/kernel/process_32.c| 2 +-
 arch/x86/kernel/process_64.c| 2 +-
 11 files changed, 15 insertions(+), 15 deletions(-)
 rename arch/x86/include/asm/{intel_rdt_sched.h => rdt_sched.h} (100%)
 rename arch/x86/kernel/cpu/{intel_rdt.c => rdt.c} (99%)
 rename arch/x86/kernel/cpu/{intel_rdt.h => rdt.h} (100%)
 rename arch/x86/kernel/cpu/{intel_rdt_ctrlmondata.c => rdt_ctrlmondata.c} (99%)
 rename arch/x86/kernel/cpu/{intel_rdt_monitor.c => rdt_monitor.c} (99%)
 rename arch/x86/kernel/cpu/{intel_rdt_pseudo_lock.c => rdt_pseudo_lock.c} (99%)
 rename arch/x86/kernel/cpu/{intel_rdt_pseudo_lock_event.h => 
rdt_pseudo_lock_event.h} (95%)
 rename arch/x86/kernel/cpu/{intel_rdt_rdtgroup.c => rdt_rdtgroup.c} (99%)

diff --git a/arch/x86/include/asm/intel_rdt_sched.h 
b/arch/x86/include/asm/rdt_sched.h
similarity index 100%
rename from arch/x86/include/asm/intel_rdt_sched.h
rename to arch/x86/include/asm/rdt_sched.h
diff --git a/arch/x86/kernel/cpu/Makefile b/arch/x86/kernel/cpu/Makefile
index 347137e80bf5..6c35d89f174f 100644
--- a/arch/x86/kernel/cpu/Makefile
+++ b/arch/x86/kernel/cpu/Makefile
@@ -35,9 +35,9 @@ obj-$(CONFIG_CPU_SUP_CENTAUR) += centaur.o
 obj-$(CONFIG_CPU_SUP_TRANSMETA_32) += transmeta.o
 obj-$(CONFIG_CPU_SUP_UMC_32)   += umc.o
 
-obj-$(CONFIG_INTEL_RDT)+= intel_rdt.o intel_rdt_rdtgroup.o 
intel_rdt_monitor.o
-obj-$(CONFIG_INTEL_RDT)+= intel_rdt_ctrlmondata.o 
intel_rdt_pseudo_lock.o
-CFLAGS_intel_rdt_pseudo_lock.o = -I$(src)
+obj-$(CONFIG_INTEL_RDT)+= rdt.o rdt_rdtgroup.o rdt_monitor.o
+obj-$(CONFIG_INTEL_RDT)+= rdt_ctrlmondata.o rdt_pseudo_lock.o
+CFLAGS_rdt_pseudo_lock.o = -I$(src)
 
 obj-$(CONFIG_X86_MCE)  += mcheck/
 obj-$(CONFIG_MTRR) += mtrr/
diff --git a/arch/x86/kernel/cpu/intel_rdt.c b/arch/x86/kernel/cpu/rdt.c
similarity index 99%
rename from arch/x86/kernel/cpu/intel_rdt.c
rename to arch/x86/kernel/cpu/rdt.c
index abb71ac70443..28d6cd254ba9 100644
--- a/arch/x86/kernel/cpu/intel_rdt.c
+++ b/arch/x86/kernel/cpu/rdt.c
@@ -30,8 +30,8 @@
 #include 
 
 #include 
-#include 
-#include "intel_rdt.h"
+#include 
+#include "rdt.h"
 
 #define MBA_IS_LINEAR  0x4
 #define MBA_MAX_MBPS   U32_MAX
diff --git a/arch/x86/kernel/cpu/intel_rdt.h b/arch/x86/kernel/cpu/rdt.h
similarity index 100%
rename from arch/x86/kernel/cpu/intel_rdt.h
rename to arch/x86/kernel/cpu/rdt.h
diff --git a/arch/x86/kernel/cpu/intel_rdt_ctrlmondata.c 
b/arch/x86/kernel/cpu/rdt_ctrlmondata.c
similarity index 99%
rename from arch/x86/kernel/cpu/intel_rdt_ctrlmondata.c
rename to arch/x86/kernel/cpu/rdt_ctrlmondata.c
index 0f53049719cd..812cc5c5e39e 100644
--- a/arch/x86/kernel/cpu/intel_rdt_ctrlmondata.c
+++ b/arch/x86/kernel/cpu/rdt_ctrlmondata.c
@@ -26,7 +26,7 @@
 #include 
 #include 
 #include 
-#include "intel_rdt.h"
+#include "rdt.h"
 
 /*
  * Check whether MBA bandwidth percentage value is correct. The value is
diff --git a/arch/x86/kernel/cpu/intel_rdt_monitor.c 
b/arch/x86/kernel/cpu/rdt_monitor.c
similarity index 99%
rename from arch/x86/kernel/cpu/intel_rdt_monitor.c
rename to arch/x86/kernel/cpu/rdt_monitor.c
index b0f3aed76b75..2898a61cbdd9 100644
--- a/arch/x86/kernel/cpu/intel_rdt_monitor.c
+++ b/arch/x86/kernel/cpu/rdt_monitor.c
@@ -26,7 +26,7 @@
 #include 
 #include 
 #include 
-#include "intel_rdt.h"
+#include "rdt.h"
 
 #define MSR_IA32_QM_CTR0x0c8e
 #define MSR_IA32_QM_EVTSEL 0x0c8d
diff --git a/arch/x86/kernel/cpu/intel_rdt_pseudo_lock.c 
b/arch/x86/kernel/cpu/rdt_pseudo_lock.c
similarity index 99%
rename from arch/x86/kernel/cpu/intel_rdt_pseudo_lock.c
rename to arch/x86/kernel/cpu/rdt_pseudo_lock.c
index 40f3903ae5d9..6105a2af3216 100644
--- a/arch/x86/kernel/cpu/intel_rdt_pseudo_lock.c
+++ b/arch/x86/kernel/cpu/rdt_pseudo_lock.c
@@ -23,13 +23,13 @@
 
 #include 
 #include 
-#include 
+#include 
 #include 
 
-#include "intel_rdt.h"
+#include "rdt.h"
 
 #define CREATE_TRACE_POINTS
-#include "intel_rdt_pseudo_lock_event.h"
+#include "rdt_pseudo_lock_event.h"
 
 /*
  * MSR_MISC_FEATURE_CONTROL register enables the modification of h

[PATCH v2 10/11] arch/x86: Add AMD feature bit X86_FEATURE_MBA in cpuid bits array

2018-10-05 Thread Moger, Babu

From: Sherry Hurwitz 

The feature bit X86_FEATURE_MBA is detected via CPUID leaf 0x80000008
EBX Bit 06. This bit indicates the support of AMD's MBA feature.

This feature is supported by both Intel and AMD, but it is detected
via different CPUID leaves.

Signed-off-by: Babu Moger 
Signed-off-by: Sherry Hurwitz 
---
 arch/x86/kernel/cpu/scattered.c | 7 ++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/arch/x86/kernel/cpu/scattered.c b/arch/x86/kernel/cpu/scattered.c
index 772c219b6889..bd7853334b27 100644
--- a/arch/x86/kernel/cpu/scattered.c
+++ b/arch/x86/kernel/cpu/scattered.c
@@ -17,7 +17,11 @@ struct cpuid_bit {
u32 sub_leaf;
 };
 
-/* Please keep the leaf sorted by cpuid_bit.level for faster search. */
+/*
+ * Please keep the leaf sorted by cpuid_bit.level for faster search.
+ * X86_FEATURE_MBA supported by both Intel and AMD. But the cpuid
+ * levels are different. Add a separate entry for each.
+ */
 static const struct cpuid_bit cpuid_bits[] = {
{ X86_FEATURE_APERFMPERF,   CPUID_ECX,  0, 0x0006, 0 },
{ X86_FEATURE_EPB,  CPUID_ECX,  3, 0x0006, 0 },
@@ -29,6 +33,7 @@ static const struct cpuid_bit cpuid_bits[] = {
{ X86_FEATURE_HW_PSTATE,CPUID_EDX,  7, 0x8007, 0 },
{ X86_FEATURE_CPB,  CPUID_EDX,  9, 0x8007, 0 },
{ X86_FEATURE_PROC_FEEDBACK,CPUID_EDX, 11, 0x8007, 0 },
+   { X86_FEATURE_MBA,  CPUID_EBX,  6, 0x8008, 0 },
{ X86_FEATURE_SME,  CPUID_EAX,  0, 0x801f, 0 },
{ X86_FEATURE_SEV,  CPUID_EAX,  1, 0x801f, 0 },
{ 0, 0, 0, 0, 0 }
-- 
2.17.1

[PATCH v2 11/11] arch/x86: Introduce QOS feature for AMD

2018-10-05 Thread Moger, Babu

Enable the QoS feature on AMD.
The following QoS sub-features are supported on AMD if the underlying
hardware supports them.
 - L3 Cache allocation enforcement
 - L3 Cache occupancy monitoring
 - L3 Code-Data Prioritization support
 - Memory Bandwidth Enforcement(Allocation)

There are differences in the way some of the features are implemented.
Separate those functions and add them as vendor-specific functions.
The major difference is in the MBA feature.
 - AMD uses CPUID leaf 0x80000020 to initialize the MBA features.
 - AMD uses a direct bandwidth value instead of a delay based on bandwidth
   values.
 - MSR register base addresses are different for MBA.
 - Also, AMD allows non-contiguous L3 cache bit masks.

Adds following functions to take care of the differences.
rdt_get_mem_config_amd : MBA initialization function
parse_bw_amd : Bandwidth parsing
mba_wrmsr_amd: Writes bandwidth value
cbm_validate_amd : L3 cache bitmask validation

Signed-off-by: Babu Moger 
---
 arch/x86/kernel/cpu/rdt.c | 69 +-
 arch/x86/kernel/cpu/rdt.h |  5 ++
 arch/x86/kernel/cpu/rdt_ctrlmondata.c | 70 +++
 3 files changed, 142 insertions(+), 2 deletions(-)

diff --git a/arch/x86/kernel/cpu/rdt.c b/arch/x86/kernel/cpu/rdt.c
index c7c2dbaae7bb..99b3a69457c7 100644
--- a/arch/x86/kernel/cpu/rdt.c
+++ b/arch/x86/kernel/cpu/rdt.c
@@ -61,6 +61,9 @@ mba_wrmsr_intel(struct rdt_domain *d, struct msr_param *m,
struct rdt_resource *r);
 static void
 cat_wrmsr(struct rdt_domain *d, struct msr_param *m, struct rdt_resource *r);
+static void
+mba_wrmsr_amd(struct rdt_domain *d, struct msr_param *m,
+ struct rdt_resource *r);
 
 #define domain_init(id) LIST_HEAD_INIT(rdt_resources_all[id].domains)
 
@@ -280,6 +283,31 @@ static bool rdt_get_mem_config(struct rdt_resource *r)
return true;
 }
 
+static bool rdt_get_mem_config_amd(struct rdt_resource *r)
+{
+   union cpuid_0x10_3_eax eax;
+   union cpuid_0x10_x_edx edx;
+   u32 ebx, ecx;
+
+   cpuid_count(0x8020, 1, &eax.full, &ebx, &ecx, &edx.full);
+   r->num_closid = edx.split.cos_max + 1;
+   r->default_ctrl = MAX_MBA_BW_AMD;
+
+   /* AMD does not use delay. Set delay_linear to false by default */
+   r->membw.delay_linear = false;
+
+   /* FIX ME - May need to be read from MSR */
+   r->membw.min_bw = 0;
+   r->membw.bw_gran = 1;
+   /* Max value is 2048, Data width should be 4 in decimal */
+   r->data_width = 4;
+
+   r->alloc_capable = true;
+   r->alloc_enabled = true;
+
+   return true;
+}
+
 static void rdt_get_cache_alloc_cfg(int idx, struct rdt_resource *r)
 {
union cpuid_0x10_1_eax eax;
@@ -339,6 +367,16 @@ static int get_cache_id(int cpu, int level)
return -1;
 }
 
+static void
+mba_wrmsr_amd(struct rdt_domain *d, struct msr_param *m, struct rdt_resource 
*r)
+{
+   unsigned int i;
+
+   /*  Write the bw values for mba. */
+   for (i = m->low; i < m->high; i++)
+   wrmsrl(r->msr_base + i, d->ctrl_val[i]);
+}
+
 /*
  * Map the memory b/w percentage value to delay values
  * that can be written to QOS_MSRs.
@@ -788,8 +826,13 @@ static bool __init rdt_cpu_has(int flag)
 static __init bool rdt_mba_config(void)
 {
if (rdt_cpu_has(X86_FEATURE_MBA)) {
-   if (rdt_get_mem_config(&rdt_resources_all[RDT_RESOURCE_MBA]))
-   return true;
+   if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) {
+   if 
(rdt_get_mem_config(&rdt_resources_all[RDT_RESOURCE_MBA]))
+   return true;
+   } else if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD) {
+   if 
(rdt_get_mem_config_amd(&rdt_resources_all[RDT_RESOURCE_MBA]))
+   return true;
+   }
}
 
return false;
@@ -890,10 +933,32 @@ static __init void rdt_init_res_defs_intel(void)
}
 }
 
+static __init void rdt_init_res_defs_amd(void)
+{
+   struct rdt_resource *r;
+
+   for_each_rdt_resource(r) {
+   if ((r->rid == RDT_RESOURCE_L3) ||
+   (r->rid == RDT_RESOURCE_L3DATA) ||
+   (r->rid == RDT_RESOURCE_L3CODE) ||
+   (r->rid == RDT_RESOURCE_L2) ||
+   (r->rid == RDT_RESOURCE_L2DATA) ||
+   (r->rid == RDT_RESOURCE_L2CODE))
+   r->cbm_validate = cbm_validate_amd;
+   else if (r->rid == RDT_RESOURCE_MBA) {
+   r->msr_base = IA32_MBA_BW_BASE;
+   r->msr_update = mba_wrmsr_amd;
+   r->parse_ctrlval = parse_bw_amd;
+   }
+   }
+}
+
 static __init void rdt_init_res_defs(void)
 {
if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL)
rdt_init_res_defs_intel();
+   else if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD)
+   rdt_init_res_defs_amd()

[PATCH v2 00/11] arch/x86: AMD QoS support

2018-10-05 Thread Moger, Babu

This series adds support for AMD64 architectural extensions for Platform
Quality of Service. These extensions are intended to provide for the
monitoring of the usage of certain system resources by one or more
processors and for the separate allocation and enforcement of limits on
the use of certain system resources by one or more processors.

The monitoring and enforcement are not necessarily applied across the
entire system, but in general apply to a QOS domain which corresponds to
some shared system resource.  The set of resources which are monitored and
the set for which the enforcement of limits is provided are implementation
dependent. Platform QOS features are implemented on a logical processor basis.
Therefore, multiple hardware threads of a single physical CPU core may have
independent resource monitoring and enforcement configurations.

AMD's next generation of processors supports the following QoS sub-features.
- L3 Cache allocation enforcement
- L3 Cache occupancy monitoring
- L3 Code-Data Prioritization support
- Memory Bandwidth Enforcement(Allocation)

The public specification for this feature is available at
https://www.amd.com/system/files/TechDocs/56375_Quality_of_Service_Extensions.pdf

Obviously, there are multiple ways we can go about these changes. We felt
it was appropriate to rename and re-organize the code a little bit before
making the functional changes. The first few patches (1-6) rename and
re-organize the sources in preparation. The rest of the patches (7-11) add
support for AMD QoS features.

Please review and provide feedback. If you think of any better way to
approach this, please let us know.

Changes from v1 -> v2:
 a. Removed RFC from subject header. Based on the discussion so far,
the plan is to go ahead with these patches and eventually re-structure
the code to make arch and non-arch separate.
 b. Addressed comments from Reinette Chatre and Fenghua Yu.
 c. Separated quirks and MBA from rdt init code. Kept the rest of the
code as is.
 d. Added _intel suffixes to all the Intel-only code, just like the AMD code.
 e. Added one more patch to bring the macros into header file.
 f. Few minor text changes.

v1:
https://lore.kernel.org/lkml/20180924191841.29111-1-babu.mo...@amd.com/

Babu Moger (10):
  arch/x86: Start renaming the rdt files to more generic names
  arch/x86: Rename the RDT functions and definitions
  arch/x86: Re-arrange RDT init code
  arch/x86: Bring all the macros to rdt.h
  arch/x86: Introduce a new config parameter PLATFORM_QOS
  arch/x86: Use new config parameter PLATFORM_QOS for compilation
  arch/x86: Initialize the resource functions that are different
  arch/x86: Bring few more functions into the resource structure
  arch/x86: Introduce new config parameter AMD_QOS
  arch/x86: Introduce QOS feature for AMD

Sherry Hurwitz (1):
  arch/x86: Add AMD feature bit X86_FEATURE_MBA in cpuid bits array

 arch/x86/Kconfig  |  19 ++
 .../asm/{intel_rdt_sched.h => rdt_sched.h}|  26 +--
 arch/x86/kernel/cpu/Makefile  |   6 +-
 arch/x86/kernel/cpu/{intel_rdt.c => rdt.c}| 167 +++---
 arch/x86/kernel/cpu/{intel_rdt.h => rdt.h}|  41 +++--
 ...el_rdt_ctrlmondata.c => rdt_ctrlmondata.c} |  78 +++-
 .../{intel_rdt_monitor.c => rdt_monitor.c}|  29 +--
 ...el_rdt_pseudo_lock.c => rdt_pseudo_lock.c} |   6 +-
 ...o_lock_event.h => rdt_pseudo_lock_event.h} |   2 +-
 .../{intel_rdt_rdtgroup.c => rdt_rdtgroup.c}  |  14 +-
 arch/x86/kernel/cpu/scattered.c   |   7 +-
 arch/x86/kernel/process_32.c  |   4 +-
 arch/x86/kernel/process_64.c  |   4 +-
 include/linux/sched.h |   2 +-
 14 files changed, 316 insertions(+), 89 deletions(-)
 rename arch/x86/include/asm/{intel_rdt_sched.h => rdt_sched.h} (80%)
 rename arch/x86/kernel/cpu/{intel_rdt.c => rdt.c} (85%)
 rename arch/x86/kernel/cpu/{intel_rdt.h => rdt.h} (92%)
 rename arch/x86/kernel/cpu/{intel_rdt_ctrlmondata.c => rdt_ctrlmondata.c} (86%)
 rename arch/x86/kernel/cpu/{intel_rdt_monitor.c => rdt_monitor.c} (96%)
 rename arch/x86/kernel/cpu/{intel_rdt_pseudo_lock.c => rdt_pseudo_lock.c} (99%)
 rename arch/x86/kernel/cpu/{intel_rdt_pseudo_lock_event.h => 
rdt_pseudo_lock_event.h} (95%)
 rename arch/x86/kernel/cpu/{intel_rdt_rdtgroup.c => rdt_rdtgroup.c} (99%)

-- 
2.17.1

[PATCH v2 04/11] arch/x86: Bring all the macros to rdt.h

2018-10-05 Thread Moger, Babu

Bring all the macros to rdt.h and rename for consistency.

Signed-off-by: Babu Moger 
---
 arch/x86/kernel/cpu/rdt.c | 3 ---
 arch/x86/kernel/cpu/rdt.h | 5 +
 arch/x86/kernel/cpu/rdt_monitor.c | 7 ++-
 3 files changed, 7 insertions(+), 8 deletions(-)

diff --git a/arch/x86/kernel/cpu/rdt.c b/arch/x86/kernel/cpu/rdt.c
index c3ac7f9a3a0f..87fe073a0571 100644
--- a/arch/x86/kernel/cpu/rdt.c
+++ b/arch/x86/kernel/cpu/rdt.c
@@ -33,9 +33,6 @@
 #include 
 #include "rdt.h"
 
-#define MBA_IS_LINEAR  0x4
-#define MBA_MAX_MBPS   U32_MAX
-
 /* Mutex to protect rdtgroup access. */
 DEFINE_MUTEX(rdtgroup_mutex);
 
diff --git a/arch/x86/kernel/cpu/rdt.h b/arch/x86/kernel/cpu/rdt.h
index 1d7aa7e266af..8431af5c6825 100644
--- a/arch/x86/kernel/cpu/rdt.h
+++ b/arch/x86/kernel/cpu/rdt.h
@@ -12,6 +12,9 @@
 #define IA32_L2_CBM_BASE   0xd10
 #define IA32_MBA_THRTL_BASE0xd50
 
+#define IA32_QM_CTR0x0c8e
+#define IA32_QM_EVTSEL 0x0c8d
+
 #define L3_QOS_CDP_ENABLE  0x01ULL
 
 #define L2_QOS_CDP_ENABLE  0x01ULL
@@ -29,6 +32,8 @@
 #define MBM_CNTR_WIDTH 24
 #define MBM_OVERFLOW_INTERVAL  1000
 #define MAX_MBA_BW 100u
+#define MBA_IS_LINEAR  0x4
+#define MBA_MAX_MBPS   U32_MAX
 
 #define RMID_VAL_ERROR BIT_ULL(63)
 #define RMID_VAL_UNAVAIL   BIT_ULL(62)
diff --git a/arch/x86/kernel/cpu/rdt_monitor.c 
b/arch/x86/kernel/cpu/rdt_monitor.c
index 577514cd4a71..c8b95561f5be 100644
--- a/arch/x86/kernel/cpu/rdt_monitor.c
+++ b/arch/x86/kernel/cpu/rdt_monitor.c
@@ -28,9 +28,6 @@
 #include 
 #include "rdt.h"
 
-#define MSR_IA32_QM_CTR0x0c8e
-#define MSR_IA32_QM_EVTSEL 0x0c8d
-
 struct rmid_entry {
u32 rmid;
int busy;
@@ -97,8 +94,8 @@ static u64 __rmid_read(u32 rmid, u32 eventid)
 * IA32_QM_CTR.Error (bit 63) and IA32_QM_CTR.Unavailable (bit 62)
 * are error bits.
 */
-   wrmsr(MSR_IA32_QM_EVTSEL, eventid, rmid);
-   rdmsrl(MSR_IA32_QM_CTR, val);
+   wrmsr(IA32_QM_EVTSEL, eventid, rmid);
+   rdmsrl(IA32_QM_CTR, val);
 
return val;
 }
-- 
2.17.1

1 2 3 4 5 6 7 >

1 - 100 of 648 matches

Mail list logo