RE: [PATCH v8 04/20] dlb: add device ioctl layer and first three ioctls

2021-01-08 Thread Chen, Mike Ximing



> -----Original Message-----
> From: Greg KH 
> Sent: Thursday, January 7, 2021 2:42 PM
> To: Chen, Mike Ximing 
> Cc: linux-kernel@vger.kernel.org; a...@arndb.de; Williams, Dan J
> ; pierre-louis.boss...@linux.intel.com; Gage Eads
> 
> Subject: Re: [PATCH v8 04/20] dlb: add device ioctl layer and first three 
> ioctls
> 
> > +/* [7:0]: device revision, [15:8]: device version */
> > +#define DLB_SET_DEVICE_VERSION(ver, rev) (((ver) << 8) | (rev))
> > +
> > +static int
> > +dlb_ioctl_get_device_version(struct dlb *dlb __attribute__((unused)),
> 
> We don't use __attribute__((unused)) for function variables in Linux.
> Please remove and tell whatever operating system you ported this from to
> get with the times :)
> 
> thanks,
> 
> greg k-h

OK. Will remove __attribute__((unused)) in the patch set.

Thanks!

Mike 


RE: [PATCH v8 04/20] dlb: add device ioctl layer and first three ioctls

2021-01-08 Thread Chen, Mike Ximing



> -----Original Message-----
> From: Greg KH 
> Sent: Thursday, January 7, 2021 2:51 PM
> To: Chen, Mike Ximing 
> Cc: linux-kernel@vger.kernel.org; a...@arndb.de; Williams, Dan J
> ; pierre-louis.boss...@linux.intel.com; Gage Eads
> 
> Subject: Re: [PATCH v8 04/20] dlb: add device ioctl layer and first three 
> ioctls
> 
> On Mon, Jan 04, 2021 at 08:58:23PM -0600, Mike Ximing Chen wrote:
> > Introduce the dlb device ioctl layer and the first three ioctls: query
> > device version, query available resources, and create a scheduling domain.
> > Also introduce the user-space interface file dlb_user.h.
> >
> > The device version query is designed to allow each DLB device version/type
> > to have its own unique ioctl API through the /dev/dlb%d node. Each such API
> > would share in common the device version command as its first command, and
> > all subsequent commands can be unique to the particular device.
> >
> > The hardware operation for scheduling domain creation will be added in a
> > subsequent commit.
> >
> > Signed-off-by: Gage Eads 
> > Signed-off-by: Mike Ximing Chen 
> > Reviewed-by: Magnus Karlsson 
> > Reviewed-by: Dan Williams 
> > ---
> >  .../userspace-api/ioctl/ioctl-number.rst  |   1 +
> >  drivers/misc/dlb/Makefile |   2 +-
> >  drivers/misc/dlb/dlb_bitmap.h |  32 
> >  drivers/misc/dlb/dlb_ioctl.c  | 119 +
> >  drivers/misc/dlb/dlb_ioctl.h  |  11 ++
> >  drivers/misc/dlb/dlb_main.c   |   3 +
> >  drivers/misc/dlb/dlb_main.h   |   7 +
> >  drivers/misc/dlb/dlb_pf_ops.c |  21 +++
> >  drivers/misc/dlb/dlb_resource.c   |  63 +++
> >  drivers/misc/dlb/dlb_resource.h   |   5 +
> >  include/uapi/linux/dlb.h  | 166 ++
> >  11 files changed, 429 insertions(+), 1 deletion(-)
> >  create mode 100644 drivers/misc/dlb/dlb_ioctl.c
> >  create mode 100644 drivers/misc/dlb/dlb_ioctl.h
> >  create mode 100644 include/uapi/linux/dlb.h
> >
> > diff --git a/Documentation/userspace-api/ioctl/ioctl-number.rst
> b/Documentation/userspace-api/ioctl/ioctl-number.rst
> > index 55a2d9b2ce33..afca043d59f8 100644
> > --- a/Documentation/userspace-api/ioctl/ioctl-number.rst
> > +++ b/Documentation/userspace-api/ioctl/ioctl-number.rst
> > @@ -241,6 +241,7 @@ Code  Seq#Include File
> Comments
> >  'h'   00-7F  
> > conflict! Charon filesystem
> >   
> > 
> >  'h'   00-1F  linux/hpet.h
> > conflict!
> > +'h'   00-1F  uapi/linux/dlb.h
> > conflict!
> >  'h'   80-8F  fs/hfsplus/ioctl.c
> >  'i'   00-3F  linux/i2o-dev.h 
> > conflict!
> >  'i'   0B-1F  linux/ipmi.h
> > conflict!
> > diff --git a/drivers/misc/dlb/Makefile b/drivers/misc/dlb/Makefile
> > index 8a49ea5fd752..aaafb3086d8d 100644
> > --- a/drivers/misc/dlb/Makefile
> > +++ b/drivers/misc/dlb/Makefile
> > @@ -7,4 +7,4 @@
> >  obj-$(CONFIG_INTEL_DLB) := dlb.o
> >
> >  dlb-objs := dlb_main.o
> > -dlb-objs += dlb_pf_ops.o dlb_resource.o
> > +dlb-objs += dlb_pf_ops.o dlb_resource.o dlb_ioctl.o
> > diff --git a/drivers/misc/dlb/dlb_bitmap.h b/drivers/misc/dlb/dlb_bitmap.h
> > index fb3ef52a306d..3ea78b42c79f 100644
> > --- a/drivers/misc/dlb/dlb_bitmap.h
> > +++ b/drivers/misc/dlb/dlb_bitmap.h
> > @@ -73,4 +73,36 @@ static inline void dlb_bitmap_free(struct dlb_bitmap
> *bitmap)
> > kfree(bitmap);
> >  }
> >
> > +/**
> > + * dlb_bitmap_longest_set_range() - returns longest contiguous range of set
> > + *  bits
> > + * @bitmap: pointer to dlb_bitmap structure.
> > + *
> > + * Return:
> > + * Returns the bitmap's longest contiguous range of set bits upon success,
> > + * <0 otherwise.
> > + *
> > + * Errors:
> > + * EINVAL - bitmap is NULL or is uninitialized.
> > + */
> > +static inline int dlb_bitmap_longest_set_range(struct dlb_bitmap *bitmap)
> > +{
> > +   int max_len, len;
> > +   int start, end;
> > +
> > +   if (!bitmap || !bitmap->map)
> > +   return -EINVAL;
> > +
> > +   if (bitmap_weight(bitmap->map, bitmap->len) == 0)
> > +   return 0;
> > +
> > +   max_len = 0;
> > +   bitmap_for_each_set_region(bitmap->map, start, end, 0, bitmap->len) {
> > +   len = end - start;
> > +   if (max_len < len)
> > +   max_len = len;
> > +   }
> > +   return max_len;
> > +}
> > +
> >  #endif /*  __DLB_OSDEP_BITMAP_H */
> > diff --git a/drivers/misc/dlb/dlb_ioctl.c b/drivers/misc/dlb/dlb_ioctl.c
> > new file mode 100644
> > index ..c072ed9b921c
> > --- /dev/null
> > +++ b/drivers/misc/dlb/dlb_ioctl.c
> > @@ -0,0 +1,119 @@
> > +// SPDX-License-Identifier: GPL-2.0-only
> 

[PATCH v1 6/7] perf cs-etm: Add helper cs_etm__get_pid_fmt()

2021-01-08 Thread Leo Yan
This patch adds the helper function cs_etm__get_pid_fmt(). Given the
"traceID" parameter, it returns the corresponding PID format.

Signed-off-by: Leo Yan 
---
 tools/perf/util/cs-etm.c | 18 ++
 tools/perf/util/cs-etm.h |  1 +
 2 files changed, 19 insertions(+)

diff --git a/tools/perf/util/cs-etm.c b/tools/perf/util/cs-etm.c
index 8c125134a756..6705d39c8cee 100644
--- a/tools/perf/util/cs-etm.c
+++ b/tools/perf/util/cs-etm.c
@@ -157,6 +157,24 @@ int cs_etm__get_cpu(u8 trace_chan_id, int *cpu)
return 0;
 }
 
+int cs_etm__get_pid_fmt(u8 trace_chan_id, u64 *pid_fmt)
+{
+   struct int_node *inode;
+   u64 *metadata;
+
+   inode = intlist__find(traceid_list, trace_chan_id);
+   if (!inode)
+   return -EINVAL;
+
+   metadata = inode->priv;
+   if (metadata[CS_ETM_MAGIC] == __perf_cs_etmv3_magic)
+   *pid_fmt = metadata[CS_ETM_PID_FMT];
+   else
+   *pid_fmt = metadata[CS_ETMV4_PID_FMT];
+
+   return 0;
+}
+
 void cs_etm__etmq_set_traceid_queue_timestamp(struct cs_etm_queue *etmq,
  u8 trace_chan_id)
 {
diff --git a/tools/perf/util/cs-etm.h b/tools/perf/util/cs-etm.h
index 8cbbea6100a1..98801040175f 100644
--- a/tools/perf/util/cs-etm.h
+++ b/tools/perf/util/cs-etm.h
@@ -175,6 +175,7 @@ struct cs_etm_packet_queue {
 int cs_etm__process_auxtrace_info(union perf_event *event,
  struct perf_session *session);
 int cs_etm__get_cpu(u8 trace_chan_id, int *cpu);
+int cs_etm__get_pid_fmt(u8 trace_chan_id, u64 *pid_fmt);
 int cs_etm__etmq_set_tid(struct cs_etm_queue *etmq,
 pid_t tid, u8 trace_chan_id);
 bool cs_etm__etmq_is_timeless(struct cs_etm_queue *etmq);
-- 
2.25.1



[PATCH v1 5/7] perf cs-etm: Fixup PID_FMT when it is zero

2021-01-08 Thread Leo Yan
If the metadata item CS_ETM_PID_FMT/CS_ETMV4_PID_FMT is zero, this means
the perf data file was recorded with an old version of the tool that had
not yet been extended to support the item.

For this case, this patch fixes up the PID_FMT entry by setting its value
to BIT(ETM_OPT_CTXTID); this info is delivered to the decoder so that it
extracts the PID from the packet's "context_id" field.

Signed-off-by: Leo Yan 
---
 tools/perf/util/cs-etm.c | 19 +++
 1 file changed, 19 insertions(+)

diff --git a/tools/perf/util/cs-etm.c b/tools/perf/util/cs-etm.c
index 763085db29ae..8c125134a756 100644
--- a/tools/perf/util/cs-etm.c
+++ b/tools/perf/util/cs-etm.c
@@ -7,6 +7,7 @@
  */
 
 #include 
+#include 
 #include 
 #include 
 #include 
@@ -2577,6 +2578,15 @@ int cs_etm__process_auxtrace_info(union perf_event 
*event,
for (k = 0; k < metadata_cpu_array_size; k++)
metadata[j][k] = ptr[i + k];
 
+   /*
+* If the data in CS_ETM_PID_FMT is zero, means the
+* information isn't stored in the data file, this is
+* because the old perf tool hasn't yet supported
+* CS_ETM_PID_FMT.  Fixup the item to option "CTXTID".
+*/
+   if (!metadata[j][CS_ETM_PID_FMT])
+   metadata[j][CS_ETM_PID_FMT] = 
BIT(ETM_OPT_CTXTID);
+
/* The traceID is our handle */
idx = metadata[j][CS_ETM_ETMTRACEIDR];
i += metadata_cpu_array_size;
@@ -2590,6 +2600,15 @@ int cs_etm__process_auxtrace_info(union perf_event 
*event,
for (k = 0; k < metadata_cpu_array_size; k++)
metadata[j][k] = ptr[i + k];
 
+   /*
+* If the data in CS_ETMV4_PID_FMT is zero, means the
+* information isn't stored in the data file, this is
+* because the old perf tool hasn't yet supported
+* CS_ETMV4_PID_FMT.  Fixup the item to option "CTXTID".
+*/
+   if (!metadata[j][CS_ETMV4_PID_FMT])
+   metadata[j][CS_ETMV4_PID_FMT] = 
BIT(ETM_OPT_CTXTID);
+
/* The traceID is our handle */
idx = metadata[j][CS_ETMV4_TRCTRACEIDR];
i += metadata_cpu_array_size;
-- 
2.25.1



[PATCH v1 3/7] perf cs-etm: Calculate per CPU metadata array size

2021-01-08 Thread Leo Yan
The metadata array can be extended over time. If the tool uses a
predefined macro (like CS_ETMV4_PRIV_MAX for ETMv4) as the metadata
array size when copying data, this can cause compatibility issues
between different versions of the perf tool.

E.g. suppose a data file was recorded with an old version of the tool
and is later parsed with a new version. Since the metadata array has
been extended and the macro CS_ETMV4_PRIV_MAX has changed, using the
macro to parse perf data in the old format leads to a size mismatch.

To maintain backward compatibility, this patch calculates the per-CPU
metadata array size at runtime. The calculation is based on the info
stored in the data file, so it is reliable.

Signed-off-by: Leo Yan 
---
 tools/perf/util/cs-etm.c | 22 ++
 1 file changed, 18 insertions(+), 4 deletions(-)

diff --git a/tools/perf/util/cs-etm.c b/tools/perf/util/cs-etm.c
index a2a369e2fbb6..5e284725dceb 100644
--- a/tools/perf/util/cs-etm.c
+++ b/tools/perf/util/cs-etm.c
@@ -2497,6 +2497,7 @@ int cs_etm__process_auxtrace_info(union perf_event *event,
int i, j, k;
u64 *ptr, *hdr = NULL;
u64 **metadata = NULL;
+   int metadata_cpu_array_size;
 
/*
 * sizeof(auxtrace_info_event::type) +
@@ -2544,6 +2545,19 @@ int cs_etm__process_auxtrace_info(union perf_event 
*event,
goto err_free_traceid_list;
}
 
+   /*
+* The metadata is a two dimensional array, the first dimension uses CPU
+* number as index and the second dimension is the metadata array per
+* CPU.  Since the metadata array can be extended over time, the
+* predefined macros (CS_ETM_PRIV_MAX or CS_ETMV4_PRIV_MAX) might
+* mismatch within different versions of tool, this can lead to copy
+* wrong data.  To maintain backward compatibility, calculate CPU's
+* metadata array size on the runtime.
+*/
+   metadata_cpu_array_size =
+   (auxtrace_info->header.size -
+sizeof(struct perf_record_auxtrace_info)) / num_cpu / 
sizeof(u64);
+
/*
 * The metadata is stored in the auxtrace_info section and encodes
 * the configuration of the ARM embedded trace macrocell which is
@@ -2558,12 +2572,12 @@ int cs_etm__process_auxtrace_info(union perf_event 
*event,
err = -ENOMEM;
goto err_free_metadata;
}
-   for (k = 0; k < CS_ETM_PRIV_MAX; k++)
+   for (k = 0; k < metadata_cpu_array_size; k++)
metadata[j][k] = ptr[i + k];
 
/* The traceID is our handle */
idx = metadata[j][CS_ETM_ETMTRACEIDR];
-   i += CS_ETM_PRIV_MAX;
+   i += metadata_cpu_array_size;
} else if (ptr[i] == __perf_cs_etmv4_magic) {
metadata[j] = zalloc(sizeof(*metadata[j]) *
 CS_ETMV4_PRIV_MAX);
@@ -2571,12 +2585,12 @@ int cs_etm__process_auxtrace_info(union perf_event 
*event,
err = -ENOMEM;
goto err_free_metadata;
}
-   for (k = 0; k < CS_ETMV4_PRIV_MAX; k++)
+   for (k = 0; k < metadata_cpu_array_size; k++)
metadata[j][k] = ptr[i + k];
 
/* The traceID is our handle */
idx = metadata[j][CS_ETMV4_TRCTRACEIDR];
-   i += CS_ETMV4_PRIV_MAX;
+   i += metadata_cpu_array_size;
}
 
/* Get an RB node for this CPU */
-- 
2.25.1



[PATCH v1 4/7] perf cs-etm: Add PID format into metadata

2021-01-08 Thread Leo Yan
It's possible for CoreSight to trace the PID in either CONTEXTIDR_EL1 or
CONTEXTIDR_EL2; the PID format info is used to distinguish which
register the PID is traced in.

This patch saves the PID format into the metadata when recording.

Signed-off-by: Leo Yan 
---
 tools/perf/arch/arm/util/cs-etm.c | 21 +
 tools/perf/util/cs-etm.c  |  2 ++
 tools/perf/util/cs-etm.h  |  2 ++
 3 files changed, 25 insertions(+)

diff --git a/tools/perf/arch/arm/util/cs-etm.c 
b/tools/perf/arch/arm/util/cs-etm.c
index fad7b6e13ccc..ee78df3b1b07 100644
--- a/tools/perf/arch/arm/util/cs-etm.c
+++ b/tools/perf/arch/arm/util/cs-etm.c
@@ -613,6 +613,7 @@ static void cs_etm_get_metadata(int cpu, u32 *offset,
struct cs_etm_recording *ptr =
container_of(itr, struct cs_etm_recording, itr);
struct perf_pmu *cs_etm_pmu = ptr->cs_etm_pmu;
+   u64 pid_fmt;
 
/* first see what kind of tracer this cpu is affined to */
if (cs_etm_is_etmv4(itr, cpu)) {
@@ -641,6 +642,16 @@ static void cs_etm_get_metadata(int cpu, u32 *offset,
  metadata_etmv4_ro
  [CS_ETMV4_TRCAUTHSTATUS]);
 
+   /*
+* The PID format will be used when decode the trace data;
+* based on it the decoder will make decision for setting
+* sample's PID as context_id or VMID.
+*/
+   pid_fmt = perf_pmu__format_bits(&cs_etm_pmu->format, "pid");
+   if (!pid_fmt)
+   pid_fmt = 1ULL << ETM_OPT_CTXTID;
+   info->priv[*offset + CS_ETMV4_PID_FMT] = pid_fmt;
+
/* How much space was used */
increment = CS_ETMV4_PRIV_MAX;
} else {
@@ -658,6 +669,16 @@ static void cs_etm_get_metadata(int cpu, u32 *offset,
cs_etm_get_ro(cs_etm_pmu, cpu,
  metadata_etmv3_ro[CS_ETM_ETMIDR]);
 
+   /*
+* The PID format will be used when decode the trace data;
+* based on it the decoder will make decision for setting
+* sample's PID as context_id or VMID.
+*/
+   pid_fmt = perf_pmu__format_bits(&cs_etm_pmu->format, "pid");
+   if (!pid_fmt)
+   pid_fmt = 1ULL << ETM_OPT_CTXTID;
+   info->priv[*offset + CS_ETM_PID_FMT] = pid_fmt;
+
/* How much space was used */
increment = CS_ETM_PRIV_MAX;
}
diff --git a/tools/perf/util/cs-etm.c b/tools/perf/util/cs-etm.c
index 5e284725dceb..763085db29ae 100644
--- a/tools/perf/util/cs-etm.c
+++ b/tools/perf/util/cs-etm.c
@@ -2447,6 +2447,7 @@ static const char * const cs_etm_priv_fmts[] = {
[CS_ETM_ETMTRACEIDR]= " ETMTRACEIDR%llx\n",
[CS_ETM_ETMCCER]= " ETMCCER%llx\n",
[CS_ETM_ETMIDR] = " ETMIDR %llx\n",
+   [CS_ETM_PID_FMT]= " PID Format %llx\n",
 };
 
 static const char * const cs_etmv4_priv_fmts[] = {
@@ -2459,6 +2460,7 @@ static const char * const cs_etmv4_priv_fmts[] = {
[CS_ETMV4_TRCIDR2]  = " TRCIDR2%llx\n",
[CS_ETMV4_TRCIDR8]  = " TRCIDR8%llx\n",
[CS_ETMV4_TRCAUTHSTATUS] = "TRCAUTHSTATUS  %llx\n",
+   [CS_ETMV4_PID_FMT]  = " PID Format %llx\n",
 };
 
 static void cs_etm__print_auxtrace_info(__u64 *val, int num)
diff --git a/tools/perf/util/cs-etm.h b/tools/perf/util/cs-etm.h
index 4ad925d6d799..8cbbea6100a1 100644
--- a/tools/perf/util/cs-etm.h
+++ b/tools/perf/util/cs-etm.h
@@ -38,6 +38,7 @@ enum {
/* RO, taken from sysFS */
CS_ETM_ETMCCER,
CS_ETM_ETMIDR,
+   CS_ETM_PID_FMT,
CS_ETM_PRIV_MAX,
 };
 
@@ -52,6 +53,7 @@ enum {
CS_ETMV4_TRCIDR2,
CS_ETMV4_TRCIDR8,
CS_ETMV4_TRCAUTHSTATUS,
+   CS_ETMV4_PID_FMT,
CS_ETMV4_PRIV_MAX,
 };
 
-- 
2.25.1



[PATCH v1 7/7] perf cs-etm: Detect pid in VMID for kernel running at EL2

2021-01-08 Thread Leo Yan
From: Suzuki K Poulose 

The pid of the task could be traced as VMID when the kernel is
running at EL2. Teach the decoder to look for vmid when the
context_id is invalid but we have a valid VMID.

Cc: Mike Leach 
Cc: Mathieu Poirier 
Cc: Al Grant 
Co-developed-by: Leo Yan 
Signed-off-by: Suzuki K Poulose 
Signed-off-by: Leo Yan 
---
 .../perf/util/cs-etm-decoder/cs-etm-decoder.c | 32 ---
 1 file changed, 28 insertions(+), 4 deletions(-)

diff --git a/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c 
b/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c
index cd007cc9c283..9e81169dfa76 100644
--- a/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c
+++ b/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c
@@ -6,6 +6,7 @@
  * Author: Mathieu Poirier 
  */
 
+#include 
 #include 
 #include 
 #include 
@@ -500,13 +501,36 @@ cs_etm_decoder__set_tid(struct cs_etm_queue *etmq,
const ocsd_generic_trace_elem *elem,
const uint8_t trace_chan_id)
 {
-   pid_t tid;
+   pid_t tid = -1;
+   u64 pid_fmt;
+   int ret;
 
-   /* Ignore PE_CONTEXT packets that don't have a valid contextID */
-   if (!elem->context.ctxt_id_valid)
+   ret = cs_etm__get_pid_fmt(trace_chan_id, &pid_fmt);
+   if (ret)
+   return OCSD_RESP_FATAL_SYS_ERR;
+
+   /*
+* Process the PE_CONTEXT packets if we have a valid
+* contextID or VMID.
+* If the kernel is running at EL2, the PID is traced
+* in contextidr_el2 as VMID.
+*/
+   switch (pid_fmt) {
+   case BIT(ETM_OPT_CTXTID):
+   if (elem->context.ctxt_id_valid)
+   tid = elem->context.context_id;
+   break;
+   case BIT(ETM_OPT_CTXTID_IN_VMID):
+   if (elem->context.vmid_valid)
+   tid = elem->context.vmid;
+   break;
+   default:
+   break;
+   }
+
+   if (tid == -1)
return OCSD_RESP_CONT;
 
-   tid =  elem->context.context_id;
if (cs_etm__etmq_set_tid(etmq, tid, trace_chan_id))
return OCSD_RESP_FATAL_SYS_ERR;
 
-- 
2.25.1



[PATCH v1 2/7] perf cs_etm: Use pid tracing explicitly instead of contextid

2021-01-08 Thread Leo Yan
From: Suzuki K Poulose 

If the kernel is running at EL2, the pid of the task is exposed
via VMID instead of the CONTEXTID. Add support for this in the
perf tool.

By default the perf tool requests contextid and timestamp for
task bound events. Instead of hard coding contextid, switch
to "pid" config exposed by the kernel. While at it, define new
independent macros (rather than using the "config" bits) for
requesting the "pid" and "timestamp" for cs_etm_set_option(),
since the PID config is now dynamic depending on the kernel
exception level.

Cc: Mike Leach 
Cc: Mathieu Poirier 
Cc: Al Grant 
Signed-off-by: Suzuki K Poulose 
Signed-off-by: Leo Yan 
---
 tools/include/linux/coresight-pmu.h | 11 +++--
 tools/perf/arch/arm/util/cs-etm.c   | 68 ++---
 2 files changed, 59 insertions(+), 20 deletions(-)

diff --git a/tools/include/linux/coresight-pmu.h 
b/tools/include/linux/coresight-pmu.h
index b0e35eec6499..927c6285ce5d 100644
--- a/tools/include/linux/coresight-pmu.h
+++ b/tools/include/linux/coresight-pmu.h
@@ -11,16 +11,19 @@
 #define CORESIGHT_ETM_PMU_SEED  0x10
 
 /* ETMv3.5/PTM's ETMCR config bit */
-#define ETM_OPT_CYCACC  12
-#define ETM_OPT_CTXTID 14
-#define ETM_OPT_TS  28
-#define ETM_OPT_RETSTK 29
+#define ETM_OPT_CYCACC 12
+#define ETM_OPT_CTXTID 14
+#define ETM_OPT_CTXTID_IN_VMID 15
+#define ETM_OPT_TS 28
+#define ETM_OPT_RETSTK 29
 
 /* ETMv4 CONFIGR programming bits for the ETM OPTs */
 #define ETM4_CFG_BIT_CYCACC 4
 #define ETM4_CFG_BIT_CTXTID 6
+#define ETM4_CFG_BIT_VMID  7
 #define ETM4_CFG_BIT_TS 11
 #define ETM4_CFG_BIT_RETSTK 12
+#define ETM4_CFG_BIT_VMID_OPT  15
 
 static inline int coresight_get_trace_id(int cpu)
 {
diff --git a/tools/perf/arch/arm/util/cs-etm.c 
b/tools/perf/arch/arm/util/cs-etm.c
index cad7bf783413..fad7b6e13ccc 100644
--- a/tools/perf/arch/arm/util/cs-etm.c
+++ b/tools/perf/arch/arm/util/cs-etm.c
@@ -59,14 +59,15 @@ static const char *metadata_etmv4_ro[CS_ETMV4_PRIV_MAX] = {
 
 static bool cs_etm_is_etmv4(struct auxtrace_record *itr, int cpu);
 
-static int cs_etm_set_context_id(struct auxtrace_record *itr,
-struct evsel *evsel, int cpu)
+static int cs_etm_set_pid(struct auxtrace_record *itr,
+ struct evsel *evsel, int cpu)
 {
struct cs_etm_recording *ptr;
struct perf_pmu *cs_etm_pmu;
char path[PATH_MAX];
int err = -EINVAL;
u32 val;
+   u64 pid_fmt;
 
ptr = container_of(itr, struct cs_etm_recording, itr);
cs_etm_pmu = ptr->cs_etm_pmu;
@@ -86,21 +87,50 @@ static int cs_etm_set_context_id(struct auxtrace_record 
*itr,
goto out;
}
 
+   pid_fmt = perf_pmu__format_bits(&cs_etm_pmu->format, "pid");
/*
-* TRCIDR2.CIDSIZE, bit [9-5], indicates whether contextID tracing
-* is supported:
-*  0b0 Context ID tracing is not supported.
-*  0b00100 Maximum of 32-bit Context ID size.
-*  All other values are reserved.
+* If the kernel doesn't support the "pid" format (older kernel),
+* fall back to using the CTXTID.
 */
-   val = BMVAL(val, 5, 9);
-   if (!val || val != 0x4) {
+   if (!pid_fmt)
+   pid_fmt = 1ULL << ETM_OPT_CTXTID;
+
+   switch (pid_fmt) {
+   case (1ULL << ETM_OPT_CTXTID):
+   /*
+* TRCIDR2.CIDSIZE, bit [9-5], indicates whether contextID
+* tracing is supported:
+*  0b0 Context ID tracing is not supported.
+*  0b00100 Maximum of 32-bit Context ID size.
+*  All other values are reserved.
+*/
+   val = BMVAL(val, 5, 9);
+   if (!val || val != 0x4) {
+   err = -EINVAL;
+   goto out;
+   }
+   break;
+   case (1ULL << ETM_OPT_CTXTID_IN_VMID):
+   /*
+* TRCIDR2.VMIDOPT[30:29] != 0 and
+* TRCIDR2.VMIDSIZE[14:10] == 0b00100 (32bit virtual contextid)
+* We can't support CONTEXTIDR in VMID if the size of the
+* virtual context id is < 32bit.
+* Any value of VMIDSIZE >= 4 (i.e, > 32bit) is fine for us.
+*/
+   if (!BMVAL(val, 29, 30) || BMVAL(val, 10, 14) < 4) {
+   err = -EINVAL;
+   goto out;
+   }
+   break;
+   default:
err = -EINVAL;
goto out;
}
 
+
/* All good, let the kernel know */
-   evsel->core.attr.config |= (1 << ETM_OPT_CTXTID);
+   evsel->core.attr.config |= pid_fmt;
err = 0;
 
 out:
@@ -156,6 +186,10 @@ static int cs_etm_set_timestamp(struct auxtrace_record 
*itr,
return err;
 }
 
+#define ETM_SET_OPT_PID (1 << 0)
+#define ETM_SET_OPT_TS (1 << 

[PATCH v1 0/7] coresight: etm-perf: Fix pid tracing with VHE

2021-01-08 Thread Leo Yan
This patch series is a follow-up to the previous version, which was
posted by Suzuki [1].  The background for why we need this patch series
is given below, quoted directly from the cover letter of the previous
version:

"With the Virtualization Host Extensions, the kernel can run at EL2.
In this case the pid is written to CONTEXTIDR_EL2 instead of the
CONTEXTIDR_EL1. Thus the normal coresight tracing will be unable
to detect the PID of the thread generating the trace by looking
at the CONTEXTIDR_EL1. Thus, depending on the kernel EL, we must
switch to tracing the correct CONTEXTIDR register.

With VHE, we must set the TRCCONFIGR.VMID and TRCCONFIGR.VMID_OPT
to include the CONTEXTIDR_EL2 as the VMID in the trace. This
requires the perf tool to detect the changes in the TRCCONFIGR and
use the VMID / CID field for the PID. The challenge here is for
the perf tool to detect the kernel behavior.

Instead of the previously proposed invasive approaches, this set
implements a less intrusive mechanism, by playing with the
perf_event.attribute.config bits."

Same as the previous series, this series keeps the same implementation
for two introduced format bits:

- contextid_in_vmid -> Is only supported when the VMID tracing
  and CONTEXTIDR_EL2 both are supported. When requested the perf
  etm4x backend sets (TRCCONFIGR.VMID | TRCCONFIGR.VMID_OPT).
  As per ETMv4.4 TRM, when the core supports VHE, the CONTEXTIDR_EL2
  tracing is mandatory. (See the field TRCID2.VMIDOPT)

- pid -> Is an alias for the correct config to enable PID tracing
  on any kernel.
  i.e, in EL1 kernel -> pid == contextid
  EL2 kernel -> pid == contextid_in_vmid

With this, the perf tool is also updated to request the "pid"
tracing whenever available, falling back to "contextid" if it
is unavailable.

Compared with the old version, this patch series uses the metadata to
save the PID format; adding a new item to the metadata introduces a
backward compatibility issue.  To keep backward compatibility, this
series calculates the per-CPU metadata array size instead of using the
predefined macro, so the correct array size is always known from the
info stored in the perf data file.  Finally, the PID format stored in
the metadata is passed to the decoder and guides it to set the PID from
CONTEXTIDR_EL1 or VMID.

This patch series has been tested on Arm Juno-r2 board, with testing
two perf data files: one data file is recorded by the latest perf tool
after applied this patch series, and another data file is recorded by
old perf tool without this patch series, so this can prove the tool is
backward compatible.

Changes from RFC:
* Added comments to clarify cases requested (Leo);
* Explain the change to generic flags for cs_etm_set_option() in the
  commit description;
* Stored PID format in metadata and passed it to decoder (Leo);
* Enhanced cs-etm for backward compatibility (Denis Nikitin).

[1] 
https://archive.armlinux.org.uk/lurker/message/20201110.183310.24406f33.en.html


Leo Yan (4):
  perf cs-etm: Calculate per CPU metadata array size
  perf cs-etm: Add PID format into metadata
  perf cs-etm: Fixup PID_FMT when it is zero
  perf cs-etm: Add helper cs_etm__get_pid_fmt()

Suzuki K Poulose (3):
  coresight: etm-perf: Add support for PID tracing for kernel at EL2
  perf cs_etm: Use pid tracing explicitly instead of contextid
  perf cs-etm: Detect pid in VMID for kernel running at EL2

 .../hwtracing/coresight/coresight-etm-perf.c  | 14 +++
 .../coresight/coresight-etm4x-core.c  |  9 ++
 include/linux/coresight-pmu.h | 11 ++-
 tools/include/linux/coresight-pmu.h   | 11 ++-
 tools/perf/arch/arm/util/cs-etm.c | 89 +++
 .../perf/util/cs-etm-decoder/cs-etm-decoder.c | 32 ++-
 tools/perf/util/cs-etm.c  | 61 -
 tools/perf/util/cs-etm.h  |  3 +
 8 files changed, 198 insertions(+), 32 deletions(-)

-- 
2.25.1



[PATCH v1 1/7] coresight: etm-perf: Add support for PID tracing for kernel at EL2

2021-01-08 Thread Leo Yan
From: Suzuki K Poulose 

When the kernel is running at EL2, the PID is stored in CONTEXTIDR_EL2.
So, tracing CONTEXTIDR_EL1 doesn't give us the pid of the process.
Thus we should trace the VMID with VMIDOPT set to trace
CONTEXTIDR_EL2 instead of CONTEXTIDR_EL1. We already have an existing
config option "contextid", and it will remain useful for tracing
virtual machines (when we get to support virtualization), so instead
this patch adds a new option, contextid_in_vmid, as a separate config.
Thus on an EL2 kernel, we will have two options available for
the perf tool. However, to make it easier for the user to
do pid tracing, we add a new format which will default to
"contextid" (on EL1 kernel) or "contextid_in_vmid" (on EL2
kernel), so that the user doesn't have to care which EL the
kernel is running at.

 i.e, perf record -e cs_etm/pid/u --

will always do the "pid" tracing, independent of the kernel EL.

Also, the perf tool will be updated to automatically select
"pid" config instead of the "contextid" for system wide/CPU wide
mode.

Cc: Mathieu Poirier 
Cc: Al Grant 
Cc: Mike Leach 
Signed-off-by: Suzuki K Poulose 
Signed-off-by: Leo Yan 
---
 drivers/hwtracing/coresight/coresight-etm-perf.c   | 14 ++
 drivers/hwtracing/coresight/coresight-etm4x-core.c |  9 +
 include/linux/coresight-pmu.h  | 11 +++
 3 files changed, 30 insertions(+), 4 deletions(-)

diff --git a/drivers/hwtracing/coresight/coresight-etm-perf.c 
b/drivers/hwtracing/coresight/coresight-etm-perf.c
index bdc34ca449f7..f763def145e4 100644
--- a/drivers/hwtracing/coresight/coresight-etm-perf.c
+++ b/drivers/hwtracing/coresight/coresight-etm-perf.c
@@ -30,14 +30,28 @@ static DEFINE_PER_CPU(struct coresight_device *, csdev_src);
 /* ETMv3.5/PTM's ETMCR is 'config' */
 PMU_FORMAT_ATTR(cycacc,"config:" __stringify(ETM_OPT_CYCACC));
 PMU_FORMAT_ATTR(contextid, "config:" __stringify(ETM_OPT_CTXTID));
+PMU_FORMAT_ATTR(contextid_in_vmid, "config:" 
__stringify(ETM_OPT_CTXTID_IN_VMID));
 PMU_FORMAT_ATTR(timestamp, "config:" __stringify(ETM_OPT_TS));
 PMU_FORMAT_ATTR(retstack,  "config:" __stringify(ETM_OPT_RETSTK));
 /* Sink ID - same for all ETMs */
 PMU_FORMAT_ATTR(sinkid,"config2:0-31");
 
+static ssize_t format_attr_pid_show(struct device *dev,
+   struct device_attribute *attr,
+   char *page)
+{
+   int pid_fmt = is_kernel_in_hyp_mode() ? ETM_OPT_CTXTID_IN_VMID : 
ETM_OPT_CTXTID;
+
+   return sprintf(page, "config:%d\n", pid_fmt);
+}
+
+struct device_attribute format_attr_pid = __ATTR(pid, 0444, 
format_attr_pid_show, NULL);
+
 static struct attribute *etm_config_formats_attr[] = {
&format_attr_cycacc.attr,
&format_attr_contextid.attr,
+   &format_attr_contextid_in_vmid.attr,
+   &format_attr_pid.attr,
&format_attr_timestamp.attr,
&format_attr_retstack.attr,
&format_attr_sinkid.attr,
diff --git a/drivers/hwtracing/coresight/coresight-etm4x-core.c 
b/drivers/hwtracing/coresight/coresight-etm4x-core.c
index b20b6ff17cf6..8b7c7a8b2874 100644
--- a/drivers/hwtracing/coresight/coresight-etm4x-core.c
+++ b/drivers/hwtracing/coresight/coresight-etm4x-core.c
@@ -477,6 +477,15 @@ static int etm4_parse_event_config(struct etmv4_drvdata 
*drvdata,
/* bit[6], Context ID tracing bit */
config->cfg |= BIT(ETM4_CFG_BIT_CTXTID);
 
+   /* Do not enable VMID tracing if we are not running in EL2 */
+   if (attr->config & BIT(ETM_OPT_CTXTID_IN_VMID)) {
+   if (!is_kernel_in_hyp_mode()) {
+   ret = -EINVAL;
+   goto out;
+   }
+   config->cfg |= BIT(ETM4_CFG_BIT_VMID) | 
BIT(ETM4_CFG_BIT_VMID_OPT);
+   }
+
/* return stack - enable if selected and supported */
if ((attr->config & BIT(ETM_OPT_RETSTK)) && drvdata->retstack)
/* bit[12], Return stack enable bit */
diff --git a/include/linux/coresight-pmu.h b/include/linux/coresight-pmu.h
index b0e35eec6499..927c6285ce5d 100644
--- a/include/linux/coresight-pmu.h
+++ b/include/linux/coresight-pmu.h
@@ -11,16 +11,19 @@
 #define CORESIGHT_ETM_PMU_SEED  0x10
 
 /* ETMv3.5/PTM's ETMCR config bit */
-#define ETM_OPT_CYCACC  12
-#define ETM_OPT_CTXTID 14
-#define ETM_OPT_TS  28
-#define ETM_OPT_RETSTK 29
+#define ETM_OPT_CYCACC 12
+#define ETM_OPT_CTXTID 14
+#define ETM_OPT_CTXTID_IN_VMID 15
+#define ETM_OPT_TS 28
+#define ETM_OPT_RETSTK 29
 
 /* ETMv4 CONFIGR programming bits for the ETM OPTs */
 #define ETM4_CFG_BIT_CYCACC4
 #define ETM4_CFG_BIT_CTXTID6
+#define ETM4_CFG_BIT_VMID  7
 #define ETM4_CFG_BIT_TS11
 #define ETM4_CFG_BIT_RETSTK12
+#define ETM4_CFG_BIT_VMID_OPT  15
 
 static inline int coresight_get_trace_id(int cpu)
 {
-- 
2.25.1



Re: [PATCH 0/3] arm64: kasan: support CONFIG_KASAN_VMALLOC

2021-01-08 Thread Lecopzer Chen
Hi Andrey,
>  
> On Sun, Jan 3, 2021 at 6:12 PM Lecopzer Chen  wrote:
> >
> > Linux supports KAsan for VMALLOC since commit 3c5c3cfb9ef4da9
> > ("kasan: support backing vmalloc space with real shadow memory")
> >
> > According to how x86 ported it [1], they early allocated p4d and pgd,
> > but in arm64 I just simulate how KAsan supports MODULES_VADDR in arm64
> > by not populating the vmalloc area except for the kimg address.
> >
> > Test environment:
> > 4G and 8G Qemu virt,
> > 39-bit VA + 4k PAGE_SIZE with 3-level page table,
> > test by lib/test_kasan.ko and lib/test_kasan_module.ko
> >
> > It also works in Kaslr with CONFIG_RANDOMIZE_MODULE_REGION_FULL,
> > but not test for HW_TAG(I have no proper device), thus keep
> > HW_TAG and KASAN_VMALLOC mutual exclusion until confirming
> > the functionality.
> 
> Re this: it makes sense to introduce vmalloc support one step a time
> and add SW_TAGS support before taking on HW_TAGS. SW_TAGS doesn't
> require any special hardware. Working on SW_TAGS first will also allow
> dealing with potential conflicts between vmalloc and tags without
> having MTE in the picture as well. Just FYI, no need to include that
> in this change.

Thanks for the information and suggestion; I'll keep this series
limited to KASAN_GENERIC support :)



BRs,
Lecopzer



Re: [PATCH 3/3] arm64: Kconfig: support CONFIG_KASAN_VMALLOC

2021-01-08 Thread Lecopzer Chen
Hi Andrey,
 
> On Sun, Jan 3, 2021 at 6:13 PM Lecopzer Chen  wrote:
> >
> > Now I have no device to test for HW_TAG, so keep it not selected
> > until someone can test this.
> >
> > Signed-off-by: Lecopzer Chen 
> > ---
> >  arch/arm64/Kconfig | 1 +
> >  1 file changed, 1 insertion(+)
> >
> > diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
> > index 05e17351e4f3..29ab35aab59e 100644
> > --- a/arch/arm64/Kconfig
> > +++ b/arch/arm64/Kconfig
> > @@ -136,6 +136,7 @@ config ARM64
> > select HAVE_ARCH_JUMP_LABEL
> > select HAVE_ARCH_JUMP_LABEL_RELATIVE
> > select HAVE_ARCH_KASAN if !(ARM64_16K_PAGES && ARM64_VA_BITS_48)
> > +   select HAVE_ARCH_KASAN_VMALLOC if (HAVE_ARCH_KASAN && 
> > !KASAN_HW_TAGS)
> 
> KASAN_VMALLOC currently "depends on" KASAN_GENERIC. I think we should
> either do "HAVE_ARCH_KASAN && KASAN_GENERIC" here as well, or just do
> "if HAVE_ARCH_KASAN".

Thanks for the correction; I'll change it to the following in the V2 patch.
"select HAVE_ARCH_KASAN_VMALLOC if HAVE_ARCH_KASAN"

Let KASAN_VMALLOC depend on the mode it supports, to avoid modifying two
places if KASAN_VMALLOC can support modes other than GENERIC in the future.


Re: Re: [PATCH] media: v4l2: Fix memleak in videobuf_read_one

2021-01-08 Thread dinghao . liu
> On 05/01/2021 08:59, Dinghao Liu wrote:
> > When videobuf_waiton() fails, we should execute clean
> > functions to prevent memleak. It's the same when
> > __videobuf_copy_to_user() fails.
> > 
> > Fixes: 7a7d9a89d0307 ("V4L/DVB (6251): Replace video-buf to a more generic 
> > approach")
> > Signed-off-by: Dinghao Liu 
> > ---
> >  drivers/media/v4l2-core/videobuf-core.c | 12 ++--
> >  1 file changed, 10 insertions(+), 2 deletions(-)
> > 
> > diff --git a/drivers/media/v4l2-core/videobuf-core.c 
> > b/drivers/media/v4l2-core/videobuf-core.c
> > index 606a271bdd2d..0709b75d11cd 100644
> > --- a/drivers/media/v4l2-core/videobuf-core.c
> > +++ b/drivers/media/v4l2-core/videobuf-core.c
> > @@ -924,8 +924,12 @@ ssize_t videobuf_read_one(struct videobuf_queue *q,
> >  
> > /* wait until capture is done */
> > retval = videobuf_waiton(q, q->read_buf, nonblocking, 1);
> > -   if (0 != retval)
> > +   if (retval != 0) {
> > +   q->ops->buf_release(q, q->read_buf);
> > +   kfree(q->read_buf);
> > +   q->read_buf = NULL;
> > goto done;
> > +   }
> 
> I'm fairly certain that this is wrong: if waiton returns an error, then
> that means that the wait is either interrupted or that we are in non-blocking
> mode and no buffer has arrived yet. In that case you just go to done since
> there is nothing to clean up.
> 

I found there was a similar error handling in videobuf_read_zerocopy(), where
q->read_buf was freed on failure of videobuf_waiton(), thus I reported this as
a memleak. Do you think the error handling in videobuf_read_zerocopy() is right?

> >  
> > CALL(q, sync, q, q->read_buf);
> >  
> > @@ -940,8 +944,12 @@ ssize_t videobuf_read_one(struct videobuf_queue *q,
> >  
> > /* Copy to userspace */
> > retval = __videobuf_copy_to_user(q, q->read_buf, data, count, 
> > nonblocking);
> > -   if (retval < 0)
> > +   if (retval < 0) {
> > +   q->ops->buf_release(q, q->read_buf);
> > +   kfree(q->read_buf);
> > +   q->read_buf = NULL;
> > goto done;
> 
> I'm not sure about this either: if userspace gave a crappy pointer and this
> copy_to_user fails, then that doesn't mean you should release the buffer.
> The next read() might have a valid pointer or, more likely, the application
> exits or crashes and everything is cleaned up when the filehandle is closed.
> 

You are right. Let's keep this part as it was for security.

Regards,
Dinghao


[PATCH] venus: pm_helpers: Control core power domain manually

2021-01-08 Thread Stanimir Varbanov
Presently we use device_link to control the core power domain. But this
leads to issues because genpd doesn't guarantee synchronous on/off
for supplier devices. Switch to manual control via pm_runtime calls.

Signed-off-by: Stanimir Varbanov 
---
 drivers/media/platform/qcom/venus/core.h  |  1 -
 .../media/platform/qcom/venus/pm_helpers.c| 36 ++-
 2 files changed, 19 insertions(+), 18 deletions(-)

diff --git a/drivers/media/platform/qcom/venus/core.h 
b/drivers/media/platform/qcom/venus/core.h
index dfc13b2f371f..74d9fd3d51cc 100644
--- a/drivers/media/platform/qcom/venus/core.h
+++ b/drivers/media/platform/qcom/venus/core.h
@@ -128,7 +128,6 @@ struct venus_core {
struct icc_path *cpucfg_path;
struct opp_table *opp_table;
bool has_opp_table;
-   struct device_link *pd_dl_venus;
struct device *pmdomains[VIDC_PMDOMAINS_NUM_MAX];
struct device_link *opp_dl_venus;
struct device *opp_pmdomain;
diff --git a/drivers/media/platform/qcom/venus/pm_helpers.c 
b/drivers/media/platform/qcom/venus/pm_helpers.c
index 94219a3093cb..e0338932a720 100644
--- a/drivers/media/platform/qcom/venus/pm_helpers.c
+++ b/drivers/media/platform/qcom/venus/pm_helpers.c
@@ -774,13 +774,6 @@ static int vcodec_domains_get(struct device *dev)
core->pmdomains[i] = pd;
}
 
-   core->pd_dl_venus = device_link_add(dev, core->pmdomains[0],
-   DL_FLAG_PM_RUNTIME |
-   DL_FLAG_STATELESS |
-   DL_FLAG_RPM_ACTIVE);
-   if (!core->pd_dl_venus)
-   return -ENODEV;
-
 skip_pmdomains:
if (!core->has_opp_table)
return 0;
@@ -807,14 +800,12 @@ static int vcodec_domains_get(struct device *dev)
 opp_dl_add_err:
dev_pm_opp_detach_genpd(core->opp_table);
 opp_attach_err:
-   if (core->pd_dl_venus) {
-   device_link_del(core->pd_dl_venus);
-   for (i = 0; i < res->vcodec_pmdomains_num; i++) {
-   if (IS_ERR_OR_NULL(core->pmdomains[i]))
-   continue;
-   dev_pm_domain_detach(core->pmdomains[i], true);
-   }
+   for (i = 0; i < res->vcodec_pmdomains_num; i++) {
+   if (IS_ERR_OR_NULL(core->pmdomains[i]))
+   continue;
+   dev_pm_domain_detach(core->pmdomains[i], true);
}
+
return ret;
 }
 
@@ -827,9 +818,6 @@ static void vcodec_domains_put(struct device *dev)
if (!res->vcodec_pmdomains_num)
goto skip_pmdomains;
 
-   if (core->pd_dl_venus)
-   device_link_del(core->pd_dl_venus);
-
for (i = 0; i < res->vcodec_pmdomains_num; i++) {
if (IS_ERR_OR_NULL(core->pmdomains[i]))
continue;
@@ -917,16 +905,30 @@ static void core_put_v4(struct device *dev)
 static int core_power_v4(struct device *dev, int on)
 {
struct venus_core *core = dev_get_drvdata(dev);
+   struct device *pmctrl = core->pmdomains[0];
int ret = 0;
 
if (on == POWER_ON) {
+   if (pmctrl) {
+   ret = pm_runtime_get_sync(pmctrl);
+   if (ret < 0) {
+   pm_runtime_put_noidle(pmctrl);
+   return ret;
+   }
+   }
+
ret = core_clks_enable(core);
+   if (ret < 0 && pmctrl)
+   pm_runtime_put_sync(pmctrl);
} else {
/* Drop the performance state vote */
if (core->opp_pmdomain)
dev_pm_opp_set_rate(dev, 0);
 
core_clks_disable(core);
+
+   if (pmctrl)
+   pm_runtime_put_sync(pmctrl);
}
 
return ret;
-- 
2.17.1



Re: BUG: unable to handle kernel NULL pointer dereference in __lookup_slow

2021-01-08 Thread syzbot
syzbot suspects this issue was fixed by commit:

commit d24396c5290ba8ab04ba505176874c4e04a2d53c
Author: Rustam Kovhaev 
Date:   Sun Nov 1 14:09:58 2020 +

reiserfs: add check for an invalid ih_entry_count

bisection log:  https://syzkaller.appspot.com/x/bisect.txt?x=111480e750
start commit:   a68a0262 mm/madvise: remove racy mm ownership check
git tree:   upstream
kernel config:  https://syzkaller.appspot.com/x/.config?x=e597c2b53c984cd8
dashboard link: https://syzkaller.appspot.com/bug?extid=3db80bbf66b88d68af9d
syz repro:  https://syzkaller.appspot.com/x/repro.syz?x=1737b8a750
C reproducer:   https://syzkaller.appspot.com/x/repro.c?x=1697246b50

If the result looks correct, please mark the issue as fixed by replying with:

#syz fix: reiserfs: add check for an invalid ih_entry_count

For information about bisection process see: https://goo.gl/tpsmEJ#bisection


[PATCH] maintainers: update my email address

2021-01-08 Thread Darrick J. Wong
From: Darrick J. Wong 

Change my email contact ahead of a likely painful eleven-month migration
to a certain cobalt enterprisey groupware cloud product that will totally
break my workflow.  Some day I may get used to having my email
sequestered behind both claret and cerulean oauth2+sms 2fa layers, but
for now I'll stick with keying in one password to receive an email vs.
the required four.

Signed-off-by: Darrick J. Wong 
---
 MAINTAINERS |4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/MAINTAINERS b/MAINTAINERS
index 6eff4f720c72..9ed3ec2aa1aa 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -9273,7 +9273,7 @@ F:drivers/net/ethernet/sgi/ioc3-eth.c
 
 IOMAP FILESYSTEM LIBRARY
 M: Christoph Hellwig 
-M: Darrick J. Wong 
+M: Darrick J. Wong 
 M: linux-...@vger.kernel.org
 M: linux-fsde...@vger.kernel.org
 L: linux-...@vger.kernel.org
@@ -19505,7 +19505,7 @@ F:  arch/x86/xen/*swiotlb*
 F: drivers/xen/*swiotlb*
 
 XFS FILESYSTEM
-M: Darrick J. Wong 
+M: Darrick J. Wong 
 M: linux-...@vger.kernel.org
 L: linux-...@vger.kernel.org
 S: Supported


RE: [PATCH v8 04/20] dlb: add device ioctl layer and first three ioctls

2021-01-08 Thread Chen, Mike Ximing
> -Original Message-
> From: Greg KH 
> Sent: Thursday, January 7, 2021 2:41 PM
> To: Chen, Mike Ximing 
> Cc: linux-kernel@vger.kernel.org; a...@arndb.de; Williams, Dan J
> ; pierre-louis.boss...@linux.intel.com; Gage Eads
> 
> Subject: Re: [PATCH v8 04/20] dlb: add device ioctl layer and first three 
> ioctls
> 
> > diff --git a/Documentation/userspace-api/ioctl/ioctl-number.rst
> b/Documentation/userspace-api/ioctl/ioctl-number.rst
> > index 55a2d9b2ce33..afca043d59f8 100644
> > --- a/Documentation/userspace-api/ioctl/ioctl-number.rst
> > +++ b/Documentation/userspace-api/ioctl/ioctl-number.rst
> > @@ -241,6 +241,7 @@ Code  Seq#Include File
> Comments
> >  'h'   00-7F  
> > conflict! Charon filesystem
> >   
> > 
> >  'h'   00-1F  linux/hpet.h
> > conflict!
> > +'h'   00-1F  uapi/linux/dlb.h
> > conflict!
> 
> Why are you taking a range that you know there is a conflict for?

OK. We will switch to an unused magic number and range, probably 0x81 00-1F.
Thanks


RE: [PATCH v8 01/20] dlb: add skeleton for DLB driver

2021-01-08 Thread Chen, Mike Ximing



> -Original Message-
> From: Greg KH 
> Sent: Thursday, January 7, 2021 2:36 PM
> To: Chen, Mike Ximing 
> Cc: linux-kernel@vger.kernel.org; a...@arndb.de; Williams, Dan J
> ; pierre-louis.boss...@linux.intel.com; Gage Eads
> 
> Subject: Re: [PATCH v8 01/20] dlb: add skeleton for DLB driver
> 
> On Mon, Jan 04, 2021 at 08:58:20PM -0600, Mike Ximing Chen wrote:
> > +static int dlb_probe(struct pci_dev *pdev,
> > +const struct pci_device_id *pdev_id)
> > +{
> > +   struct dlb *dlb;
> > +   int ret;
> > +
> > +   dlb = devm_kzalloc(>dev, sizeof(*dlb), GFP_KERNEL);
> > +   if (!dlb)
> > +   return -ENOMEM;
> > +
> > +   pci_set_drvdata(pdev, dlb);
> > +
> > +   dlb->pdev = pdev;
> > +
> > +   spin_lock(_ids_lock);
> > +   dlb->id = idr_alloc(_ids,
> > +   (void *)dlb,
> > +   0,
> > +   DLB_MAX_NUM_DEVICES - 1,
> > +   GFP_KERNEL);
> > +   spin_unlock(_ids_lock);
> > +
> > +   if (dlb->id < 0) {
> > +   dev_err(>dev, "probe: device ID allocation failed\n");
> > +
> > +   ret = dlb->id;
> > +   goto alloc_id_fail;
> > +   }
> > +
> > +   ret = pcim_enable_device(pdev);
> > +   if (ret != 0) {
> > +   dev_err(>dev, "pcim_enable_device() returned %d\n", ret);
> > +
> > +   goto pci_enable_device_fail;
> > +   }
> > +
> > +   ret = pcim_iomap_regions(pdev,
> > +(1U << DLB_CSR_BAR) | (1U << DLB_FUNC_BAR),
> > +"dlb");
> > +   if (ret != 0) {
> > +   dev_err(>dev,
> > +   "pcim_iomap_regions(): returned %d\n", ret);
> > +
> > +   goto pci_enable_device_fail;
> > +   }
> > +
> > +   pci_set_master(pdev);
> > +
> > +   if (pci_enable_pcie_error_reporting(pdev))
> > +   dev_info(>dev, "[%s()] Failed to enable AER\n", __func__);
> 
> Shouldn't that be dev_err() and you fail here?
> 
Some of our earlier devices/platforms do not support AER:
pci_enable_pcie_error_reporting() fails, but everything else works fine.
We will change this to dev_err() once the old hardware is phased out.

> And no need for __func__ please, the driver name and device is listed,
> that's all that is necessary.

Will remove __func__. 
Thanks

> 
> thanks,
> 
> greg k-h


Re: Old platforms: bring out your dead

2021-01-08 Thread Willy Tarreau
On Fri, Jan 08, 2021 at 11:55:06PM +0100, Arnd Bergmann wrote:
> * 80486SX/DX: 80386 CPUs were dropped in 2012, and there are
>   indications that 486 have no users either on recent kernels.
>   There is still the Vortex86 family of SoCs, and the oldest of those were
>   486SX-class, but all the modern ones are 586-class.

These also are the last generation of fanless x86 boards with 100% compatible
controllers, that some people have probably kept around because these don't
age much and have plenty of connectivity. I've used an old one a few times
to plug in an old floppy drive, ISA SCSI controllers to access an old tape
drive and a few such things. That doesn't mean that it's a good justification
not to remove them, what I rather mean is that *if* there is no benefit
in dropping them maybe we can keep them. On the other hand, good luck for
running a modern OS on these, when 16MB-32MB RAM was about the maximum that
was commonly found by then (though if people kept them around that's probably
because they were well equipped, like that 64MB 386DX I'm having :-)).

Willy


Re: False positive "do_IRQ: #.55 No irq handler for vector" messages on AMD ryzen based laptops

2021-01-08 Thread Christopher William Snowhill
Replying to https://lkml.org/lkml/2019/2/19/516 from yes, 2019.

My MSI B450 Tomahawk is exhibiting this bug now that I've updated the firmware 
to the latest beta BIOS with AGESA 1.1.0.0 patch D.


Re: [PATCH] staging: mt7621-dts: remove obsolete switch node

2021-01-08 Thread DENG Qingfang
On Sat, Jan 9, 2021 at 2:25 AM Andrew Lunn  wrote:
>
> On Fri, Jan 08, 2021 at 10:51:55AM +0800, DENG Qingfang wrote:
> > This was for OpenWrt's swconfig driver, which never made it upstream,
> > and was also superseded by MT7530 DSA driver.
>
> What about
> Documentation/devicetree/bindings/net/mediatek,mt7620-gsw.txt ?
> Should that also be removed?

Yes.

>
>Andrew


Re: [PATCH 3/3] regulator: mt6323: Add OF match table

2021-01-08 Thread kernel test robot
Hi,

I love your patch! Perhaps something to improve:

[auto build test WARNING on regulator/for-next]
[also build test WARNING on v5.11-rc2 next-20210108]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch]

url:
https://github.com/0day-ci/linux/commits/matthias-bgg-kernel-org/regulator-mt6360-Add-OF-match-table/20210109-101451
base:   https://git.kernel.org/pub/scm/linux/kernel/git/broonie/regulator.git 
for-next
config: mips-randconfig-r032-20210108 (attached as .config)
compiler: clang version 12.0.0 (https://github.com/llvm/llvm-project 
bc556e5685c0f97e79fb7b3c6f15cc5062db8e36)
reproduce (this is a W=1 build):
wget 
https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O 
~/bin/make.cross
chmod +x ~/bin/make.cross
# install mips cross compiling tool for clang build
# apt-get install binutils-mips-linux-gnu
# 
https://github.com/0day-ci/linux/commit/ad9188be3a1fb2710a943ed2ca794c7d0e82e983
git remote add linux-review https://github.com/0day-ci/linux
git fetch --no-tags linux-review 
matthias-bgg-kernel-org/regulator-mt6360-Add-OF-match-table/20210109-101451
git checkout ad9188be3a1fb2710a943ed2ca794c7d0e82e983
# save the attached .config to linux build tree
COMPILER_INSTALL_PATH=$HOME/0day COMPILER=clang make.cross ARCH=mips 

If you fix the issue, kindly add following tag as appropriate
Reported-by: kernel test robot 

All warnings (new ones prefixed by >>):

>> drivers/regulator/mt6323-regulator.c:409:34: warning: unused variable 
>> 'mt6323_of_match' [-Wunused-const-variable]
   static const struct of_device_id mt6323_of_match[] = {
^
   1 warning generated.


vim +/mt6323_of_match +409 drivers/regulator/mt6323-regulator.c

   408  
 > 409  static const struct of_device_id mt6323_of_match[] = {
   410  { .compatible = "mediatek,mt6323-regulator", },
   411  { /* sentinel */ },
   412  };
   413  MODULE_DEVICE_TABLE(of, mt6323_of_match);
   414  

---
0-DAY CI Kernel Test Service, Intel Corporation
https://lists.01.org/hyperkitty/list/kbuild-...@lists.01.org


.config.gz
Description: application/gzip


[rcu:dev.2021.01.06a] BUILD SUCCESS 23400a5e2d5edbb74d6299b83c67920f2fe8413d

2021-01-08 Thread kernel test robot
tree/branch: 
https://git.kernel.org/pub/scm/linux/kernel/git/paulmck/linux-rcu.git  
dev.2021.01.06a
branch HEAD: 23400a5e2d5edbb74d6299b83c67920f2fe8413d  EXP sched: Print list of 
runnable tasks in the current rq

elapsed time: 1650m

configs tested: 112
configs skipped: 2

The following configs have been built successfully.
More configs may be tested in the coming days.

gcc tested configs:
arm defconfig
arm64allyesconfig
arm64   defconfig
arm  allyesconfig
arm  allmodconfig
arc haps_hs_smp_defconfig
um   x86_64_defconfig
sparc64  alldefconfig
armoxnas_v6_defconfig
sh   se7721_defconfig
powerpc ps3_defconfig
powerpc   maple_defconfig
arm   h3600_defconfig
mips allyesconfig
microblaze  mmu_defconfig
shsh7785lcr_defconfig
nds32alldefconfig
sh microdev_defconfig
powerpc  arches_defconfig
mipsworkpad_defconfig
mipsomega2p_defconfig
shtitan_defconfig
powerpc  ppc64e_defconfig
shdreamcast_defconfig
arm   netwinder_defconfig
xtensa  audio_kc705_defconfig
powerpcgamecube_defconfig
mips tb0226_defconfig
nios2 3c120_defconfig
mips bigsur_defconfig
powerpc  storcenter_defconfig
powerpcmpc7448_hpc2_defconfig
m68km5272c3_defconfig
sh ecovec24_defconfig
arm eseries_pxa_defconfig
sh  r7780mp_defconfig
powerpc  acadia_defconfig
arc  axs103_defconfig
mips rt305x_defconfig
powerpc asp8347_defconfig
arm  tango4_defconfig
m68k   sun3_defconfig
ia64 allmodconfig
ia64defconfig
ia64 allyesconfig
m68k allmodconfig
m68kdefconfig
m68k allyesconfig
nios2   defconfig
arc  allyesconfig
nds32 allnoconfig
c6x  allyesconfig
nds32   defconfig
nios2allyesconfig
cskydefconfig
alpha   defconfig
alphaallyesconfig
xtensa   allyesconfig
h8300allyesconfig
arc defconfig
sh   allmodconfig
parisc  defconfig
s390 allyesconfig
parisc   allyesconfig
s390defconfig
i386 allyesconfig
sparcallyesconfig
sparc   defconfig
i386   tinyconfig
i386defconfig
mips allmodconfig
powerpc  allyesconfig
powerpc  allmodconfig
powerpc   allnoconfig
i386 randconfig-a005-20210108
i386 randconfig-a002-20210108
i386 randconfig-a001-20210108
i386 randconfig-a003-20210108
i386 randconfig-a006-20210108
i386 randconfig-a004-20210108
i386 randconfig-a016-20210108
i386 randconfig-a011-20210108
i386 randconfig-a014-20210108
i386 randconfig-a015-20210108
i386 randconfig-a013-20210108
i386 randconfig-a012-20210108
x86_64   randconfig-a004-20210108
x86_64   randconfig-a006-20210108
x86_64   randconfig-a001-20210108
x86_64   randconfig-a002-20210108
x86_64   randconfig-a003-20210108
x86_64   randconfig-a005-20210108
riscvnommu_k210_defconfig
riscvallyesconfig
riscvnommu_virt_defconfig
riscv allnoconfig
riscv   defconfig
riscv  rv32_defconfig
riscvallmodconfig
x86_64   rhel

[PATCH net-next] net: marvell: prestera: Correct typo

2021-01-08 Thread Florian Fainelli
The function was incorrectly named with a trailing 'r' at the end of
prestera.

Signed-off-by: Florian Fainelli 
---
Jakub, David,

This patch is on top of Vladimir's series: [PATCH v4 net-next 00/11] Get
rid of the switchdev transactional model

 .../net/ethernet/marvell/prestera/prestera_switchdev.c | 10 +-
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/drivers/net/ethernet/marvell/prestera/prestera_switchdev.c 
b/drivers/net/ethernet/marvell/prestera/prestera_switchdev.c
index e2374a39e4f8..beb6447fbe40 100644
--- a/drivers/net/ethernet/marvell/prestera/prestera_switchdev.c
+++ b/drivers/net/ethernet/marvell/prestera/prestera_switchdev.c
@@ -652,9 +652,9 @@ static int prestera_port_bridge_vlan_stp_set(struct 
prestera_port *port,
return 0;
 }
 
-static int presterar_port_attr_stp_state_set(struct prestera_port *port,
-struct net_device *dev,
-u8 state)
+static int prestera_port_attr_stp_state_set(struct prestera_port *port,
+   struct net_device *dev,
+   u8 state)
 {
struct prestera_bridge_port *br_port;
struct prestera_bridge_vlan *br_vlan;
@@ -702,8 +702,8 @@ static int prestera_port_obj_attr_set(struct net_device 
*dev,
 
switch (attr->id) {
case SWITCHDEV_ATTR_ID_PORT_STP_STATE:
-   err = presterar_port_attr_stp_state_set(port, attr->orig_dev,
-   attr->u.stp_state);
+   err = prestera_port_attr_stp_state_set(port, attr->orig_dev,
+  attr->u.stp_state);
break;
case SWITCHDEV_ATTR_ID_PORT_PRE_BRIDGE_FLAGS:
if (attr->u.brport_flags &
-- 
2.25.1



Re: [PATCH 2/2] scsi: ufs: Protect PM ops and err_handler from user access through sysfs

2021-01-08 Thread Can Guo

On 2021-01-09 12:45, Can Guo wrote:

On 2021-01-08 19:29, Bean Huo wrote:

On Wed, 2021-01-06 at 09:20 +0800, Can Guo wrote:

Hi Bean,

On 2021-01-06 02:38, Bean Huo wrote:
> On Tue, 2021-01-05 at 09:07 +0800, Can Guo wrote:
> > On 2021-01-05 04:05, Bean Huo wrote:
> > > On Sat, 2021-01-02 at 05:59 -0800, Can Guo wrote:
> > > > + * @shutting_down: flag to check if shutdown has been
> > > > invoked
> > >
> > > I am not quite sure if this flag is needed, since once PM goes into the
> > > shutdown path, what will be returned by pm_runtime_get_sync()?
> > >
> > > If pm_runtime_get_sync() will fail, just check its return.
> > >
> >
> > That depends. During/after shutdown, for UFS's case only,
> > pm_runtime_get_sync(hba->dev) will most likely return 0,
> > because it is already RUNTIME_ACTIVE, pm_runtime_get_sync()
> > will directly return 0... meaning you cannot count on it.
> >
> > Check Stanley's change -
> > https://lore.kernel.org/patchwork/patch/1341389/
> >
> > Can Guo.
>
> Can,
>
> Thanks for pointing out that.
>
> Based on my understanding, that patch is redundant. Maybe I
> misunderstood the Linux shutdown sequence.

Sorry, do you mean Stanley's change is redundant?


yes.



No, it is definitely needed. As Stanley replied you in another
thread, it is not protecting I/Os from user layer, but from
other subsystems during shutdown.



>
> I checked the shutdown flow:
>
> 1. Set the "system_state" variable
> 2. Disable usermod to ensure that no user from userspace can start
> a
> request

I hope it is like what you interpreted, but step #2 only stops
UMH(#265)
but not all user space activities. Whereas, UMH is for kernel space
calling
user space.



Can,

I did further study and homework on the Linux shutdown in the last few
days. Yes, you are right, usermodehelper_disable() is to prevent
executing the process from the kernel space.

But I didn't reproduce this "maybe" race issue while shutdown. no
matter how I torment my system, once Linux shutdown/halt/reboot starts,
nobody can access the sysfs node. I create 10 processes in the user
space and constantly access UFS sysfs node, also, fio is running in the
background for the normal data read/write. there is a shutdown thread
that will randomly trigger shutdown/halt/reboot. but no race issue
appears.

I don't know if this is a hypothetical issue (the race between shutdown
flow and sysfs node access); it may not really exist in the Linux
environment. Every time, the shutdown flow will be:

e10_sync_handler()->e10_svc()->do_e10_svc()->__do_sys_reboot()-

kernel_poweroff/kernel_halt()->device_shutdown()->platform_shutdown()-
ufshcd_platform_shutdown()->ufshcd_shutdown().


I think before going into the kernel shutdown, the userspace cannot
issue new requests anymore. otherwise, this would be a big issue.

pm_runtime_get_sync() will return 0 or failure while shutdown? the
answer is not important now, maybe as you said, it is always 0. But in
my testing, it didn't get there the system has been shutdown. Which
means once shutdown starts, sysfs node access path cannot reach
pm_runtime_get_sync(). (note, I don't know if sysfs node access thread
has been disabled or not)


To be honest, I couldn't reproduce this issue on my system (Ubuntu);
maybe you are using Android. I am not an expert on this topic, so if you
have a good idea of how to reproduce this issue, please, please let me
try. I'd appreciate it!



When you do a reboot/shutdown/poweroff, how your system behaves highly
depends on how the reboot cmd is implemented in C code under /sbin/.

On Ubuntu, reboot looks like:
$ reboot --help
reboot [OPTIONS...] [ARG]

Reboot the system.

 --help  Show this help
 --halt  Halt the machine
  -p --poweroff  Switch off the machine
 --rebootReboot the machine
  -f --force Force immediate halt/power-off/reboot
  -w --wtmp-only Don't halt/power-off/reboot, just write wtmp record
  -d --no-wtmp   Don't write wtmp record
 --no-wall   Don't send wall message before halt/power-off/reboot


On a pure Linux with a initrd RAM FS built from busybox, reboot looks 
like:

# reboot --help
BusyBox v1.30.1 (2019-05-24 12:53:36 IST) multi-call binary.

Usage: reboot [-d DELAY] [-n] [-f]

Reboot the system

-d SEC  Delay interval
-n  Do not sync
-f  Force (don't go through init)


For example, when you work on a pure Linux with a filesystem built from
busybox, when you hit reboot cmd, halt_main() will be called. And based
on the reboot options passed to reboot cmd, halt_main() behaves 
differently.


A plain reboot cmd does things like sync the filesystems, send SIGKILL to
all processes (except for init), remount all filesystems as read-only and
so on before invoking the Linux kernel reboot syscall. In this case, we
are safe.

However, if you do a "reboot -f", halt_main() directly invokes 
reboot().

And with "reboot -f", I can easily reproduce the race condition we are
talking about here - it is not based on imagination.

Find the patch 

Re: [PATCH 2/2] scsi: ufs: Protect PM ops and err_handler from user access through sysfs

2021-01-08 Thread Can Guo

On 2021-01-08 19:29, Bean Huo wrote:

On Wed, 2021-01-06 at 09:20 +0800, Can Guo wrote:

Hi Bean,

On 2021-01-06 02:38, Bean Huo wrote:
> On Tue, 2021-01-05 at 09:07 +0800, Can Guo wrote:
> > On 2021-01-05 04:05, Bean Huo wrote:
> > > On Sat, 2021-01-02 at 05:59 -0800, Can Guo wrote:
> > > > + * @shutting_down: flag to check if shutdown has been
> > > > invoked
> > >
> > > I am not quite sure if this flag is needed, since once PM goes into the
> > > shutdown path, what will be returned by pm_runtime_get_sync()?
> > >
> > > If pm_runtime_get_sync() will fail, just check its return.
> > >
> >
> > That depends. During/after shutdown, for UFS's case only,
> > pm_runtime_get_sync(hba->dev) will most likely return 0,
> > because it is already RUNTIME_ACTIVE, pm_runtime_get_sync()
> > will directly return 0... meaning you cannot count on it.
> >
> > Check Stanley's change -
> > https://lore.kernel.org/patchwork/patch/1341389/
> >
> > Can Guo.
>
> Can,
>
> Thanks for pointing out that.
>
> Based on my understanding, that patch is redundant. Maybe I
> misunderstood the Linux shutdown sequence.

Sorry, do you mean Stanley's change is redundant?


yes.



No, it is definitely needed. As Stanley replied you in another
thread, it is not protecting I/Os from user layer, but from
other subsystems during shutdown.



>
> I checked the shutdown flow:
>
> 1. Set the "system_state" variable
> 2. Disable usermod to ensure that no user from userspace can start
> a
> request

I hope it is like what you interpreted, but step #2 only stops
UMH(#265)
but not all user space activities. Whereas, UMH is for kernel space
calling
user space.



Can,

I did further study and homework on the Linux shutdown in the last few
days. Yes, you are right, usermodehelper_disable() is to prevent
executing the process from the kernel space.

But I didn't reproduce this "maybe" race issue while shutdown. no
matter how I torment my system, once Linux shutdown/halt/reboot starts,
nobody can access the sysfs node. I create 10 processes in the user
space and constantly access UFS sysfs node, also, fio is running in the
background for the normal data read/write. there is a shutdown thread
that will randomly trigger shutdown/halt/reboot. but no race issue
appears.

I don't know if this is a hypothetical issue (the race between shutdown
flow and sysfs node access); it may not really exist in the Linux
environment. Every time, the shutdown flow will be:

e10_sync_handler()->e10_svc()->do_e10_svc()->__do_sys_reboot()-

kernel_poweroff/kernel_halt()->device_shutdown()->platform_shutdown()-
ufshcd_platform_shutdown()->ufshcd_shutdown().


I think before going into the kernel shutdown, the userspace cannot
issue new requests anymore. otherwise, this would be a big issue.

pm_runtime_get_sync() will return 0 or failure while shutdown? the
answer is not important now, maybe as you said, it is always 0. But in
my testing, it didn't get there the system has been shutdown. Which
means once shutdown starts, sysfs node access path cannot reach
pm_runtime_get_sync(). (note, I don't know if sysfs node access thread
has been disabled or not)


To be honest, I couldn't reproduce this issue on my system (Ubuntu);
maybe you are using Android. I am not an expert on this topic, so if you
have a good idea of how to reproduce this issue, please, please let me
try. I'd appreciate it!



When you do a reboot/shutdown/poweroff, how your system behaves highly
depends on how the reboot cmd is implemented in C code under /sbin/.

On Ubuntu, reboot looks like:
$ reboot --help
reboot [OPTIONS...] [ARG]

Reboot the system.

 --help  Show this help
 --halt  Halt the machine
  -p --poweroff  Switch off the machine
 --rebootReboot the machine
  -f --force Force immediate halt/power-off/reboot
  -w --wtmp-only Don't halt/power-off/reboot, just write wtmp record
  -d --no-wtmp   Don't write wtmp record
 --no-wall   Don't send wall message before halt/power-off/reboot


On a pure Linux with a initrd RAM FS built from busybox, reboot looks 
like:

# reboot --help
BusyBox v1.30.1 (2019-05-24 12:53:36 IST) multi-call binary.

Usage: reboot [-d DELAY] [-n] [-f]

Reboot the system

-d SEC  Delay interval
-n  Do not sync
-f  Force (don't go through init)


For example, when you work on a pure Linux with a filesystem built from
busybox, when you hit reboot cmd, halt_main() will be called. And based
on the reboot options passed to reboot cmd, halt_main() behaves 
differently.


A plain reboot cmd does things like sync the filesystems, send SIGKILL to
all processes (except for init), remount all filesystems as read-only and
so on before invoking the Linux kernel reboot syscall. In this case, we
are safe.

However, if you do a "reboot -f", halt_main() directly invokes reboot().
And with "reboot -f", I can easily reproduce the race condition we are
talking about here - it is not based on imagination.

Find the patch I used for replication in the attachment, fix 

Re: [PATCH v5 9/9] bus: mhi: core: Do not clear channel context more than once

2021-01-08 Thread Hemant Kumar




On 1/8/21 12:54 PM, Bhaumik Bhatt wrote:

When clearing the channel context, calling mhi_free_coherent()
more than once can result in kernel warnings such as "trying to
free invalid coherent area". Prevent extra work by adding a check
to skip calling mhi_deinit_chan_ctxt() if the client driver has
already disabled the channels.

Signed-off-by: Bhaumik Bhatt 
---
  drivers/bus/mhi/core/init.c | 1 +
  1 file changed, 1 insertion(+)

diff --git a/drivers/bus/mhi/core/init.c b/drivers/bus/mhi/core/init.c
index 30eef19..272f350 100644
--- a/drivers/bus/mhi/core/init.c
+++ b/drivers/bus/mhi/core/init.c
@@ -1314,6 +1314,7 @@ static int mhi_driver_remove(struct device *dev)
  
  		if ((ch_state[dir] == MHI_CH_STATE_ENABLED ||

 ch_state[dir] == MHI_CH_STATE_STOP) &&
+   mhi_chan->ch_state != MHI_CH_STATE_DISABLED &&
!mhi_chan->offload_ch)
mhi_deinit_chan_ctxt(mhi_cntrl, mhi_chan);
  


Reviewed-by: Hemant Kumar 
--
The Qualcomm Innovation Center, Inc. is a member of the Code Aurora Forum,
a Linux Foundation Collaborative Project


[PATCH] Revert "tcp: simplify window probe aborting on USER_TIMEOUT"

2021-01-08 Thread Enke Chen
From: Enke Chen 

This reverts commit 9721e709fa68ef9b860c322b474cfbd1f8285b0f.

With the commit 9721e709fa68 ("tcp: simplify window probe aborting
on USER_TIMEOUT"), the TCP session does not terminate with
TCP_USER_TIMEOUT when data remain untransmitted due to zero window.

The number of unanswered zero-window probes (tcp_probes_out) is
reset to zero with incoming acks irrespective of the window size,
as described in tcp_probe_timer():

RFC 1122 4.2.2.17 requires the sender to stay open indefinitely
as long as the receiver continues to respond probes. We support
this by default and reset icsk_probes_out with incoming ACKs.

This counter, however, is the wrong one to be used in calculating the
duration that the window remains closed and data remain untransmitted.
Thanks to Jonathan Maxwell  for diagnosing the
actual issue.

Cc: sta...@vger.kernel.org
Fixes: 9721e709fa68 ("tcp: simplify window probe aborting on USER_TIMEOUT")
Reported-by: William McCall 
Signed-off-by: Enke Chen 
---
 net/ipv4/tcp_timer.c | 14 +++---
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c
index 6c62b9ea1320..ad98f2ea89f1 100644
--- a/net/ipv4/tcp_timer.c
+++ b/net/ipv4/tcp_timer.c
@@ -346,6 +346,7 @@ static void tcp_probe_timer(struct sock *sk)
struct sk_buff *skb = tcp_send_head(sk);
struct tcp_sock *tp = tcp_sk(sk);
int max_probes;
+   u32 start_ts;
 
if (tp->packets_out || !skb) {
icsk->icsk_probes_out = 0;
@@ -360,13 +361,12 @@ static void tcp_probe_timer(struct sock *sk)
 * corresponding system limit. We also implement similar policy when
 * we use RTO to probe window in tcp_retransmit_timer().
 */
-   if (icsk->icsk_user_timeout) {
-   u32 elapsed = tcp_model_timeout(sk, icsk->icsk_probes_out,
-   tcp_probe0_base(sk));
-
-   if (elapsed >= icsk->icsk_user_timeout)
-   goto abort;
-   }
+   start_ts = tcp_skb_timestamp(skb);
+   if (!start_ts)
+   skb->skb_mstamp_ns = tp->tcp_clock_cache;
+   else if (icsk->icsk_user_timeout &&
+(s32)(tcp_time_stamp(tp) - start_ts) > icsk->icsk_user_timeout)
+   goto abort;
 
max_probes = sock_net(sk)->ipv4.sysctl_tcp_retries2;
if (sock_flag(sk, SOCK_DEAD)) {
-- 
2.29.2



Re: [PATCH] mm/hugetlb: Fix potential double free in hugetlb_register_node() error path

2021-01-08 Thread Muchun Song
On Thu, Jan 7, 2021 at 8:36 PM Miaohe Lin  wrote:
>
> In hugetlb_sysfs_add_hstate(), we would do kobject_put() on hstate_kobjs
> when failed to create sysfs group but forget to set hstate_kobjs to NULL.
> Then in hugetlb_register_node() error path, we may free it again via
> hugetlb_unregister_node().
>
> Fixes: a3437870160c ("hugetlb: new sysfs interface")
> Signed-off-by: Miaohe Lin 
> Cc: 
> ---
>  mm/hugetlb.c | 4 +++-
>  1 file changed, 3 insertions(+), 1 deletion(-)

Reviewed-by: Muchun Song 

>
> diff --git a/mm/hugetlb.c b/mm/hugetlb.c
> index e249bffa0e75..91a2a2025a2c 100644
> --- a/mm/hugetlb.c
> +++ b/mm/hugetlb.c
> @@ -2947,8 +2947,10 @@ static int hugetlb_sysfs_add_hstate(struct hstate *h, 
> struct kobject *parent,
> return -ENOMEM;
>
> retval = sysfs_create_group(hstate_kobjs[hi], hstate_attr_group);
> -   if (retval)
> +   if (retval) {
> kobject_put(hstate_kobjs[hi]);
> +   hstate_kobjs[hi] = NULL;
> +   }
>
> return retval;
>  }
> --
> 2.19.1
>


Re: [External] Re: [PATCH v2 6/6] mm: hugetlb: remove VM_BUG_ON_PAGE from page_huge_active

2021-01-08 Thread Muchun Song
On Sat, Jan 9, 2021 at 6:24 AM Mike Kravetz  wrote:
>
> On 1/6/21 12:47 AM, Muchun Song wrote:
> > The page_huge_active() can be called from scan_movable_pages() which
> > do not hold a reference count to the HugeTLB page. So when we call
> > page_huge_active() from scan_movable_pages(), the HugeTLB page can
> > be freed parallel. Then we will trigger a BUG_ON which is in the
> > page_huge_active() when CONFIG_DEBUG_VM is enabled. Just remove the
> > VM_BUG_ON_PAGE.
> >
> > Fixes: 7e1f049efb86 ("mm: hugetlb: cleanup using paeg_huge_active()")
> > Signed-off-by: Muchun Song 
> > Reviewed-by: Mike Kravetz 
> > ---
> >  mm/hugetlb.c | 1 -
> >  1 file changed, 1 deletion(-)
> >
> > diff --git a/mm/hugetlb.c b/mm/hugetlb.c
> > index 67200dd25b1d..7a24ed28ec4f 100644
> > --- a/mm/hugetlb.c
> > +++ b/mm/hugetlb.c
> > @@ -1372,7 +1372,6 @@ struct hstate *size_to_hstate(unsigned long size)
> >   */
> >  bool page_huge_active(struct page *page)
> >  {
> > - VM_BUG_ON_PAGE(!PageHuge(page), page);
> > >   return PageHead(page) && PagePrivate(&page[1]);
> >  }
>
> After more thought, should that return statement be changed to?
> return PageHeadHuge(page) && PagePrivate(&page[1]);

Agree.

>
> We only want to test that PagePrivate flag for hugetlb head pages.
> Although, the possibility that the hugetlb page was freed and turned
> into another compound page in this race window is REALLY small.

Yeah. Thanks. I will update to PageHeadHuge().

> --
> Mike Kravetz


Re: [PATCH v4 1/1] PCI/ERR: don't clobber status after reset_link()

2021-01-08 Thread Kuppuswamy, Sathyanarayanan




On 1/8/21 2:30 PM, Bjorn Helgaas wrote:

Can we push this forward now?  There are several pending patches in
this area from Keith and Sathyanarayanan; I haven't gotten to them
yet, so not sure whether they help address any of this.


Following two patches should also address the same issue.

My patch:

https://patchwork.kernel.org/project/linux-pci/patch/6f63321637fef86b6cf0beebf98b987062f9e811.1610153755.git.sathyanarayanan.kuppusw...@linux.intel.com/

Keith's patch:

https://patchwork.kernel.org/project/linux-pci/patch/20210104230300.1277180-4-kbu...@kernel.org/



--
Sathyanarayanan Kuppuswamy
Linux Kernel Developer


[rcu:rcu/test] BUILD SUCCESS 2ced10737a8b96d41e0c9d9e43d42b545552649c

2021-01-08 Thread kernel test robot
tree/branch: 
https://git.kernel.org/pub/scm/linux/kernel/git/paulmck/linux-rcu.git  rcu/test
branch HEAD: 2ced10737a8b96d41e0c9d9e43d42b545552649c  Merge branch 
'dev.2021.01.05a' into HEAD

elapsed time: 1581m

configs tested: 126
configs skipped: 2

The following configs have been built successfully.
More configs may be tested in the coming days.

gcc tested configs:
arm defconfig
arm64allyesconfig
arm64   defconfig
arm  allyesconfig
arm  allmodconfig
nios2allyesconfig
powerpcmvme5100_defconfig
mips  ath25_defconfig
sh shx3_defconfig
m68k  multi_defconfig
sh  lboxre2_defconfig
arc haps_hs_smp_defconfig
um   x86_64_defconfig
sparc64  alldefconfig
armoxnas_v6_defconfig
sh   se7721_defconfig
openriscdefconfig
sh   j2_defconfig
mipsomega2p_defconfig
mips  bmips_stb_defconfig
mips   ip32_defconfig
mips  loongson3_defconfig
mips cobalt_defconfig
arm   h3600_defconfig
m68k  atari_defconfig
sparc   sparc64_defconfig
c6xevmc6472_defconfig
xtensageneric_kc705_defconfig
mips  decstation_64_defconfig
shsh7785lcr_defconfig
nds32alldefconfig
sh microdev_defconfig
powerpc  arches_defconfig
mipsworkpad_defconfig
shtitan_defconfig
powerpc  ppc64e_defconfig
shdreamcast_defconfig
powerpc  mpc885_ads_defconfig
mipsmalta_kvm_guest_defconfig
i386 allyesconfig
powerpc   ppc64_defconfig
powerpc tqm8560_defconfig
nios2 3c120_defconfig
mips bigsur_defconfig
powerpc  storcenter_defconfig
powerpcmpc7448_hpc2_defconfig
m68km5272c3_defconfig
sh ecovec24_defconfig
arm eseries_pxa_defconfig
sh  r7780mp_defconfig
powerpc  acadia_defconfig
arc  axs103_defconfig
armvexpress_defconfig
mipsgpr_defconfig
shhp6xx_defconfig
arm  integrator_defconfig
sh   rts7751r2dplus_defconfig
ia64 allmodconfig
ia64defconfig
ia64 allyesconfig
m68k allmodconfig
m68kdefconfig
m68k allyesconfig
nios2   defconfig
arc  allyesconfig
nds32 allnoconfig
c6x  allyesconfig
nds32   defconfig
cskydefconfig
alpha   defconfig
alphaallyesconfig
xtensa   allyesconfig
h8300allyesconfig
arc defconfig
sh   allmodconfig
parisc  defconfig
s390 allyesconfig
parisc   allyesconfig
s390defconfig
sparcallyesconfig
sparc   defconfig
i386   tinyconfig
i386defconfig
mips allyesconfig
mips allmodconfig
powerpc  allyesconfig
powerpc  allmodconfig
powerpc   allnoconfig
x86_64   randconfig-a004-20210108
x86_64   randconfig-a006-20210108
x86_64   randconfig-a001-20210108
x86_64   randconfig-a002-20210108
x86_64   randconfig-a003-20210108
x86_64   randconfig-a005-20210108
i386 randconfig-a005-20210108
i386 randconfig-a002-20210108
i386 randconfig-a001-20210108
i386 randconfig-a003-20210108
i386 randconfig-a006-20210108
i386 randconfig-a004-20210108
i386

[PATCH] ASoC: soc-pcm: return correct -ERRNO in failure path

2021-01-08 Thread Souptick Joarder
Kernel test robot throws below error ->

sound/soc/soc-pcm.c:2523 dpcm_run_update_startup() error: uninitialized
symbol 'ret'.

Initializing ret = 0 and returning correct -ERRNO in failure path.

Reported-by: kernel test robot 
Signed-off-by: Souptick Joarder 
---
 sound/soc/soc-pcm.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/sound/soc/soc-pcm.c b/sound/soc/soc-pcm.c
index 481a4a2..29328ce 100644
--- a/sound/soc/soc-pcm.c
+++ b/sound/soc/soc-pcm.c
@@ -2432,7 +2432,7 @@ static int dpcm_run_update_startup(struct 
snd_soc_pcm_runtime *fe, int stream)
snd_soc_dpcm_get_substream(fe, stream);
struct snd_soc_dpcm *dpcm;
enum snd_soc_dpcm_trigger trigger = fe->dai_link->trigger[stream];
-   int ret;
+   int ret = 0;
unsigned long flags;
 
dev_dbg(fe->dev, "ASoC: runtime %s open on FE %s\n",
@@ -2441,6 +2441,7 @@ static int dpcm_run_update_startup(struct 
snd_soc_pcm_runtime *fe, int stream)
/* Only start the BE if the FE is ready */
if (fe->dpcm[stream].state == SND_SOC_DPCM_STATE_HW_FREE ||
fe->dpcm[stream].state == SND_SOC_DPCM_STATE_CLOSE) {
+   ret = -EINVAL;
dev_err(fe->dev, "ASoC: FE %s is not ready %d\n",
fe->dai_link->name, fe->dpcm[stream].state);
goto disconnect;
-- 
1.9.1



Re: [PATCH] dma-buf: cma_heap: Fix memory leak in CMA heap

2021-01-08 Thread Sumit Semwal
Hi John,

On Fri, 8 Jan 2021 at 01:56, John Stultz  wrote:
>
> Bing Song noticed the CMA heap was leaking memory due to a flub
> I made in commit a5d2d29e24be ("dma-buf: heaps: Move heap-helper
> logic into the cma_heap implementation"), and provided this fix
> which ensures the pagelist is also freed on release.
Thanks for your patch.
>
> Cc: Bing Song 
> Cc: Sumit Semwal 
> Cc: Liam Mark 
> Cc: Laura Abbott 
> Cc: Brian Starkey 
> Cc: Hridya Valsaraju 
> Cc: Suren Baghdasaryan 
> Cc: Sandeep Patil 
> Cc: Daniel Mentz 
> Cc: Chris Goldsworthy 
> Cc: Ørjan Eide 
> Cc: Robin Murphy 
> Cc: Ezequiel Garcia 
> Cc: Simon Ser 
> Cc: James Jones 
> Cc: linux-me...@vger.kernel.org
> Cc: dri-de...@lists.freedesktop.org
> Reported-by: Bing Song 
> Fixes: a5d2d29e24be ("dma-buf: heaps: Move heap-helper logic into the 
> cma_heap implementation")
> Signed-off-by: John Stultz 

I will queue it up so it gets in the v5.11 cycle.
> ---
>  drivers/dma-buf/heaps/cma_heap.c | 3 +++
>  1 file changed, 3 insertions(+)
>
> diff --git a/drivers/dma-buf/heaps/cma_heap.c 
> b/drivers/dma-buf/heaps/cma_heap.c
> index 3c4e34301172..364fc2f3e499 100644
> --- a/drivers/dma-buf/heaps/cma_heap.c
> +++ b/drivers/dma-buf/heaps/cma_heap.c
> @@ -251,6 +251,9 @@ static void cma_heap_dma_buf_release(struct dma_buf 
> *dmabuf)
> buffer->vaddr = NULL;
> }
>
> +   /* free page list */
> +   kfree(buffer->pages);
> +   /* release memory */
> cma_release(cma_heap->cma, buffer->cma_pages, buffer->pagecount);
> kfree(buffer);
>  }
> --
> 2.17.1
>
Best,
Sumit.


Re: [PATCH V7 01/13] dt-bindings: soc: Add dvfsrc driver bindings

2021-01-08 Thread Rob Herring
On Fri, 08 Jan 2021 15:48:03 +0800, Henry Chen wrote:
> Document the binding for enabling dvfsrc on MediaTek SoC.
> 
> Signed-off-by: Henry Chen 
> ---
>  .../devicetree/bindings/soc/mediatek/dvfsrc.yaml   | 67 
> ++
>  1 file changed, 67 insertions(+)
>  create mode 100644 Documentation/devicetree/bindings/soc/mediatek/dvfsrc.yaml
> 

My bot found errors running 'make dt_binding_check' on your patch:

yamllint warnings/errors:

dtschema/dtc warnings/errors:
Documentation/devicetree/bindings/soc/mediatek/dvfsrc.example.dts:19:18: fatal 
error: dt-bindings/interconnect/mtk,mt8183-emi.h: No such file or directory
   19 | #include <dt-bindings/interconnect/mtk,mt8183-emi.h>
  |  ^~~
compilation terminated.
make[1]: *** [scripts/Makefile.lib:344: 
Documentation/devicetree/bindings/soc/mediatek/dvfsrc.example.dt.yaml] Error 1
make: *** [Makefile:1370: dt_binding_check] Error 2

See https://patchwork.ozlabs.org/patch/1423679

This check can fail if there are any dependencies. The base for a patch
series is generally the most recent rc1.

If you already ran 'make dt_binding_check' and didn't see the above
error(s), then make sure 'yamllint' is installed and dt-schema is up to
date:

pip3 install dtschema --upgrade

Please check and re-submit.



Re: [PATCH] mmc: sdhci-iproc: Add ACPI bindings for the rpi4

2021-01-08 Thread Jeremy Linton

Hi,

On 1/8/21 7:10 PM, kernel test robot wrote:

Hi Jeremy,

Thank you for the patch! Perhaps something to improve:

[auto build test WARNING on linus/master]
[also build test WARNING on v5.11-rc2 next-20210108]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch]

url:
https://github.com/0day-ci/linux/commits/Jeremy-Linton/mmc-sdhci-iproc-Add-ACPI-bindings-for-the-rpi4/20210109-051645
base:   https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git 
6279d812eab67a6df6b22fa495201db6f2305924
config: riscv-randconfig-r012-20210108 (attached as .config)
compiler: clang version 12.0.0 (https://github.com/llvm/llvm-project 
bc556e5685c0f97e79fb7b3c6f15cc5062db8e36)
reproduce (this is a W=1 build):
 wget 
https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O 
~/bin/make.cross
 chmod +x ~/bin/make.cross
 # install riscv cross compiling tool for clang build
 # apt-get install binutils-riscv64-linux-gnu
 # 
https://github.com/0day-ci/linux/commit/659eacf5a5de971ea94390dd6c7443c82d53ea5e
 git remote add linux-review https://github.com/0day-ci/linux
 git fetch --no-tags linux-review 
Jeremy-Linton/mmc-sdhci-iproc-Add-ACPI-bindings-for-the-rpi4/20210109-051645
 git checkout 659eacf5a5de971ea94390dd6c7443c82d53ea5e
 # save the attached .config to linux build tree
 COMPILER_INSTALL_PATH=$HOME/0day COMPILER=clang make.cross ARCH=riscv

If you fix the issue, kindly add following tag as appropriate
Reported-by: kernel test robot 

All warnings (new ones prefixed by >>):


(trimming)


include/asm-generic/io.h:1005:55: warning: performing pointer arithmetic on 
a null pointer has undefined behavior [-Wnull-pointer-arithmetic]
return (port > MMIO_UPPER_LIMIT) ? NULL : PCI_IOBASE + port;
  ~~ ^

drivers/mmc/host/sdhci-iproc.c:272:38: warning: unused variable 
'bcm_arasan_data' [-Wunused-const-variable]

static const struct sdhci_iproc_data bcm_arasan_data = {


I think this is the only one caused by this patch, and it's because the 
new structures are only used inside the #ifdef ACPI block.


I will post a v2.


 ^
8 warnings generated.


vim +/bcm_arasan_data +272 drivers/mmc/host/sdhci-iproc.c

271 
  > 272  static const struct sdhci_iproc_data bcm_arasan_data = {
273 .pdata = &sdhci_bcm_arasan_data,
274 };
275 

---
0-DAY CI Kernel Test Service, Intel Corporation
https://lists.01.org/hyperkitty/list/kbuild-...@lists.01.org





Re: [PATCH net v3] net: fix use-after-free when UDP GRO with shared fraglist

2021-01-08 Thread Jakub Kicinski
On Fri, 8 Jan 2021 11:18:39 +0100 Daniel Borkmann wrote:
> On 1/8/21 3:28 AM, Dongseok Yi wrote:
> > skbs in fraglist could be shared by a BPF filter loaded at TC. If TC
> > writes, it will call skb_ensure_writable -> pskb_expand_head to create
> > a private linear section for the head_skb. And then call
> > skb_clone_fraglist -> skb_get on each skb in the fraglist.
> > 
> > skb_segment_list overwrites part of the skb linear section of each
> > fragment itself. Even after skb_clone, the frag_skbs share their
> > linear section with their clone in PF_PACKET.
> > 
> > Both sk_receive_queue of PF_PACKET and PF_INET (or PF_INET6) can have
> > a link for the same frag_skbs chain. If a new skb (not frags) is
> > queued to one of the sk_receive_queue, multiple ptypes can see and
> > release this. It causes use-after-free.
> > 
> > [ 4443.426215] [ cut here ]
> > [ 4443.426222] refcount_t: underflow; use-after-free.
> > [ 4443.426291] WARNING: CPU: 7 PID: 28161 at lib/refcount.c:190
> > refcount_dec_and_test_checked+0xa4/0xc8
> > [ 4443.426726] pstate: 6045 (nZCv daif +PAN -UAO)
> > [ 4443.426732] pc : refcount_dec_and_test_checked+0xa4/0xc8
> > [ 4443.426737] lr : refcount_dec_and_test_checked+0xa0/0xc8
> > [ 4443.426808] Call trace:
> > [ 4443.426813]  refcount_dec_and_test_checked+0xa4/0xc8
> > [ 4443.426823]  skb_release_data+0x144/0x264
> > [ 4443.426828]  kfree_skb+0x58/0xc4
> > [ 4443.426832]  skb_queue_purge+0x64/0x9c
> > [ 4443.426844]  packet_set_ring+0x5f0/0x820
> > [ 4443.426849]  packet_setsockopt+0x5a4/0xcd0
> > [ 4443.426853]  __sys_setsockopt+0x188/0x278
> > [ 4443.426858]  __arm64_sys_setsockopt+0x28/0x38
> > [ 4443.426869]  el0_svc_common+0xf0/0x1d0
> > [ 4443.426873]  el0_svc_handler+0x74/0x98
> > [ 4443.426880]  el0_svc+0x8/0xc
> > 
> > Fixes: 3a1296a38d0c (net: Support GRO/GSO fraglist chaining.)
> > Signed-off-by: Dongseok Yi 
> > Acked-by: Willem de Bruijn   
> 
> Acked-by: Daniel Borkmann 

Applied, thanks!


RE: [PATCH v2] ACPI: scan: Fix a Hyper-V Linux VM panic caused by buffer overflow

2021-01-08 Thread Dexuan Cui
> From: Dexuan Cui 
> Sent: Thursday, January 7, 2021 11:24 PM
> ...
> Linux VM on Hyper-V crashes with the latest mainline:
> ...
> 
> Changes in v2:
> strlcpy -> kstrdup_const. Thanks Rafael J. Wysocki!
> Change commit log accordingly.

Hi Rafael, Len, and all,
Can you please take a look at the v2 patch?

The Linux mainline has been broken for several weeks when it
runs as a guest on Hyper-V, so we'd like this to be fixed ASAP,
as more people are being affected, e.g.
https://bugzilla.kernel.org/show_bug.cgi?id=210449

Thanks,
-- Dexuan


[PATCH] kvm: x86: Mark __kvm_vcpu_halt() as static

2021-01-08 Thread Souptick Joarder
Kernel test robot throws below warning ->

>> arch/x86/kvm/x86.c:7979:5: warning: no previous prototype for
>> '__kvm_vcpu_halt' [-Wmissing-prototypes]
7979 | int __kvm_vcpu_halt(struct kvm_vcpu *vcpu, int state, int
reason)
 | ^~~

Marking __kvm_vcpu_halt() as static as it is used inside this file.

Reported-by: kernel test robot 
Signed-off-by: Souptick Joarder 
---
 arch/x86/kvm/x86.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 61499e1..c2fdf14 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -109,6 +109,7 @@
 static void __kvm_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags);
 static void store_regs(struct kvm_vcpu *vcpu);
 static int sync_regs(struct kvm_vcpu *vcpu);
+static int __kvm_vcpu_halt(struct kvm_vcpu *vcpu, int state, int reason);
 
 struct kvm_x86_ops kvm_x86_ops __read_mostly;
 EXPORT_SYMBOL_GPL(kvm_x86_ops);
@@ -7976,7 +7977,7 @@ void kvm_arch_exit(void)
kmem_cache_destroy(x86_fpu_cache);
 }
 
-int __kvm_vcpu_halt(struct kvm_vcpu *vcpu, int state, int reason)
+static int __kvm_vcpu_halt(struct kvm_vcpu *vcpu, int state, int reason)
 {
++vcpu->stat.halt_exits;
if (lapic_in_kernel(vcpu)) {
-- 
1.9.1



[rcu:dev.2021.01.04b] BUILD SUCCESS 27d9a16d9727531fbc4d01025e7c6f5712c7a859

2021-01-08 Thread kernel test robot
   allnoconfig
x86_64   randconfig-a006-20210108
x86_64   randconfig-a005-20210108
x86_64   randconfig-a004-20210108
x86_64   randconfig-a001-20210108
x86_64   randconfig-a002-20210108
x86_64   randconfig-a003-20210108
i386 randconfig-a005-20210108
i386 randconfig-a002-20210108
i386 randconfig-a001-20210108
i386 randconfig-a003-20210108
i386 randconfig-a006-20210108
i386 randconfig-a004-20210108
i386 randconfig-a016-20210108
i386 randconfig-a011-20210108
i386 randconfig-a014-20210108
i386 randconfig-a015-20210108
i386 randconfig-a013-20210108
i386 randconfig-a012-20210108
riscvnommu_k210_defconfig
riscvallyesconfig
riscvnommu_virt_defconfig
riscv allnoconfig
riscv   defconfig
riscv  rv32_defconfig
riscvallmodconfig
x86_64   rhel
x86_64   allyesconfig
x86_64rhel-7.6-kselftests
x86_64  defconfig
x86_64   rhel-8.3
x86_64  rhel-8.3-kbuiltin
x86_64  kexec

clang tested configs:
x86_64   randconfig-a013-20210108
x86_64   randconfig-a011-20210108
x86_64   randconfig-a012-20210108
x86_64   randconfig-a016-20210108
x86_64   randconfig-a014-20210108
x86_64   randconfig-a015-20210108

---
0-DAY CI Kernel Test Service, Intel Corporation
https://lists.01.org/hyperkitty/list/kbuild-...@lists.01.org


Re: [RFC 2/2] clk: vc5: Add support for optional load capacitance

2021-01-08 Thread Adam Ford
On Fri, Jan 8, 2021 at 4:49 PM Luca Ceresoli  wrote:
>
> Hi Adam,
>
> On 06/01/21 18:39, Adam Ford wrote:
> > There are two registers which can set the load capacitance for
> > XTAL1 and XTAL2. These are optional registers when using an
> > external crystal.  Parse the device tree and set the
> > corresponding registers accordingly.
>
> No need to repeat the first 2 sentences, they are already in patch 1.

The reason I did that was because if someone does a git log on the
individual file, they'd see the comment.  While it's redundant now, it
might not be as obvious in the future when looking back.   Not
everyone reviews the history of the binding, but the source files' git
logs usually have some value.   However, if you want me to drop it or
rephrase it, I can do that.

>
> >
> > Signed-off-by: Adam Ford 
> > ---
> >  drivers/clk/clk-versaclock5.c | 64 +++
> >  1 file changed, 64 insertions(+)
> >
> > diff --git a/drivers/clk/clk-versaclock5.c b/drivers/clk/clk-versaclock5.c
> > index 43db67337bc0..445abc3731fb 100644
> > --- a/drivers/clk/clk-versaclock5.c
> > +++ b/drivers/clk/clk-versaclock5.c
> > @@ -759,6 +759,63 @@ static int vc5_update_power(struct device_node 
> > *np_output,
> >   return 0;
> >  }
> >
> > +static int vc5_map_cap_value(u32 femtofarads)
> > +{
> > + int mapped_value;
> > +
> > + /* The datasheet explicitly states 9000 - 25000 */
> > + if ((femtofarads < 9000) || (femtofarads > 25000))
> > + return -EINVAL;
> > +
> > + /* The lowest target we can hit is 9430, so exit if it's less */
> > + if (femtofarads < 9430)
> > + return 0;
> > +
> > + /*
> > +  * According to VersaClock 6E Programming Guide, there are 6
> > +  * bits which translate to 64 entries in XTAL registers 12 and
> > +  * 13. Because bits 0 and 1 increase the capacitance the
> > +  * same, some of the values can be repeated.  Plugging this
> > +  * into a spreadsheet and generating a trendline, the output
> > +  * equation becomes x = (y-9098.29) / 216.44, where 'y' is
> > +  * the desired capacitance in femtofarads, and x is the value
> > +  * of XTAL[5:0].
> > +  * To help with rounding, do fixed point math
> > +  */
> > + femtofarads *= 100;
> > + mapped_value = (femtofarads - 909829) / 21644;
>
> Thanks for the extensive comment, but I am confused. Not by your code
> which is very clean and readable, but by the chip documentation
> (disclaimer: I haven't read it in full depth).

I was confused too since the datasheet and programmers manual differ a bit.
>
> The 5P49V6965 datasheet at page 17 clearly states capacitance can be
> increased in 0.5 pF steps. The "VersaClock 6E Family Register
> Descriptions and Programming Guide" at page 18 shows a table that allows
> 0.43 pF. Can you clarify how the thing works?

I used the Versaclock 6E doc which is based on the following:

BIT 5 - Add 6.92pF
BIT 4 - Add 3.46pF
BIT 3 - Add 1.73pF
BIT 2 - Add 0.86pF
Bit 1 - Add 0.43pF
Bit 0 - Add 0.43pF

Because the Datasheet starts at 9pF, the math I used, assumes these
numbers are added to 9pF.
Because the datasheet shows the increments are in .5pF increments, the
430fF seems close.  The datasheet shows 9pF - 25pF and based on the
programmer table, we could get close to 25pF by enabling all bits and
adding 9pF, however the math doesn't quite hit 25pF.

For what it's worth I needed around 11.5pF, and with this patch, the
hardware engineer said our ppm went from around 70 ppm to around 4ppm.

>
> > +
> > + /*
> > +  * The datasheet states, the maximum capacitance is 25000,
> > +  * but the programmer guide shows a max value is 22832,
> > +  * so values higher values could overflow, so cap it.
> > +  */
>
> The 22832 limit is if you assume 0.43 pF steps. Assuming 0.5 pF steps
> leads to 25000. Now I am more confused than before.

I agree.  It would be nice to get some clarification from Renesas.

>
> > + mapped_value = max(mapped_value/100, 0x3f);
>
> Uhm, min()?

Oops!  You're absolutely right.

>
> > +
> > + return mapped_value;
> > +}
> > +static int vc5_update_cap_load(struct device_node *node, struct 
> > vc5_driver_data *vc5)
> > +{
> > + u32 value, mapped_value;
> > +
> > + if (!of_property_read_u32(node, "idt,xtal1-load-femtofarads", 
> > &value)) {
> > + mapped_value = vc5_map_cap_value(value);
> > + if (mapped_value < 0)
> > + return mapped_value;
> > +
> > + regmap_write(vc5->regmap, VC5_XTAL_X1_LOAD_CAP, (mapped_value 
> > << 2));
> > + }
> > +
> > + if (!of_property_read_u32(node, "idt,xtal2-load-femtofarads", 
> > &value)) {
> > + mapped_value = vc5_map_cap_value(value);
> > + if (mapped_value < 0)
> > + return mapped_value;
> > + regmap_write(vc5->regmap, VC5_XTAL_X2_LOAD_CAP, (mapped_value 
> > << 2));
> > + }
> > +
> > + return 0;
> > +}
> > 

Re: [RFC PATCH v2] pinctrl: add helper to expose pinctrl state in debugfs

2021-01-08 Thread Drew Fustini
On Sat, Jan 09, 2021 at 02:22:07AM +0100, Linus Walleij wrote:
> Hi Drew,
> 
> sorry for belated review. The approach is so uncommon so it had me
> confused.
> 
> On Thu, Dec 24, 2020 at 9:36 PM Drew Fustini  wrote:
> 
> > > > I used the compatible string "pinctrl,state-helper" but would appreciate
> > > > advice on how to best name this. Should I create a new vendor prefix?
> > >
> > > Here is the first concern. Why does this require to be a driver with a
> > > compatible string?
> >
> > I have not been able to figure out how to have different active pinctrl
> > states for each header pins (for example P2 header pin 3) unless they
> > are represented as DT nodes with their own compatible for this helper
> > driver such as:
> >
> >  {
> > P2_03_pinmux {
> > compatible = "pinctrl,state-helper";
> > pinctrl-names = "default", "gpio", "gpio_pu", "gpio_pd", 
> > "gpio_input", "pwm";
> > pinctrl-0 = <&P2_03_default_pin>;
> > pinctrl-1 = <&P2_03_gpio_pin>;
> > pinctrl-2 = <&P2_03_gpio_pu_pin>;
> > pinctrl-3 = <&P2_03_gpio_pd_pin>;
> > pinctrl-4 = <&P2_03_gpio_input_pin>;
> > pinctrl-5 = <&P2_03_pwm_pin>;
> > };
> > }
> 
> I do not think the DT people are going to appreciate this pseudo-device.

Thank you for reviewing and commenting.

It does seem like creating a platform device for each header pin and
binding to this proposed helper driver is not the correct approach.
 
> Can you not just represent them as pin control hogs and have the debugfs
> code with the other debugfs code in drivers/pinctrl/core.c?

I tried defining pinctrl states in the am33xx_pinmux DT node (which has 
compatible "pinctrl-single").  It does work to have default state
defined for pins.  However, I was not sure how to represent having
different states active for independent header pins.

Instead of DT binds, maybe I need to use PIN_MAP_MUX_GROUP_HOG_DEFAULT()
in pinctrl-single code?

> 
> Normal drivers cannot play around with the state assigned to a
> hog, but debugfs can certainly do that so go ahead and patch
> the core.

Is there an existing debugfs file that you think would be appropriate to
allow the state of a hog to be changed?
 
> > I can assign pinctrl states in the pin controller DT node which has
> > compatible pinctrl-single (line 301 arch/arm/boot/dts/am33xx-l4.dtsi):
> >
> > _pinmux {
> >
> > pinctrl-names = "default", "gpio", "pwm";
> > pinctrl-0 =   < _03_default_pin _34_default_pin 
> > _19_default_pin _24_default_pin
> > _33_default_pin _22_default_pin 
> > _18_default_pin _10_default_pin
> > _06_default_pin _04_default_pin 
> > _02_default_pin _08_default_pin
> > _17_default_pin >;
> > pinctrl-1 =   < _03_gpio_pin _34_gpio_pin _19_gpio_pin 
> > _24_gpio_pin
> > _33_gpio_pin _22_gpio_pin _18_gpio_pin 
> > _10_gpio_pin
> > _06_gpio_pin _04_gpio_pin _02_gpio_pin 
> > _08_gpio_pin
> > _17_gpio_pin >;
> > pinctrl-2 =   < _03_pwm _34_pwm _19_pwm _24_pwm
> > _33_pwm _22_pwm _18_pwm _10_pwm
> > _06_pwm _04_pwm _02_pwm _08_pwm
> > _17_pwm >;
> >
> > }
> >
> > However, there is no way to later select "gpio" for P2.03 and select
> > "pwm" for P1.34 at the same time.  Thus, I can not figure out a way to
> > select independent states per pin unless I make a node for each pin that
> > binds to a helper driver.
> >
> > It feels like there may be a simpler solution but I can't seem to figure
> > it out.  Suggestions welcome!
> 
> I think maybe there is no solution because you are solving a problem
> that only pinctrl-single has while trying to stay generic? The single
> driver is special in that it requires all states of pins to be encoded
> into the device tree, but for debugging that is kind of unfriendly
> which was mentioned in its inception. For deep debugging it is good
> to let the core know of all available functions and groups and
> single does not IIUC.
> 
> Yours,
> Linus Walleij

I discussed my use case and this patch on #armlinux earlier this week
and Alexandre Belloni suggested looking at the pinmux-pins debugfs file.

This made me think that a possible solution could be to define a store
function for pinmux-pins to handle something like "<pin> <function>".
I believe the ability to activate a pin function (or pin group) from
userspace would satisfy our beagleboard.org use-case.

Does that seem like a reasonable approach?

Thank you!
Drew



Re: [PATCH 0/2] page_count can't be used to decide when wp_page_copy

2021-01-08 Thread Andrea Arcangeli
Hello Jason,

On Fri, Jan 08, 2021 at 08:42:55PM -0400, Jason Gunthorpe wrote:
> There is already a patch series floating about to do exactly that for
> FOLL_LONGTERM pins based on the existing code in GUP for CMA migration

Sounds great.

> The ship sailed on this a decade ago, it is completely infeasible to
> go back now, it would completely break widely used things like GPU,
> RDMA and more.

For all those that aren't using mmu notifier and that rely solely on
page pins, they still require privilege, except they do through /dev/
permissions.

Just the fact there's no capability check in the read/write/ioctl
doesn't mean those device inodes can be opened by any luser: the fact the
kernel allows it, doesn't mean the /dev/ permission does too. The same
applies to /dev/kvm too, not just PCI device drivers.

Device drivers that you need to open in /dev/ before you can take a
GUP pin require whole different checks than syscalls like vmsplice and
io_uring that are universally available.

The very same GUP long term pinning kernel code can be perfectly safe
to use without any permission check for a device driver of an iommu in
/dev/, but completely unsafe for a syscall.

> If we want to have a high speed copy_from_user like thing that is not
> based on page pins but on mmu notifiers, then we should make that
> infrastructure and the various places that need it should use common
> code. At least vhost and io_uring are good candidates.

Actually the mmu notifier doesn't strictly require pins, it only
requires GUP. All users tend to use FOLL_GET just as a safety
precaution (I already tried to optimize away the two atomics per GUP,
but we were NAKed by the KVM maintainer that didn't want to take the
risk, I would have, but it's a fair point indeed, obviously it's safer
with the pin plus the mmu notifier, two is safer than one).

I'm not sure how any copy-user could obviate a secondary MMU mapping,
mappings and copies are mutually exclusive. Any copy would be breaking
memory coherency in this environment.

> Otherwise, we are pretending that they are DMA and using the DMA
> centric pin_user_pages() interface, which we still have to support and
> make work.

vhost and io_uring would be pure software constructs, but there are
hardware users of the GUP pin that don't use any DMA.

The long term GUP pin is not only about PCI devices doing DMA. KVM is
not ever using any DMA, despite it takes terabytes worth of very long
term GUP pins.

> > In any case, the extra flags required in FOLL_LONGTERM should be
> > implied by FOLL_LONGTERM itself, once it enters the gup code, because
> > it's not cool having to FOLL_WRITE in all drivers for a GUP(write=0),
> > let alone having to specify FOLL_FORCE for just a read. But this is
> > going offtopic.
> 
> We really should revise this, I've been thinking for a while we need
> to internalize into gup.c the FOLL_FORCE|FOLL_WRITE|FOLL_LONGTERM
> idiom at least..

100% agreed.

> > > simply because it is using the CPU to memory copy as its "DMA".
> > 
> > vmsplice can't find all put_pages that may release the pages when the
> > pipe is read, or it'd at least be able to do the unreliable
> > RLIMIT_MEMLOCK accounting.
> 
> Yikes! So it can't even use pin_user_pages FOLL_LONGTERM properly
> because that requires unpin_user_pages(), which means finding all the
> unpin sites too :\

Exactly.

> > To make another example a single unprivileged pin on the movable zone,
> > can break memhotunplug unless you use the mmu notifier. Every other
> > advanced feature falls apart.
> 
> As above FOLL_LONGTERM will someday migrate from movable zones.

Something like:

1) migrate from movable zones contextually to GUP

2) be accounted on the compound_order not on the number of GUP
   (io_uring needs fixing here)

3) maybe account not only in rlimit, but also expose the total worth
   of GUP pins in page_order units (not pins) to the OOM killer to be
   added to the rss (will double count though).

Maybe 3 is overkill but without it, OOM killer won't even see those
GUP pins coming, so if not done it's still kind of unsafe, if done
it'll risk double count.

Even then a GUP pin, still prevents optimization, it can't converge in
the right NUMA node the io ring just to make an example, but that's a
secondary performance concern.

The primary concern with the mmu notifier in io_uring is the
take_all_locks latency.

Longlived apps like qemu would be fine with mmu notifier. The main
question is also if there's any short-lived latency io_uring
usage... that wouldn't fly with take_all_locks.

The problem with the mmu notifier as a universal solution, for
example is that it can't wait for I/O completion of O_DIRECT since it
has no clue where the put_page is to wait for it, otherwise we could
avoid even the FOLL_GET for O_DIRECT and guarantee the I/O has to be
completed before paging or anything can unmap the page under I/O from
the pagetable.

Even if we could reliably identify all the put_page of transient pins

Re: [RFC 1/2] dt-bindings: clk: versaclock5: Add load capacitance properties

2021-01-08 Thread Adam Ford
On Fri, Jan 8, 2021 at 4:49 PM Luca Ceresoli  wrote:
>
> Hi Adam,
>
> On 06/01/21 18:38, Adam Ford wrote:
> > There are two registers which can set the load capacitance for
> > XTAL1 and XTAL2. These are optional registers when using an
> > external crystal.  Update the bindings to support them.
> >
> > Signed-off-by: Adam Ford 
> > ---
> >  .../devicetree/bindings/clock/idt,versaclock5.yaml   | 12 
> >  1 file changed, 12 insertions(+)
> >
> > diff --git a/Documentation/devicetree/bindings/clock/idt,versaclock5.yaml 
> > b/Documentation/devicetree/bindings/clock/idt,versaclock5.yaml
> > index 2ac1131fd922..e5e55ffb266e 100644
> > --- a/Documentation/devicetree/bindings/clock/idt,versaclock5.yaml
> > +++ b/Documentation/devicetree/bindings/clock/idt,versaclock5.yaml
> > @@ -59,6 +59,18 @@ properties:
> >  minItems: 1
> >  maxItems: 2
> >
> > +  idt,xtal1-load-femtofarads:
>
> I wonder whether we should have a common, vendor independent property.

That would be nice.

> In mainline we have xtal-load-pf (ti,cdce925.txt bindings) which has no
> vendor prefix. However I don't know how common it is to need

rtc-pcf85063.c uses  quartz-load-femtofarads, so there is already some
discrepancy.

Since the unit of measure here is femtofarads, using pF in the name seems wrong.
We need to read the data as a u32, so femtofarads works better than
pF, which would require a decimal point.

> different loads for x1 and x2. Any hardware engineer around?

I talked to a hardware engineer where I work, and he said it makes
sense to keep them the same.  I only separated them because there are
two registers, and I assumed there might be a reason to have X1 and X2
be different, but I'm ok with reading one value and writing it to two
different registers.

adam
>
> > +$ref: /schemas/types.yaml#/definitions/uint32
> > +minimum: 9000
> > +maximum: 25000
> > +description: Optional loading capacitor for XTAL1
>
> Nit: I think the common wording is "load capacitor", not "loading
> capacitor".
>
> --
> Luca


Re: ipmi_msghandler.c question

2021-01-08 Thread Corey Minyard
On Fri, Jan 08, 2021 at 11:37:04PM +, Asmaa Mnebhi wrote:
> Hi Corey,
> 
> I have a question for you related to the following function in 
> ipmi_msghandler.c
> 
> static void __get_guid(struct ipmi_smi *intf)
> {
>   int rv;
>   struct bmc_device *bmc = intf->bmc;
> 
>   bmc->dyn_guid_set = 2;
>   intf->null_user_handler = guid_handler;
>   rv = send_guid_cmd(intf, 0);
>   if (rv)
>   /* Send failed, no GUID available. */
>   bmc->dyn_guid_set = 0;
>   else
>   wait_event(intf->waitq, bmc->dyn_guid_set != 2);
> 
>   /* dyn_guid_set makes the guid data available. */
>   smp_rmb();
> 
>   intf->null_user_handler = NULL;
> }
> 
> Why is wait_event used as opposed to wait_event_timeout? In the context where 
> the dyn_guid_set value doesn't change from 2, this would run forever. 
> Wouldn't we want to timeout after a certain amount of time?
> 

The low-level IPMI driver is guaranteed to return a response to a
message, though if something goes wrong with the BMC it can take a few
seconds to return the failure message.  So it shouldn't be an issue.

-corey

> Thanks.
> Asmaa


[rcu:dev.2021.01.06b] BUILD SUCCESS 1f4548c8c38751b4e54ce84b7773444b8a88c1d3

2021-01-08 Thread kernel test robot
tree/branch: 
https://git.kernel.org/pub/scm/linux/kernel/git/paulmck/linux-rcu.git  
dev.2021.01.06b
branch HEAD: 1f4548c8c38751b4e54ce84b7773444b8a88c1d3  squash! x86/mce: Make 
mce_timed_out() identify holdout CPUs

elapsed time: 1486m

configs tested: 97
configs skipped: 2

The following configs have been built successfully.
More configs may be tested in the coming days.

gcc tested configs:
arm defconfig
arm64allyesconfig
arm64   defconfig
arm  allyesconfig
arm  allmodconfig
mips   mtx1_defconfig
s390 alldefconfig
arm   h5000_defconfig
c6xevmc6474_defconfig
openriscdefconfig
m68k   sun3_defconfig
armspear3xx_defconfig
arm   aspeed_g4_defconfig
nios2alldefconfig
arm vf610m4_defconfig
arm  badge4_defconfig
armneponset_defconfig
arm   imx_v4_v5_defconfig
sh ap325rxa_defconfig
m68k amcore_defconfig
ia64 alldefconfig
shsh7785lcr_defconfig
nds32alldefconfig
sh microdev_defconfig
powerpc  arches_defconfig
nios2 3c120_defconfig
mips bigsur_defconfig
powerpc  storcenter_defconfig
powerpcmpc7448_hpc2_defconfig
m68km5272c3_defconfig
sh ecovec24_defconfig
ia64 allmodconfig
ia64defconfig
ia64 allyesconfig
m68k allmodconfig
m68kdefconfig
m68k allyesconfig
nds32   defconfig
cskydefconfig
alpha   defconfig
alphaallyesconfig
nios2allyesconfig
xtensa   allyesconfig
h8300allyesconfig
arc defconfig
sh   allmodconfig
parisc  defconfig
s390 allyesconfig
parisc   allyesconfig
s390defconfig
nios2   defconfig
arc  allyesconfig
nds32 allnoconfig
c6x  allyesconfig
i386 allyesconfig
sparcallyesconfig
sparc   defconfig
i386   tinyconfig
i386defconfig
mips allyesconfig
mips allmodconfig
powerpc  allyesconfig
powerpc  allmodconfig
powerpc   allnoconfig
i386 randconfig-a006-20210108
x86_64   randconfig-a004-20210108
x86_64   randconfig-a006-20210108
x86_64   randconfig-a001-20210108
x86_64   randconfig-a002-20210108
x86_64   randconfig-a003-20210108
x86_64   randconfig-a005-20210108
i386 randconfig-a016-20210108
i386 randconfig-a011-20210108
i386 randconfig-a014-20210108
i386 randconfig-a015-20210108
i386 randconfig-a013-20210108
i386 randconfig-a012-20210108
riscvnommu_k210_defconfig
riscvallyesconfig
riscvnommu_virt_defconfig
riscv allnoconfig
riscv   defconfig
riscv  rv32_defconfig
riscvallmodconfig
x86_64   allyesconfig
x86_64rhel-7.6-kselftests
x86_64  defconfig
x86_64   rhel-8.3
x86_64  rhel-8.3-kbuiltin
x86_64  kexec
x86_64   rhel

clang tested configs:
x86_64   randconfig-a013-20210108
x86_64   randconfig-a011-20210108
x86_64   randconfig-a012-20210108
x86_64   randconfig-a016-20210108
x86_64   randconfig-a014-20210108
x86_64   randconfig-a015-20210108

---
0-DAY CI Kernel Test Service, Intel Corporation
https://lists.01.org/hyperkitty/list/kbuild-...@lists.01.org


Re: [PATCH] drm/amdkfd: Fix out-of-bounds read in kdf_create_vcrat_image_cpu()

2021-01-08 Thread Jeremy Cline
On Fri, Jan 08, 2021 at 06:46:17PM -0500, Felix Kuehling wrote:
> Am 2021-01-08 um 11:31 a.m. schrieb Jeremy Cline:
> > KASAN reported a slab-out-of-bounds read of size 1 in
> > kdf_create_vcrat_image_cpu().
> >
> > This occurs, for example, on an x86_64 with a single NUMA node
> > because kfd_fill_iolink_info_for_cpu() is a no-op, but afterwards the
> > sub_type_hdr->length, which is out-of-bounds, is read and multiplied by
> > entries. Fortunately, entries is 0 in this case so the overall
> > crat_table->length is still correct.
> 
> That's a pretty big change to fix that. Wouldn't it be enough to add a
> simple check after calling kfd_fill_iolink_info_for_cpu:
> 
> if (entries) {
>   crat_table->length += (sub_type_hdr->length * entries);
>   crat_table->total_entries += entries;
> }
> 
> Or change the output parameters of the kfd_fill_..._for_cpu functions
> from num_entries to size_filled, so the caller doesn't need to read
> sub_type_hdr->length any more.
> 

For sure. I felt like this was a bit tidier afterwards, but that's an
opinion and not one I hold strongly. I'll look at preparing a smaller fix
next week.

Thanks,
Jeremy

> >
> > This refactors the helper functions to accept the crat_table directly
> > and calculate the table entry pointer based on the current table length.
> > This allows us to avoid an out-of-bounds read and hopefully makes the
> > pointer arithmetic clearer. It should have no functional change beyond
> > removing the out-of-bounds read.
> >
> > Fixes: b7b6c38529c9 ("drm/amdkfd: Calculate CPU VCRAT size dynamically 
> > (v2)")
> > Signed-off-by: Jeremy Cline 
> > ---
> >  drivers/gpu/drm/amd/amdkfd/kfd_crat.c | 86 +--
> >  1 file changed, 40 insertions(+), 46 deletions(-)
> >
> > diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c 
> > b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c
> > index 8cac497c2c45..e50db2c0f4ee 100644
> > --- a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c
> > +++ b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c
> > @@ -829,21 +829,24 @@ int kfd_create_crat_image_acpi(void **crat_image, 
> > size_t *size)
> >  /* kfd_fill_cu_for_cpu - Fill in Compute info for the given CPU NUMA node
> >   *
> >   * @numa_node_id: CPU NUMA node id
> > - * @avail_size: Available size in the memory
> > - * @sub_type_hdr: Memory into which compute info will be filled in
> > + * @avail_size: Available space in bytes at the end of the @crat_table.
> > + * @crat_table: The CRAT table to append the Compute info to;
> > + * on success the table length and total_entries count is updated.
> >   *
> >   * Return 0 if successful else return -ve value
> >   */
> >  static int kfd_fill_cu_for_cpu(int numa_node_id, int *avail_size,
> > -   int proximity_domain,
> > -   struct crat_subtype_computeunit *sub_type_hdr)
> > +   struct crat_header *crat_table)
> >  {
> > const struct cpumask *cpumask;
> > +   struct crat_subtype_computeunit *sub_type_hdr;
> >  
> > *avail_size -= sizeof(struct crat_subtype_computeunit);
> > if (*avail_size < 0)
> > return -ENOMEM;
> >  
> > +   sub_type_hdr = (typeof(sub_type_hdr))((char *)crat_table +
> > +   crat_table->length);
> > memset(sub_type_hdr, 0, sizeof(struct crat_subtype_computeunit));
> >  
> > /* Fill in subtype header data */
> > @@ -855,36 +858,42 @@ static int kfd_fill_cu_for_cpu(int numa_node_id, int 
> > *avail_size,
> >  
> > /* Fill in CU data */
> > sub_type_hdr->flags |= CRAT_CU_FLAGS_CPU_PRESENT;
> > -   sub_type_hdr->proximity_domain = proximity_domain;
> > +   sub_type_hdr->proximity_domain = crat_table->num_domains;
> > sub_type_hdr->processor_id_low = kfd_numa_node_to_apic_id(numa_node_id);
> > if (sub_type_hdr->processor_id_low == -1)
> > return -EINVAL;
> >  
> > sub_type_hdr->num_cpu_cores = cpumask_weight(cpumask);
> >  
> > +   crat_table->length += sub_type_hdr->length;
> > +   crat_table->total_entries++;
> > +
> > return 0;
> >  }
> >  
> >  /* kfd_fill_mem_info_for_cpu - Fill in Memory info for the given CPU NUMA 
> > node
> >   *
> >   * @numa_node_id: CPU NUMA node id
> > - * @avail_size: Available size in the memory
> > - * @sub_type_hdr: Memory into which compute info will be filled in
> > + * @avail_size: Available space in bytes at the end of the @crat_table.
> > + * @crat_table: The CRAT table to append the Memory info to;
> > + * on success the table length and total_entries count is updated.
> >   *
> >   * Return 0 if successful else return -ve value
> >   */
> >  static int kfd_fill_mem_info_for_cpu(int numa_node_id, int *avail_size,
> > -   int proximity_domain,
> > -   struct crat_subtype_memory *sub_type_hdr)
> > +   struct crat_header *crat_table)
> >  {
> > uint64_t mem_in_bytes = 0;
> > pg_data_t *pgdat;
> > int zone_type;
> > +   struct crat_subtype_memory 

Re: [PATCH] mm/memcontrol: fix warning in mem_cgroup_page_lruvec()

2021-01-08 Thread Hugh Dickins
On Thu, 7 Jan 2021, Vlastimil Babka wrote:
> On 1/4/21 6:03 AM, Hugh Dickins wrote:
> > Boot a CONFIG_MEMCG=y kernel with "cgroup_disable=memory" and you are
> > met by a series of warnings from the VM_WARN_ON_ONCE_PAGE(!memcg, page)
> > recently added to the inline mem_cgroup_page_lruvec().
> > 
> > An earlier attempt to place that warning, in mem_cgroup_lruvec(), had
> > been careful to do so after weeding out the mem_cgroup_disabled() case;
> > but was itself invalid because of the mem_cgroup_lruvec(NULL, pgdat) in
> > clear_pgdat_congested() and age_active_anon().
> > 
> > Warning in mem_cgroup_page_lruvec() was once useful in detecting a KSM
> > charge bug, so may be worth keeping: but skip if mem_cgroup_disabled().
> > 
> > Fixes: 9a1ac2288cf1 ("mm/memcontrol:rewrite mem_cgroup_page_lruvec()")
> > Signed-off-by: Hugh Dickins 
> 
> Acked-by: Vlastimil Babka 

Thanks.

> 
> > ---
> > 
> >  include/linux/memcontrol.h |2 +-
> >  1 file changed, 1 insertion(+), 1 deletion(-)
> > 
> > --- 5.11-rc2/include/linux/memcontrol.h 2020-12-27 20:39:36.751923135 
> > -0800
> > +++ linux/include/linux/memcontrol.h2021-01-03 19:38:24.822978559 
> > -0800
> > @@ -665,7 +665,7 @@ static inline struct lruvec *mem_cgroup_
> >  {
> > struct mem_cgroup *memcg = page_memcg(page);
> >  
> > -   VM_WARN_ON_ONCE_PAGE(!memcg, page);
> > +   VM_WARN_ON_ONCE_PAGE(!memcg && !mem_cgroup_disabled(), page);
> 
> Nit: I would reverse the order of conditions as mem_cgroup_disabled() is 
> either
> "return true" or a static key. Not that it matters too much on DEBUG_VM 
> configs...

tl;dr I'm going to leave the patch as is.

You are certainly right that I was forgetting the static-key-ness of
mem_cgroup_disabled() when I put the tests that way round: I was thinking
of the already-in-a-register-ness of "memcg"; but had also not realized
that page_memcg() just did an "&", so condition bits nicely set already.

And I think you are right in principle, that the tests should be better
the way you suggest, when static key is in use - in the (unusual)
mem_cgroup_disabled() case, though not in the usual enabled case.

I refuse to confess how many hours I've spent poring over "objdump -ld"s
of lock_page_lruvec_irqsave(), and comparing with how it is patched when
the kernel is booted with "cgroup_disable=memory".

But I have seen builds where my way round worked out better than yours,
for both the enabled and disabled cases (SUSE gcc 9.3.1 was good, in
the config I was trying on it); and builds where disabled was treated
rather poorly my way (with external call to mem_cgroup_disabled() from
lock_page_lruvec() and lock_page_lruvec_irqsave(), but inlined into
lock_page_lruvec_irq() - go figure! - with SUSE gcc 10.2.1).

I suspect a lot depends on what inlining is done, and on that prior
page_memcg() doing its "&", and the second mem_cgroup_disabled() which
follows immediately in mem_cgroup_lruvec(): different compilers will
make different choices, favouring one or the other ordering.

I've grown rather tired of it all (and discovered on the way that
static keys depend on CONFIG_JUMP_LABEL=y, which I didn't have in
a config I've carried forward through "make oldconfig"s for years -
thanks); but not found a decisive reason to change the patch.

Hugh

> 
> > return mem_cgroup_lruvec(memcg, pgdat);
> >  }
> >  
> > 


Re: [PATCH 1/1] mm/madvise: replace ptrace attach requirement for process_madvise

2021-01-08 Thread Suren Baghdasaryan
On Fri, Jan 8, 2021 at 5:02 PM David Rientjes  wrote:
>
> On Fri, 8 Jan 2021, Suren Baghdasaryan wrote:
>
> > > > @@ -1197,12 +1197,22 @@ SYSCALL_DEFINE5(process_madvise, int, pidfd, 
> > > > const struct iovec __user *, vec,
> > > >   goto release_task;
> > > >   }
> > > >
> > > > - mm = mm_access(task, PTRACE_MODE_ATTACH_FSCREDS);
> > > > + /* Require PTRACE_MODE_READ to avoid leaking ASLR metadata. */
> > > > + mm = mm_access(task, PTRACE_MODE_READ_FSCREDS);
> > > >   if (IS_ERR_OR_NULL(mm)) {
> > > >   ret = IS_ERR(mm) ? PTR_ERR(mm) : -ESRCH;
> > > >   goto release_task;
> > > >   }
> > > >
> > > > + /*
> > > > +  * Require CAP_SYS_NICE for influencing process performance. Note 
> > > > that
> > > > +  * only non-destructive hints are currently supported.
> > > > +  */
> > > > + if (!capable(CAP_SYS_NICE)) {
> > > > + ret = -EPERM;
> > > > + goto release_task;
> > >
> > > mmput?
> >
> > Ouch! Thanks for pointing it out! Will include in the next respin.
> >
>
> With the fix, feel free to add:
>
> Acked-by: David Rientjes 

Thanks! Will post a new version with the fix on Monday.

>
> Thanks Suren!


[PATCH] perf/core: Mark perf_pmu_snapshot_aux() as static

2021-01-08 Thread Souptick Joarder
Kernel test robot throws below warning ->

 kernel/events/core.c:6535:6: warning: no previous prototype for
'perf_pmu_snapshot_aux' [-Wmissing-prototypes]
6535 | long perf_pmu_snapshot_aux(struct perf_buffer *rb,
 |  ^
Marking perf_pmu_snapshot_aux() as static as it is not used outside
this file.

Reported-by: kernel test robot 
Signed-off-by: Souptick Joarder 
---
 kernel/events/core.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/kernel/events/core.c b/kernel/events/core.c
index 55d1879..a4ba6fd 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -6532,7 +6532,7 @@ static unsigned long perf_prepare_sample_aux(struct 
perf_event *event,
return data->aux_size;
 }
 
-long perf_pmu_snapshot_aux(struct perf_buffer *rb,
+static long perf_pmu_snapshot_aux(struct perf_buffer *rb,
   struct perf_event *event,
   struct perf_output_handle *handle,
   unsigned long size)
-- 
1.9.1



[PATCH 3/3] regulator: mt6323: Add OF match table

2021-01-08 Thread matthias . bgg
From: Matthias Brugger 

The binding documentation mentions that a compatible is required for the
MT6323 device node. But the driver does not provide an OF match table.
This way auto-loading is broken as the MFD driver that registers the
device has a .of_compatible set which makes the platform .uevent
callback report an OF modalias, but that's not in the module.

Signed-off-by: Matthias Brugger 

---

 drivers/regulator/mt6323-regulator.c | 7 +++
 1 file changed, 7 insertions(+)

diff --git a/drivers/regulator/mt6323-regulator.c 
b/drivers/regulator/mt6323-regulator.c
index ff9016170db3..b3d84e95f051 100644
--- a/drivers/regulator/mt6323-regulator.c
+++ b/drivers/regulator/mt6323-regulator.c
@@ -406,9 +406,16 @@ static const struct platform_device_id 
mt6323_platform_ids[] = {
 };
 MODULE_DEVICE_TABLE(platform, mt6323_platform_ids);
 
+static const struct of_device_id mt6323_of_match[] = {
+   { .compatible = "mediatek,mt6323-regulator", },
+   { /* sentinel */ },
+};
+MODULE_DEVICE_TABLE(of, mt6323_of_match);
+
 static struct platform_driver mt6323_regulator_driver = {
.driver = {
.name = "mt6323-regulator",
+   .of_match_table = of_match_ptr(mt6323_of_match),
},
.probe = mt6323_regulator_probe,
.id_table = mt6323_platform_ids,
-- 
2.29.2



[PATCH 2/3] regulator: mt6358: Add OF match table

2021-01-08 Thread matthias . bgg
From: Matthias Brugger 

The binding documentation mentions that a compatible is required for the
MT6358 device node. But the driver does not provide an OF match table.
This way auto-loading is broken as the MFD driver that registers the
device has a .of_compatible set which makes the platform .uevent
callback report an OF modalias, but that's not in the module.

Signed-off-by: Matthias Brugger 
---

 drivers/regulator/mt6358-regulator.c | 7 +++
 1 file changed, 7 insertions(+)

diff --git a/drivers/regulator/mt6358-regulator.c 
b/drivers/regulator/mt6358-regulator.c
index 13cb6ac9a892..6cadb6d4dfe4 100644
--- a/drivers/regulator/mt6358-regulator.c
+++ b/drivers/regulator/mt6358-regulator.c
@@ -534,9 +534,16 @@ static const struct platform_device_id 
mt6358_platform_ids[] = {
 };
 MODULE_DEVICE_TABLE(platform, mt6358_platform_ids);
 
+static const struct of_device_id mt6358_of_match[] = {
+   { .compatible = "mediatek,mt6358-regulator", },
+   { /* sentinel */ },
+};
+MODULE_DEVICE_TABLE(of, mt6358_of_match);
+
 static struct platform_driver mt6358_regulator_driver = {
.driver = {
.name = "mt6358-regulator",
+   .of_match_table = of_match_ptr(mt6358_of_match),
},
.probe = mt6358_regulator_probe,
.id_table = mt6358_platform_ids,
-- 
2.29.2



[PATCH 1/3] regulator: mt6360: Add OF match table

2021-01-08 Thread matthias . bgg
From: Matthias Brugger 

Binding documentation mentions that a compatible is required for the
MT6360 device node, but the driver doesn't provide an OF match table.

Signed-off-by: Matthias Brugger 
---

 drivers/regulator/mt6360-regulator.c | 7 +++
 1 file changed, 7 insertions(+)

diff --git a/drivers/regulator/mt6360-regulator.c 
b/drivers/regulator/mt6360-regulator.c
index 15308ee29c13..07cbb9bb3c09 100644
--- a/drivers/regulator/mt6360-regulator.c
+++ b/drivers/regulator/mt6360-regulator.c
@@ -445,9 +445,16 @@ static const struct platform_device_id 
mt6360_regulator_id_table[] = {
 };
 MODULE_DEVICE_TABLE(platform, mt6360_regulator_id_table);
 
+static const struct of_device_id mt6360_of_match[] = {
+   { .compatible = "mediatek,mt6360-regulator", },
+   { /* sentinel */ },
+};
+MODULE_DEVICE_TABLE(of, mt6360_of_match);
+
 static struct platform_driver mt6360_regulator_driver = {
.driver = {
.name = "mt6360-regulator",
+   .of_match_table = of_match_ptr(mt6360_of_match),
},
.probe = mt6360_regulator_probe,
.id_table = mt6360_regulator_id_table,
-- 
2.29.2



Re: [PATCH v5 00/15] Add functionality to ipu3-cio2 driver allowing software_node connections to sensors on platforms designed for Windows

2021-01-08 Thread Laurent Pinchart
Hi Greg,

On Thu, Jan 07, 2021 at 01:28:23PM +, Daniel Scally wrote:
> 
> Hello all
> 
> v4:
> https://lore.kernel.org/linux-media/20210103231235.792999-1-djrsca...@gmail.com/T/#m11b7cb977e1b73fba1e625c3d6a189e2943a7783
> v3:
> https://lore.kernel.org/linux-media/20201224010907.263125-1-djrsca...@gmail.com/T/#m37b831bb2b406917d6db5da9acf9ed35df65d72d
> v2:
> https://lore.kernel.org/linux-media/20201217234337.1983732-1-djrsca...@gmail.com/T/#md93fd090009b42a6a98aed892aff0d38cf07e0cd
> v1:
> https://lore.kernel.org/linux-media/20201130133129.1024662-1-djrsca...@gmail.com/T/#m91934e12e3d033da2e768e952ea3b4a125ee3e67
> 
> This series is to start adding support for webcams on laptops with ACPI tables
> designed for use with CIO2 on Windows. This series extends the ipu3-cio2
> driver to allow for patching the firmware via software_nodes if endpoints
> aren't defined by ACPI.
> 
> I'm hopeful that most or all of this series could get picked up for 5.12.
> We touch a few different areas (listed below), but I think the easiest
> approach would be to merge everything through media tree. Rafael, Greg,
> Mauro and Sergey; are you ok with that plan, or would you prefer a
> different approach? Mauro; if that plan is ok (and of course assuming that
> the rest of the patches are acked by their respective maintainers) could
> we get a dedicated feature branch just in case the following series ends
> up being ready in time too?
> 
> lib
>   lib/test_printf.c: Use helper function to unwind array of
> software_nodes
> 
> base
>   software_node: Fix refcounts in software_node_get_next_child()
>   property: Return true in fwnode_device_is_available for NULL ops
>   property: Call fwnode_graph_get_endpoint_by_id() for fwnode->secondary
>   software_node: Enforce parent before child ordering of nodes arrays
>   software_node: unregister software_nodes in reverse order

Could you please let us know with an Acked-by if these patches can be
merged through the linux-media tree for v5.12 ? This is a cross-tree
series and we would like to avoid topic branches if possible.

>   include: fwnode.h: Define format macros for ports and endpoints
> 
> acpi
>   acpi: Add acpi_dev_get_next_match_dev() and helper macro
> 
> media
>   media: v4l2-core: v4l2-async: Check sd->fwnode->secondary in
> match_fwnode()
>   ipu3-cio2: Add T: entry to MAINTAINERS
>   ipu3-cio2: Rename ipu3-cio2.c
>   ipu3-cio2: Add cio2-bridge to ipu3-cio2 driver
>   include: media: v4l2-fwnode: Include v4l2_fwnode_bus_type
> 
> Series-level changelog:
>   - Rebased onto 5.11-rc1
> 
> Thanks
> Dan
> 
> Andy Shevchenko (1):
>   media: ipu3-cio2: Add headers that ipu3-cio2.h is direct user of
> 
> Daniel Scally (13):
>   software_node: Fix refcounts in software_node_get_next_child()
>   device property: Return true in fwnode_device_is_available for NULL
> ops
>   device property: Call fwnode_graph_get_endpoint_by_id() for
> fwnode->secondary
>   software_node: Enforce parent before child ordering of nodes arrays
>   software_node: unregister software_nodes in reverse order
>   device property: Define format macros for ports and endpoints
>   lib/test_printf.c: Use helper function to unwind array of
> software_nodes
>   ipu3-cio2: Add T: entry to MAINTAINERS
>   ipu3-cio2: Rename ipu3-cio2.c
>   media: v4l2-core: v4l2-async: Check sd->fwnode->secondary in
> match_fwnode()
>   ACPI / bus: Add acpi_dev_get_next_match_dev() and helper macro
>   media: v4l2-fwnode: Include v4l2_fwnode_bus_type
>   ipu3-cio2: Add cio2-bridge to ipu3-cio2 driver
> 
> Heikki Krogerus (1):
>   software_node: Add support for fwnode_graph*() family of functions
> 
>  MAINTAINERS   |   2 +
>  drivers/acpi/utils.c  |  30 +-
>  drivers/base/property.c   |  15 +-
>  drivers/base/swnode.c | 180 --
>  drivers/media/pci/intel/ipu3/Kconfig  |  18 +
>  drivers/media/pci/intel/ipu3/Makefile |   3 +
>  drivers/media/pci/intel/ipu3/cio2-bridge.c| 311 ++
>  drivers/media/pci/intel/ipu3/cio2-bridge.h| 125 +++
>  .../ipu3/{ipu3-cio2.c => ipu3-cio2-main.c}|  34 ++
>  drivers/media/pci/intel/ipu3/ipu3-cio2.h  |  24 ++
>  drivers/media/v4l2-core/v4l2-async.c  |   8 +
>  drivers/media/v4l2-core/v4l2-fwnode.c |  11 -
>  include/acpi/acpi_bus.h   |   7 +
>  include/linux/fwnode.h|   7 +
>  include/media/v4l2-fwnode.h   |  22 ++
>  lib/test_printf.c |   4 +-
>  16 files changed, 763 insertions(+), 38 deletions(-)
>  create mode 100644 drivers/media/pci/intel/ipu3/cio2-bridge.c
>  create mode 100644 drivers/media/pci/intel/ipu3/cio2-bridge.h
>  rename drivers/media/pci/intel/ipu3/{ipu3-cio2.c => ipu3-cio2-main.c} (98%)

-- 
Regards,

Laurent Pinchart


[RFC PATCH 5/8] entry: Explicitly flush pending rcuog wakeup before last rescheduling points

2021-01-08 Thread Frederic Weisbecker
Following the idle loop model, cleanly check for pending rcuog wakeup
before the last rescheduling point on resuming to user mode. This
way we can avoid doing it from rcu_user_enter() with the last resort
self-IPI hack that enforces rescheduling.

Signed-off-by: Frederic Weisbecker 
Cc: Peter Zijlstra 
Cc: Thomas Gleixner 
Cc: Ingo Molnar
Cc: Paul E. McKenney 
Cc: Rafael J. Wysocki 
---
 kernel/entry/common.c |  6 ++
 kernel/rcu/tree.c | 12 +++-
 2 files changed, 13 insertions(+), 5 deletions(-)

diff --git a/kernel/entry/common.c b/kernel/entry/common.c
index 378341642f94..8f3292b5f9b7 100644
--- a/kernel/entry/common.c
+++ b/kernel/entry/common.c
@@ -178,6 +178,9 @@ static unsigned long exit_to_user_mode_loop(struct pt_regs 
*regs,
/* Architecture specific TIF work */
arch_exit_to_user_mode_work(regs, ti_work);
 
+   /* Check if any of the above work has queued a deferred wakeup 
*/
+   rcu_nocb_flush_deferred_wakeup();
+
/*
 * Disable interrupts and reevaluate the work flags as they
 * might have changed while interrupts and preemption was
@@ -197,6 +200,9 @@ static void exit_to_user_mode_prepare(struct pt_regs *regs)
 
lockdep_assert_irqs_disabled();
 
+   /* Flush pending rcuog wakeup before the last need_resched() check */
+   rcu_nocb_flush_deferred_wakeup();
+
if (unlikely(ti_work & EXIT_TO_USER_MODE_WORK))
ti_work = exit_to_user_mode_loop(regs, ti_work);
 
diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
index 2920dfc9f58c..3c4c0d5cea65 100644
--- a/kernel/rcu/tree.c
+++ b/kernel/rcu/tree.c
@@ -705,12 +705,14 @@ noinstr void rcu_user_enter(void)
 
lockdep_assert_irqs_disabled();
/*
-* We may be past the last rescheduling opportunity in the entry code.
-* Trigger a self IPI that will fire and reschedule once we resume to
-* user/guest mode.
+* Other than generic entry implementation, we may be past the last
+* rescheduling opportunity in the entry code. Trigger a self IPI
+* that will fire and reschedule once we resume in user/guest mode.
 */
-   if (do_nocb_deferred_wakeup(rdp) && need_resched())
-   irq_work_queue(this_cpu_ptr(_wakeup_work));
+   if (!IS_ENABLED(CONFIG_GENERIC_ENTRY) || (current->flags & PF_VCPU)) {
+   if (do_nocb_deferred_wakeup(rdp) && need_resched())
+   irq_work_queue(this_cpu_ptr(_wakeup_work));
+   }
 
rcu_eqs_enter(true);
 }
-- 
2.25.1



[RFC PATCH 7/8] entry: Report local wake up on resched blind zone while resuming to user

2021-01-08 Thread Frederic Weisbecker
The last rescheduling opportunity while resuming to user is in
exit_to_user_mode_loop(). This means that any wake up performed on
the local runqueue after this point is going to have its rescheduling
silently ignored.

Perform sanity checks to report these situations.

Signed-off-by: Frederic Weisbecker 
Cc: Peter Zijlstra 
Cc: Thomas Gleixner 
Cc: Ingo Molnar
Cc: Paul E. McKenney 
Cc: Rafael J. Wysocki 
---
 kernel/entry/common.c | 4 
 1 file changed, 4 insertions(+)

diff --git a/kernel/entry/common.c b/kernel/entry/common.c
index 8f3292b5f9b7..1dfb97762336 100644
--- a/kernel/entry/common.c
+++ b/kernel/entry/common.c
@@ -5,6 +5,7 @@
 #include 
 #include 
 #include 
+#include 
 
 #include "common.h"
 
@@ -23,6 +24,8 @@ static __always_inline void __enter_from_user_mode(struct 
pt_regs *regs)
instrumentation_begin();
trace_hardirqs_off_finish();
instrumentation_end();
+
+   sched_resched_local_allow();
 }
 
 void noinstr enter_from_user_mode(struct pt_regs *regs)
@@ -206,6 +209,7 @@ static void exit_to_user_mode_prepare(struct pt_regs *regs)
if (unlikely(ti_work & EXIT_TO_USER_MODE_WORK))
ti_work = exit_to_user_mode_loop(regs, ti_work);
 
+   sched_resched_local_forbid();
arch_exit_to_user_mode_prepare(regs, ti_work);
 
/* Ensure that the address limit is intact and no locks are held */
-- 
2.25.1



[RFC PATCH 8/8] timer: Report ignored local enqueue in nohz mode

2021-01-08 Thread Frederic Weisbecker
Enqueuing a local timer after the tick has been stopped will result in
the timer being ignored until the next random interrupt.

Perform sanity checks to report these situations.

Signed-off-by: Frederic Weisbecker 
Cc: Peter Zijlstra 
Cc: Thomas Gleixner 
Cc: Ingo Molnar
Cc: Paul E. McKenney 
Cc: Rafael J. Wysocki 
---
 kernel/sched/core.c | 24 +++-
 1 file changed, 23 insertions(+), 1 deletion(-)

diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 6056f0374674..6c8b04272a9a 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -715,6 +715,26 @@ int get_nohz_timer_target(void)
return cpu;
 }
 
+static void wake_idle_assert_possible(void)
+{
+#ifdef CONFIG_SCHED_DEBUG
+   /* Timers are re-evaluated after idle IRQs */
+   if (in_hardirq())
+   return;
+   /*
+* Same as hardirqs, assuming they are executing
+* on IRQ tail. Ksoftirqd shouldn't reach here
+* as the timer base wouldn't be idle. And inline
+* softirq processing after a call to local_bh_enable()
+* within idle loop sound too fun to be considered here.
+*/
+   if (in_serving_softirq())
+   return;
+
+   WARN_ON_ONCE("Late timer enqueue may be ignored\n");
+#endif
+}
+
 /*
  * When add_timer_on() enqueues a timer into the timer wheel of an
  * idle CPU then this timer might expire before the next timer event
@@ -729,8 +749,10 @@ static void wake_up_idle_cpu(int cpu)
 {
struct rq *rq = cpu_rq(cpu);
 
-   if (cpu == smp_processor_id())
+   if (cpu == smp_processor_id()) {
+   wake_idle_assert_possible();
return;
+   }
 
if (set_nr_and_not_polling(rq->idle))
smp_send_reschedule(cpu);
-- 
2.25.1



[RFC PATCH 6/8] sched: Report local wake up on resched blind zone within idle loop

2021-01-08 Thread Frederic Weisbecker
The idle loop has several need_resched() checks that make sure we don't
miss a rescheduling request. This means that any wake up performed on
the local runqueue after the last generic need_resched() check is going
to have its rescheduling silently ignored. This has happened in the
past with rcu kthreads awakened from rcu_idle_enter(), for example.

Perform sanity checks to report these situations.

Signed-off-by: Frederic Weisbecker 
Cc: Peter Zijlstra 
Cc: Thomas Gleixner 
Cc: Ingo Molnar
Cc: Paul E. McKenney 
Cc: Rafael J. Wysocki 
---
 include/linux/sched.h | 11 +++
 kernel/sched/core.c   | 42 ++
 kernel/sched/idle.c   |  3 +++
 kernel/sched/sched.h  |  3 +++
 4 files changed, 59 insertions(+)

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 6e3a5eeec509..83fedda54943 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1917,6 +1917,17 @@ static __always_inline bool need_resched(void)
return unlikely(tif_need_resched());
 }
 
+#ifdef CONFIG_SCHED_DEBUG
+extern void sched_resched_local_allow(void);
+extern void sched_resched_local_forbid(void);
+extern void sched_resched_local_assert_allowed(void);
+#else
+static inline void sched_resched_local_allow(void) { }
+static inline void sched_resched_local_forbid(void) { }
+static inline void sched_resched_local_assert_allowed(void) { }
+#endif
+
+
 /*
  * Wrappers for p->thread_info->cpu access. No-op on UP.
  */
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 15d2562118d1..6056f0374674 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -591,6 +591,44 @@ void wake_up_q(struct wake_q_head *head)
}
 }
 
+#ifdef CONFIG_SCHED_DEBUG
+void noinstr sched_resched_local_allow(void)
+{
+   this_rq()->resched_local_allow = 1;
+}
+
+void noinstr sched_resched_local_forbid(void)
+{
+   this_rq()->resched_local_allow = 0;
+}
+
+void noinstr sched_resched_local_assert_allowed(void)
+{
+   if (this_rq()->resched_local_allow)
+   return;
+
+   /*
+* Idle interrupts break the CPU from its pause and
+* rescheduling happens on idle loop exit.
+*/
+   if (in_hardirq())
+   return;
+
+   /*
+* What applies to hardirq also applies to softirq as
+* we assume they execute on hardirq tail. Ksoftirqd
+* shouldn't have resched_local_allow == 0.
+* We also assume that no local_bh_enable() call may
+* execute softirqs inline on fragile idle/entry
+* path...
+*/
+   if (in_serving_softirq())
+   return;
+
+   WARN_ONCE(1, "Late current task rescheduling may be lost\n");
+}
+#endif
+
 /*
  * resched_curr - mark rq's current task 'to be rescheduled now'.
  *
@@ -613,6 +651,7 @@ void resched_curr(struct rq *rq)
if (cpu == smp_processor_id()) {
set_tsk_need_resched(curr);
set_preempt_need_resched();
+   sched_resched_local_assert_allowed();
return;
}
 
@@ -7796,6 +7835,9 @@ void __init sched_init(void)
 #endif /* CONFIG_SMP */
hrtick_rq_init(rq);
atomic_set(>nr_iowait, 0);
+#ifdef CONFIG_SCHED_DEBUG
+   rq->resched_local_allow = 1;
+#endif
}
 
set_load_weight(_task, false);
diff --git a/kernel/sched/idle.c b/kernel/sched/idle.c
index b601a3aa2152..cdffd32812bd 100644
--- a/kernel/sched/idle.c
+++ b/kernel/sched/idle.c
@@ -185,6 +185,8 @@ static void cpuidle_idle_call(void)
return;
}
 
+   sched_resched_local_forbid();
+
/*
 * The RCU framework needs to be told that we are entering an idle
 * section, so no more rcu read side critical sections and one more
@@ -247,6 +249,7 @@ static void cpuidle_idle_call(void)
}
 
 exit_idle:
+   sched_resched_local_allow();
__current_set_polling();
 
/*
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index 12ada79d40f3..a9416c383451 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -1060,6 +1060,9 @@ struct rq {
 #endif
unsigned intpush_busy;
struct cpu_stop_workpush_work;
+#ifdef CONFIG_SCHED_DEBUG
+   unsigned intresched_local_allow;
+#endif
 };
 
 #ifdef CONFIG_FAIR_GROUP_SCHED
-- 
2.25.1



Re: [PATCH v5 07/15] device property: Define format macros for ports and endpoints

2021-01-08 Thread Laurent Pinchart
Hi Rafael,

Could you please let us know with an Acked-by if this patch can be
merged through the linux-media tree for v5.12 ? The cover letter
contains additional details (in a nutshell, this is a cross-tree series
and we would like to avoid topic branches).

On Thu, Jan 07, 2021 at 01:28:30PM +, Daniel Scally wrote:
> OF, ACPI and software_nodes all implement graphs including nodes for ports
> and endpoints. These are all intended to be named with a common schema,
> as "port@n" and "endpoint@n" where n is an unsigned int representing the
> index of the node. To ensure commonality across the subsystems, provide a
> set of macros to define the format.
> 
> Suggested-by: Andy Shevchenko 
> Reviewed-by: Andy Shevchenko 
> Reviewed-by: Laurent Pinchart 
> Signed-off-by: Daniel Scally 
> ---
> Changes in v5:
> 
>   - Changed commit subject
> 
>  include/linux/fwnode.h | 7 +++
>  1 file changed, 7 insertions(+)
> 
> diff --git a/include/linux/fwnode.h b/include/linux/fwnode.h
> index fde4ad97564c..77414e431e89 100644
> --- a/include/linux/fwnode.h
> +++ b/include/linux/fwnode.h
> @@ -50,6 +50,13 @@ struct fwnode_endpoint {
>   const struct fwnode_handle *local_fwnode;
>  };
>  
> +/*
> + * ports and endpoints defined as software_nodes should all follow a common
> + * naming scheme; use these macros to ensure commonality.
> + */
> +#define SWNODE_GRAPH_PORT_NAME_FMT   "port@%u"
> +#define SWNODE_GRAPH_ENDPOINT_NAME_FMT   "endpoint@%u"
> +
>  #define NR_FWNODE_REFERENCE_ARGS 8
>  
>  /**

-- 
Regards,

Laurent Pinchart


[RFC PATCH 4/8] rcu/nocb: Trigger self-IPI on late deferred wake up before user resume

2021-01-08 Thread Frederic Weisbecker
Entering RCU idle mode may cause a deferred wake up of an RCU NOCB_GP
kthread (rcuog) to be serviced.

Unfortunately the call to rcu_user_enter() is already past the last
rescheduling opportunity before we resume to userspace or to guest mode.
We may escape there with the woken task ignored.

The ultimate resort to fix every call site is to trigger a self-IPI
(nohz_full depends on IRQ_WORK) that will trigger a reschedule on IRQ
tail or guest exit.

Eventually every site that wants a saner treatment will need to carefully
place a call to rcu_nocb_flush_deferred_wakeup() before the last explicit
need_resched() check upon resume.

Reported-by: Paul E. McKenney 
Fixes: 96d3fd0d315a (rcu: Break call_rcu() deadlock involving scheduler and 
perf)
Cc: sta...@vger.kernel.org
Cc: Rafael J. Wysocki 
Cc: Peter Zijlstra 
Cc: Thomas Gleixner 
Cc: Ingo Molnar
Signed-off-by: Frederic Weisbecker 
---
 kernel/rcu/tree.c| 22 +-
 kernel/rcu/tree.h|  2 +-
 kernel/rcu/tree_plugin.h | 25 -
 3 files changed, 38 insertions(+), 11 deletions(-)

diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
index b6e134e3..2920dfc9f58c 100644
--- a/kernel/rcu/tree.c
+++ b/kernel/rcu/tree.c
@@ -676,6 +676,18 @@ void rcu_idle_enter(void)
 EXPORT_SYMBOL_GPL(rcu_idle_enter);
 
 #ifdef CONFIG_NO_HZ_FULL
+
+/*
+ * An empty function that will trigger a reschedule on
+ * IRQ tail once IRQs get re-enabled on userspace resume.
+ */
+static void late_wakeup_func(struct irq_work *work)
+{
+}
+
+static DEFINE_PER_CPU(struct irq_work, late_wakeup_work) =
+   IRQ_WORK_INIT(late_wakeup_func);
+
 /**
  * rcu_user_enter - inform RCU that we are resuming userspace.
  *
@@ -692,9 +704,17 @@ noinstr void rcu_user_enter(void)
struct rcu_data *rdp = this_cpu_ptr(_data);
 
lockdep_assert_irqs_disabled();
-   do_nocb_deferred_wakeup(rdp);
+   /*
+* We may be past the last rescheduling opportunity in the entry code.
+* Trigger a self IPI that will fire and reschedule once we resume to
+* user/guest mode.
+*/
+   if (do_nocb_deferred_wakeup(rdp) && need_resched())
+   irq_work_queue(this_cpu_ptr(_wakeup_work));
+
rcu_eqs_enter(true);
 }
+
 #endif /* CONFIG_NO_HZ_FULL */
 
 /**
diff --git a/kernel/rcu/tree.h b/kernel/rcu/tree.h
index 7708ed161f4a..9226f4021a36 100644
--- a/kernel/rcu/tree.h
+++ b/kernel/rcu/tree.h
@@ -433,7 +433,7 @@ static bool rcu_nocb_try_bypass(struct rcu_data *rdp, 
struct rcu_head *rhp,
 static void __call_rcu_nocb_wake(struct rcu_data *rdp, bool was_empty,
 unsigned long flags);
 static int rcu_nocb_need_deferred_wakeup(struct rcu_data *rdp);
-static void do_nocb_deferred_wakeup(struct rcu_data *rdp);
+static bool do_nocb_deferred_wakeup(struct rcu_data *rdp);
 static void rcu_boot_init_nocb_percpu_data(struct rcu_data *rdp);
 static void rcu_spawn_cpu_nocb_kthread(int cpu);
 static void __init rcu_spawn_nocb_kthreads(void);
diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h
index d5b38c28abd1..384856e4d13e 100644
--- a/kernel/rcu/tree_plugin.h
+++ b/kernel/rcu/tree_plugin.h
@@ -1631,8 +1631,8 @@ bool rcu_is_nocb_cpu(int cpu)
  * Kick the GP kthread for this NOCB group.  Caller holds ->nocb_lock
  * and this function releases it.
  */
-static void wake_nocb_gp(struct rcu_data *rdp, bool force,
-  unsigned long flags)
+static bool wake_nocb_gp(struct rcu_data *rdp, bool force,
+unsigned long flags)
__releases(rdp->nocb_lock)
 {
bool needwake = false;
@@ -1643,7 +1643,7 @@ static void wake_nocb_gp(struct rcu_data *rdp, bool force,
trace_rcu_nocb_wake(rcu_state.name, rdp->cpu,
TPS("AlreadyAwake"));
rcu_nocb_unlock_irqrestore(rdp, flags);
-   return;
+   return false;
}
del_timer(>nocb_timer);
rcu_nocb_unlock_irqrestore(rdp, flags);
@@ -1656,6 +1656,8 @@ static void wake_nocb_gp(struct rcu_data *rdp, bool force,
raw_spin_unlock_irqrestore(_gp->nocb_gp_lock, flags);
if (needwake)
wake_up_process(rdp_gp->nocb_gp_kthread);
+
+   return needwake;
 }
 
 /*
@@ -2152,20 +2154,23 @@ static int rcu_nocb_need_deferred_wakeup(struct 
rcu_data *rdp)
 }
 
 /* Do a deferred wakeup of rcu_nocb_kthread(). */
-static void do_nocb_deferred_wakeup_common(struct rcu_data *rdp)
+static bool do_nocb_deferred_wakeup_common(struct rcu_data *rdp)
 {
unsigned long flags;
int ndw;
+   int ret;
 
rcu_nocb_lock_irqsave(rdp, flags);
if (!rcu_nocb_need_deferred_wakeup(rdp)) {
rcu_nocb_unlock_irqrestore(rdp, flags);
-   return;
+   return false;
}
ndw = READ_ONCE(rdp->nocb_defer_wakeup);
WRITE_ONCE(rdp->nocb_defer_wakeup, RCU_NOCB_WAKE_NOT);
-   wake_nocb_gp(rdp, ndw == 

[RFC PATCH 1/8] rcu: Remove superfluous rdp fetch

2021-01-08 Thread Frederic Weisbecker
Signed-off-by: Frederic Weisbecker 
Cc: Paul E. McKenney 
Cc: Rafael J. Wysocki 
Cc: Peter Zijlstra 
Cc: Thomas Gleixner 
Cc: Ingo Molnar
---
 kernel/rcu/tree.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
index 40e5e3dd253e..fef90c467670 100644
--- a/kernel/rcu/tree.c
+++ b/kernel/rcu/tree.c
@@ -643,7 +643,6 @@ static noinstr void rcu_eqs_enter(bool user)
instrumentation_begin();
trace_rcu_dyntick(TPS("Start"), rdp->dynticks_nesting, 0, 
atomic_read(>dynticks));
WARN_ON_ONCE(IS_ENABLED(CONFIG_RCU_EQS_DEBUG) && !user && 
!is_idle_task(current));
-   rdp = this_cpu_ptr(_data);
do_nocb_deferred_wakeup(rdp);
rcu_prepare_for_idle();
rcu_preempt_deferred_qs(current);
-- 
2.25.1



[RFC PATCH 3/8] rcu/nocb: Perform deferred wake up before last idle's need_resched() check

2021-01-08 Thread Frederic Weisbecker
Entering RCU idle mode may cause a deferred wake up of an RCU NOCB_GP
kthread (rcuog) to be serviced.

Usually a local wake up happening while running the idle task is handled
in one of the need_resched() checks carefully placed within the idle
loop that can break to the scheduler.

Unfortunately the call to rcu_idle_enter() is already beyond the last
generic need_resched() check and we may halt the CPU with a resched
request unhandled, leaving the task hanging.

Fix this by splitting the rcuog wakeup handling from rcu_idle_enter()
and placing it before the last generic need_resched() check in the idle
loop. It is then assumed that no call to call_rcu() will be performed
after that in the idle loop until the CPU is put in low power mode.
Further debug code will help spot the offenders.

Reported-by: Paul E. McKenney 
Fixes: 96d3fd0d315a (rcu: Break call_rcu() deadlock involving scheduler and 
perf)
Cc: sta...@vger.kernel.org
Cc: Rafael J. Wysocki 
Cc: Peter Zijlstra 
Cc: Thomas Gleixner 
Cc: Ingo Molnar
Signed-off-by: Frederic Weisbecker 
---
 include/linux/rcupdate.h | 2 ++
 kernel/rcu/tree.c| 3 ---
 kernel/rcu/tree_plugin.h | 5 +
 kernel/sched/idle.c  | 3 +++
 4 files changed, 10 insertions(+), 3 deletions(-)

diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h
index de0826411311..4068234fb303 100644
--- a/include/linux/rcupdate.h
+++ b/include/linux/rcupdate.h
@@ -104,8 +104,10 @@ static inline void rcu_user_exit(void) { }
 
 #ifdef CONFIG_RCU_NOCB_CPU
 void rcu_init_nohz(void);
+void rcu_nocb_flush_deferred_wakeup(void);
 #else /* #ifdef CONFIG_RCU_NOCB_CPU */
 static inline void rcu_init_nohz(void) { }
+static inline void rcu_nocb_flush_deferred_wakeup(void) { }
 #endif /* #else #ifdef CONFIG_RCU_NOCB_CPU */
 
 /**
diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
index b9fff18d14d9..b6e134e3 100644
--- a/kernel/rcu/tree.c
+++ b/kernel/rcu/tree.c
@@ -670,10 +670,7 @@ static noinstr void rcu_eqs_enter(bool user)
  */
 void rcu_idle_enter(void)
 {
-   struct rcu_data *rdp = this_cpu_ptr(_data);
-
lockdep_assert_irqs_disabled();
-   do_nocb_deferred_wakeup(rdp);
rcu_eqs_enter(false);
 }
 EXPORT_SYMBOL_GPL(rcu_idle_enter);
diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h
index 7e291ce0a1d6..d5b38c28abd1 100644
--- a/kernel/rcu/tree_plugin.h
+++ b/kernel/rcu/tree_plugin.h
@@ -2187,6 +2187,11 @@ static void do_nocb_deferred_wakeup(struct rcu_data *rdp)
do_nocb_deferred_wakeup_common(rdp);
 }
 
+void rcu_nocb_flush_deferred_wakeup(void)
+{
+   do_nocb_deferred_wakeup(this_cpu_ptr(_data));
+}
+
 void __init rcu_init_nohz(void)
 {
int cpu;
diff --git a/kernel/sched/idle.c b/kernel/sched/idle.c
index 305727ea0677..b601a3aa2152 100644
--- a/kernel/sched/idle.c
+++ b/kernel/sched/idle.c
@@ -55,6 +55,7 @@ __setup("hlt", cpu_idle_nopoll_setup);
 static noinline int __cpuidle cpu_idle_poll(void)
 {
trace_cpu_idle(0, smp_processor_id());
+   rcu_nocb_flush_deferred_wakeup();
stop_critical_timings();
rcu_idle_enter();
local_irq_enable();
@@ -173,6 +174,8 @@ static void cpuidle_idle_call(void)
struct cpuidle_driver *drv = cpuidle_get_cpu_driver(dev);
int next_state, entered_state;
 
+   rcu_nocb_flush_deferred_wakeup();
+
/*
 * Check if the idle task must be rescheduled. If it is the
 * case, exit the function after re-enabling the local irq.
-- 
2.25.1



[RFC PATCH 2/8] rcu: Pull deferred rcuog wake up to rcu_eqs_enter() callers

2021-01-08 Thread Frederic Weisbecker
Deferred wakeup of rcuog kthreads upon RCU idle mode entry is going to
be handled differently depending on whether it is initiated by idle, user
or guest. Prepare by pulling that control up to rcu_eqs_enter() callers.

Signed-off-by: Frederic Weisbecker 
Cc: Paul E. McKenney 
Cc: Rafael J. Wysocki 
Cc: Peter Zijlstra 
Cc: Thomas Gleixner 
Cc: Ingo Molnar
---
 kernel/rcu/tree.c | 7 ++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
index fef90c467670..b9fff18d14d9 100644
--- a/kernel/rcu/tree.c
+++ b/kernel/rcu/tree.c
@@ -643,7 +643,6 @@ static noinstr void rcu_eqs_enter(bool user)
instrumentation_begin();
trace_rcu_dyntick(TPS("Start"), rdp->dynticks_nesting, 0, 
atomic_read(>dynticks));
WARN_ON_ONCE(IS_ENABLED(CONFIG_RCU_EQS_DEBUG) && !user && 
!is_idle_task(current));
-   do_nocb_deferred_wakeup(rdp);
rcu_prepare_for_idle();
rcu_preempt_deferred_qs(current);
 
@@ -671,7 +670,10 @@ static noinstr void rcu_eqs_enter(bool user)
  */
 void rcu_idle_enter(void)
 {
+   struct rcu_data *rdp = this_cpu_ptr(_data);
+
lockdep_assert_irqs_disabled();
+   do_nocb_deferred_wakeup(rdp);
rcu_eqs_enter(false);
 }
 EXPORT_SYMBOL_GPL(rcu_idle_enter);
@@ -690,7 +692,10 @@ EXPORT_SYMBOL_GPL(rcu_idle_enter);
  */
 noinstr void rcu_user_enter(void)
 {
+   struct rcu_data *rdp = this_cpu_ptr(_data);
+
lockdep_assert_irqs_disabled();
+   do_nocb_deferred_wakeup(rdp);
rcu_eqs_enter(true);
 }
 #endif /* CONFIG_NO_HZ_FULL */
-- 
2.25.1



[RFC PATCH 0/8] rcu/sched: Fix ignored rescheduling after rcu_eqs_enter() v3

2021-01-08 Thread Frederic Weisbecker
(This was [PATCH 0/4] sched/idle: Fix missing need_resched() checks after 
rcu_idle_enter() v2)

I initially followed Peterz's review but eventually I tried a different
approach. Instead of handling the late wake up from rcu_idle_enter(),
I've split the delayed rcuog wake up and moved it right before
the last generic need_resched() check, it makes more sense and we don't
need to fiddle with cpuidle core and drivers anymore. It's also less
error prone.

I also fixed the nohz_full case and (hopefully) the guest case.

And this comes with debugging to prevent that pattern from happening
again.

Only lightly tested so far.

git://git.kernel.org/pub/scm/linux/kernel/git/frederic/linux-dynticks.git
sched/idle-v3

HEAD: d95fc510e804a5c4658a823ff12d9caba1d906c7

Thanks,
Frederic
---

Frederic Weisbecker (8):
  rcu: Remove superfluous rdp fetch
  rcu: Pull deferred rcuog wake up to rcu_eqs_enter() callers
  rcu/nocb: Perform deferred wake up before last idle's need_resched() check
  rcu/nocb: Trigger self-IPI on late deferred wake up before user resume
  entry: Explicitly flush pending rcuog wakeup before last rescheduling 
points
  sched: Report local wake up on resched blind zone within idle loop
  entry: Report local wake up on resched blind zone while resuming to user
  timer: Report ignored local enqueue in nohz mode


 include/linux/rcupdate.h |  2 ++
 include/linux/sched.h| 11 
 kernel/entry/common.c| 10 
 kernel/rcu/tree.c| 27 ++--
 kernel/rcu/tree.h|  2 +-
 kernel/rcu/tree_plugin.h | 30 +++---
 kernel/sched/core.c  | 66 +++-
 kernel/sched/idle.c  |  6 +
 kernel/sched/sched.h |  3 +++
 9 files changed, 144 insertions(+), 13 deletions(-)


Re: kernel BUG at mm/page-writeback.c:LINE!

2021-01-08 Thread Linus Torvalds
On Tue, Jan 5, 2021 at 11:53 AM Linus Torvalds
 wrote:
>
> I took your "way to go" statement as an ack, and made it all be commit
> c2407cf7d22d ("mm: make wait_on_page_writeback() wait for multiple
> pending writebacks").

Oh, and Michael Larabel (of phoronix) reports that that one-liner does
something bad to a few PostgreSQL tests, on the order of 5-10%
regression on some machines (but apparently not others).

I suspect that's a sign of instability in the benchmark numbers, but
it probably also means that we have some silly condition where
multiple threads want to clean the same page.

I sent him a patch to try if it ends up being better to just not wake
things up early at all (instead of the "if" -> "while" conversion).
That trivial patch appended here in case anybody has comments.

Just the fact that that one-liner made a performance impact makes me
go "hmm", though. Michael didn't see the BUG_ON(), so it's presumably
some _other_ user of wait_on_page_writeback() than the
write_cache_pages() one that causes issues.

Anybody got any suspicions? Honestly, when working on the page wait
queues, I was working under the assumption that it's really just the
page lock that truly matters.

I'm thinking things like __filemap_fdatawait_range(), which doesn't
hold the page lock at all, so it's all kinds of non-serialized, and
could now be waiting for any number of IO's to complete..

Oh well. This email doesn't really have a point, it's more of a
heads-up that that "wait to see one or multiple writebacks" thing
seems to matter more than I would have expected for some loads..

Linus


patch
Description: Binary data


Re: [PATCH v5 09/15] lib/test_printf.c: Use helper function to unwind array of software_nodes

2021-01-08 Thread Laurent Pinchart
Hi Peter, Steven and Sergey,

Could you please let us know if you're fine with this patch getting
merged in v5.12 through the linux-media tree ? The cover letter contains
additional details (in a nutshell, this is a cross-tree series and we
would like to avoid topic branches if possible).

On Thu, Jan 07, 2021 at 01:28:32PM +, Daniel Scally wrote:
> Use the software_node_unregister_nodes() helper function to unwind this
> array in a cleaner way.
> 
> Acked-by: Petr Mladek 
> Reviewed-by: Andy Shevchenko 
> Reviewed-by: Laurent Pinchart 
> Reviewed-by: Sergey Senozhatsky 
> Suggested-by: Andy Shevchenko 
> Signed-off-by: Daniel Scally 
> ---
> Changes in v5:
> 
>   - None
> 
>  lib/test_printf.c | 4 +---
>  1 file changed, 1 insertion(+), 3 deletions(-)
> 
> diff --git a/lib/test_printf.c b/lib/test_printf.c
> index 7ac87f18a10f..7d60f24240a4 100644
> --- a/lib/test_printf.c
> +++ b/lib/test_printf.c
> @@ -644,9 +644,7 @@ static void __init fwnode_pointer(void)
>   test(second_name, "%pfwP", software_node_fwnode([1]));
>   test(third_name, "%pfwP", software_node_fwnode([2]));
>  
> - software_node_unregister([2]);
> - software_node_unregister([1]);
> - software_node_unregister([0]);
> + software_node_unregister_nodes(softnodes);
>  }
>  
>  static void __init

-- 
Regards,

Laurent Pinchart


Re: [PATCH v5 13/15] ACPI / bus: Add acpi_dev_get_next_match_dev() and helper macro

2021-01-08 Thread Laurent Pinchart
Hi Rafael,

Could you please review this patch, and let us know (see question in the
cover letter) if it can be merged through the linux-media tree for v5.12
?

On Thu, Jan 07, 2021 at 01:28:36PM +, Daniel Scally wrote:
> To ensure we handle situations in which multiple sensors of the same
> model (and therefore _HID) are present in a system, we need to be able
> to iterate over devices matching a known _HID but unknown _UID and _HRV
>  - add acpi_dev_get_next_match_dev() to accommodate that possibility and
> change acpi_dev_get_first_match_dev() to simply call the new function
> with a NULL starting point. Add an iterator macro for convenience.
> 
> Reviewed-by: Andy Shevchenko 
> Reviewed-by: Sakari Ailus 
> Suggested-by: Andy Shevchenko 
> Signed-off-by: Daniel Scally 
> ---
> Changes in v5:
> 
>   - Changed commit subject
> 
>  drivers/acpi/utils.c| 30 ++
>  include/acpi/acpi_bus.h |  7 +++
>  2 files changed, 33 insertions(+), 4 deletions(-)
> 
> diff --git a/drivers/acpi/utils.c b/drivers/acpi/utils.c
> index d5411a166685..ddca1550cce6 100644
> --- a/drivers/acpi/utils.c
> +++ b/drivers/acpi/utils.c
> @@ -843,12 +843,13 @@ bool acpi_dev_present(const char *hid, const char *uid, 
> s64 hrv)
>  EXPORT_SYMBOL(acpi_dev_present);
>  
>  /**
> - * acpi_dev_get_first_match_dev - Return the first match of ACPI device
> + * acpi_dev_get_next_match_dev - Return the next match of ACPI device
> + * @adev: Pointer to the previous acpi_device matching this @hid, @uid and 
> @hrv
>   * @hid: Hardware ID of the device.
>   * @uid: Unique ID of the device, pass NULL to not check _UID
>   * @hrv: Hardware Revision of the device, pass -1 to not check _HRV
>   *
> - * Return the first match of ACPI device if a matching device was present
> + * Return the next match of ACPI device if another matching device was 
> present
>   * at the moment of invocation, or NULL otherwise.
>   *
>   * The caller is responsible to call put_device() on the returned device.
> @@ -856,8 +857,9 @@ EXPORT_SYMBOL(acpi_dev_present);
>   * See additional information in acpi_dev_present() as well.
>   */
>  struct acpi_device *
> -acpi_dev_get_first_match_dev(const char *hid, const char *uid, s64 hrv)
> +acpi_dev_get_next_match_dev(struct acpi_device *adev, const char *hid, const 
> char *uid, s64 hrv)
>  {
> + struct device *start = adev ? >dev : NULL;
>   struct acpi_dev_match_info match = {};
>   struct device *dev;
>  
> @@ -865,9 +867,29 @@ acpi_dev_get_first_match_dev(const char *hid, const char 
> *uid, s64 hrv)
>   match.uid = uid;
>   match.hrv = hrv;
>  
> - dev = bus_find_device(_bus_type, NULL, , acpi_dev_match_cb);
> + dev = bus_find_device(_bus_type, start, , acpi_dev_match_cb);
>   return dev ? to_acpi_device(dev) : NULL;
>  }
> +EXPORT_SYMBOL(acpi_dev_get_next_match_dev);
> +
> +/**
> + * acpi_dev_get_first_match_dev - Return the first match of ACPI device
> + * @hid: Hardware ID of the device.
> + * @uid: Unique ID of the device, pass NULL to not check _UID
> + * @hrv: Hardware Revision of the device, pass -1 to not check _HRV
> + *
> + * Return the first match of ACPI device if a matching device was present
> + * at the moment of invocation, or NULL otherwise.
> + *
> + * The caller is responsible to call put_device() on the returned device.
> + *
> + * See additional information in acpi_dev_present() as well.
> + */
> +struct acpi_device *
> +acpi_dev_get_first_match_dev(const char *hid, const char *uid, s64 hrv)
> +{
> + return acpi_dev_get_next_match_dev(NULL, hid, uid, hrv);
> +}
>  EXPORT_SYMBOL(acpi_dev_get_first_match_dev);
>  
>  /*
> diff --git a/include/acpi/acpi_bus.h b/include/acpi/acpi_bus.h
> index 6d1879bf9440..02a716a0af5d 100644
> --- a/include/acpi/acpi_bus.h
> +++ b/include/acpi/acpi_bus.h
> @@ -683,9 +683,16 @@ static inline bool acpi_device_can_poweroff(struct 
> acpi_device *adev)
>  
>  bool acpi_dev_hid_uid_match(struct acpi_device *adev, const char *hid2, 
> const char *uid2);
>  
> +struct acpi_device *
> +acpi_dev_get_next_match_dev(struct acpi_device *adev, const char *hid, const 
> char *uid, s64 hrv);
>  struct acpi_device *
>  acpi_dev_get_first_match_dev(const char *hid, const char *uid, s64 hrv);
>  
> +#define for_each_acpi_dev_match(adev, hid, uid, hrv) \
> + for (adev = acpi_dev_get_first_match_dev(hid, uid, hrv);\
> +  adev;  \
> +  adev = acpi_dev_get_next_match_dev(adev, hid, uid, hrv))
> +
>  static inline void acpi_dev_put(struct acpi_device *adev)
>  {
>   put_device(>dev);

-- 
Regards,

Laurent Pinchart


[PATCH 0/5] Optimize iommu_map_sg() performance

2021-01-08 Thread Isaac J. Manjarres
The iommu_map_sg() code currently iterates through the given
scatter-gather list, and in the worst case, invokes iommu_map()
for each element in the scatter-gather list, which calls into
the IOMMU driver through an indirect call. For an IOMMU driver
that uses a format supported by the io-pgtable code, the IOMMU
driver will then call into the io-pgtable code to map the chunk.

Jumping between the IOMMU core code, the IOMMU driver, and the
io-pgtable code and back for each element in a scatter-gather list
is not efficient.

Instead, add a map_sg() hook in both the IOMMU driver ops and the
io-pgtable ops. iommu_map_sg() can then call into the IOMMU driver's
map_sg() hook with the entire scatter-gather list, which can call
into the io-pgtable map_sg() hook, which can process the entire
scatter-gather list, significantly reducing the number of indirect
calls, and jumps between these layers, boosting performance.

On a system that uses the ARM SMMU driver, and the ARM LPAE format,
the current implementation of iommu_map_sg() yields the following
latencies for mapping scatter-gather lists of various sizes. These
latencies are calculated by repeating the mapping operation 10 times:

size    iommu_map_sg latency
  4K0.624 us
 64K9.468 us
  1M  122.557 us
  2M  239.807 us
 12M 1435.979 us
 24M 2884.968 us
 32M 3832.979 us

On the same system, the proposed modifications yield the following
results:

size    iommu_map_sg latency
  4K3.645 us
 64K4.198 us
  1M   11.010 us
  2M   17.125 us
 12M   82.416 us
 24M  158.677 us
 32M  210.468 us

The procedure for collecting the iommu_map_sg latencies is
the same in both experiments. Clearly, reducing the jumps
between the different layers in the IOMMU code offers a
significant performance boost in iommu_map_sg() latency.

Thanks,
Isaac

Isaac J. Manjarres (5):
  iommu/io-pgtable: Introduce map_sg() as a page table op
  iommu/io-pgtable-arm: Hook up map_sg()
  iommu/io-pgtable-arm-v7s: Hook up map_sg()
  iommu: Introduce map_sg() as an IOMMU op for IOMMU drivers
  iommu/arm-smmu: Hook up map_sg()

 drivers/iommu/arm/arm-smmu/arm-smmu.c | 19 
 drivers/iommu/io-pgtable-arm-v7s.c| 90 +++
 drivers/iommu/io-pgtable-arm.c| 86 +
 drivers/iommu/iommu.c | 25 --
 include/linux/io-pgtable.h|  6 +++
 include/linux/iommu.h | 13 +
 6 files changed, 234 insertions(+), 5 deletions(-)

-- 
The Qualcomm Innovation Center, Inc. is a member of the Code Aurora Forum,
a Linux Foundation Collaborative Project



[PATCH 5/5] iommu/arm-smmu: Hook up map_sg()

2021-01-08 Thread Isaac J. Manjarres
Now that everything is in place for iommu_map_sg() to defer
mapping a scatter-gather list to the io-pgtable layer, implement
the map_sg() callback in the SMMU driver, so that iommu_map_sg()
can invoke it with the entire scatter-gather list that will be
mapped.

Signed-off-by: Isaac J. Manjarres 
---
 drivers/iommu/arm/arm-smmu/arm-smmu.c | 19 +++
 1 file changed, 19 insertions(+)

diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu.c 
b/drivers/iommu/arm/arm-smmu/arm-smmu.c
index d8c6bfd..52acc68 100644
--- a/drivers/iommu/arm/arm-smmu/arm-smmu.c
+++ b/drivers/iommu/arm/arm-smmu/arm-smmu.c
@@ -1208,6 +1208,24 @@ static int arm_smmu_map(struct iommu_domain *domain, 
unsigned long iova,
return ret;
 }
 
+static int arm_smmu_map_sg(struct iommu_domain *domain, unsigned long iova,
+  struct scatterlist *sg, unsigned int nents, int prot,
+  gfp_t gfp, size_t *mapped)
+{
+   struct io_pgtable_ops *ops = to_smmu_domain(domain)->pgtbl_ops;
+   struct arm_smmu_device *smmu = to_smmu_domain(domain)->smmu;
+   int ret;
+
+   if (!ops)
+   return -ENODEV;
+
+   arm_smmu_rpm_get(smmu);
+   ret = ops->map_sg(ops, iova, sg, nents, prot, gfp, mapped);
+   arm_smmu_rpm_put(smmu);
+
+   return ret;
+}
+
 static size_t arm_smmu_unmap(struct iommu_domain *domain, unsigned long iova,
 size_t size, struct iommu_iotlb_gather *gather)
 {
@@ -1624,6 +1642,7 @@ static struct iommu_ops arm_smmu_ops = {
.domain_free= arm_smmu_domain_free,
.attach_dev = arm_smmu_attach_dev,
.map= arm_smmu_map,
+   .map_sg = arm_smmu_map_sg,
.unmap  = arm_smmu_unmap,
.flush_iotlb_all= arm_smmu_flush_iotlb_all,
.iotlb_sync = arm_smmu_iotlb_sync,
-- 
The Qualcomm Innovation Center, Inc. is a member of the Code Aurora Forum,
a Linux Foundation Collaborative Project



[PATCH 3/5] iommu/io-pgtable-arm-v7s: Hook up map_sg()

2021-01-08 Thread Isaac J. Manjarres
Implement the map_sg io-pgtable op for the ARMv7s io-pgtable
code, so that IOMMU drivers can call it when they need to map
a scatter-gather list.

Signed-off-by: Isaac J. Manjarres 
---
 drivers/iommu/io-pgtable-arm-v7s.c | 90 ++
 1 file changed, 90 insertions(+)

diff --git a/drivers/iommu/io-pgtable-arm-v7s.c 
b/drivers/iommu/io-pgtable-arm-v7s.c
index 1d92ac9..40d96d2 100644
--- a/drivers/iommu/io-pgtable-arm-v7s.c
+++ b/drivers/iommu/io-pgtable-arm-v7s.c
@@ -545,6 +545,95 @@ static int arm_v7s_map(struct io_pgtable_ops *ops, 
unsigned long iova,
return ret;
 }
 
+static int arm_v7s_map_by_pgsize(struct io_pgtable_ops *ops,
+unsigned long iova, phys_addr_t paddr,
+size_t size, int prot, gfp_t gfp,
+size_t *mapped)
+{
+   struct arm_v7s_io_pgtable *data = io_pgtable_ops_to_data(ops);
+   struct io_pgtable *iop = &data->iop;
+   struct io_pgtable_cfg *cfg = &iop->cfg;
+   unsigned int min_pagesz = 1 << __ffs(cfg->pgsize_bitmap);
+   int ret;
+   size_t pgsize;
+
+   if (!IS_ALIGNED(iova | paddr | size, min_pagesz)) {
+   pr_err("unaligned: iova 0x%lx pa %pa size 0x%zx min_pagesz 
0x%x\n",
+  iova, &paddr, size, min_pagesz);
+   return -EINVAL;
+   }
+
+   if (WARN_ON((iova + size) >= (1ULL << cfg->ias) ||
+   (paddr + size) >= (1ULL << cfg->oas)))
+   return -ERANGE;
+
+   while (size) {
+   pgsize = iommu_pgsize(cfg->pgsize_bitmap, iova | paddr, size);
+   ret = __arm_v7s_map(data, iova, paddr, pgsize, prot, 1,
+   data->pgd, gfp);
+
+   if (iop->cfg.quirks & IO_PGTABLE_QUIRK_TLBI_ON_MAP) {
+   io_pgtable_tlb_flush_walk(&data->iop, iova, size,
+ ARM_V7S_BLOCK_SIZE(2));
+   } else {
+   wmb();
+   }
+
+   if (ret)
+   return ret;
+
+   iova += pgsize;
+   paddr += pgsize;
+   *mapped += pgsize;
+   size -= pgsize;
+   }
+
+   return 0;
+}
+
+static int arm_v7s_map_sg(struct io_pgtable_ops *ops, unsigned long iova,
+ struct scatterlist *sg, unsigned int nents,
+ int iommu_prot, gfp_t gfp, size_t *mapped)
+{
+   size_t len = 0;
+   unsigned int i = 0;
+   int ret;
+   phys_addr_t start;
+
+   *mapped = 0;
+
+   /* If no access, then nothing to do */
+   if (!(iommu_prot & (IOMMU_READ | IOMMU_WRITE)))
+   return 0;
+
+   while (i <= nents) {
+   phys_addr_t s_phys = sg_phys(sg);
+
+   if (len && s_phys != start + len) {
+   ret = arm_v7s_map_by_pgsize(ops, iova + *mapped, start,
+   len, iommu_prot, gfp,
+   mapped);
+
+   if (ret)
+   return ret;
+
+   len = 0;
+   }
+
+   if (len) {
+   len += sg->length;
+   } else {
+   len = sg->length;
+   start = s_phys;
+   }
+
+   if (++i < nents)
+   sg = sg_next(sg);
+   }
+
+   return 0;
+}
+
 static void arm_v7s_free_pgtable(struct io_pgtable *iop)
 {
struct arm_v7s_io_pgtable *data = io_pgtable_to_data(iop);
@@ -783,6 +872,7 @@ static struct io_pgtable *arm_v7s_alloc_pgtable(struct 
io_pgtable_cfg *cfg,
 
data->iop.ops = (struct io_pgtable_ops) {
.map= arm_v7s_map,
+   .map_sg = arm_v7s_map_sg,
.unmap  = arm_v7s_unmap,
.iova_to_phys   = arm_v7s_iova_to_phys,
};
-- 
The Qualcomm Innovation Center, Inc. is a member of the Code Aurora Forum,
a Linux Foundation Collaborative Project



[PATCH 4/5] iommu: Introduce map_sg() as an IOMMU op for IOMMU drivers

2021-01-08 Thread Isaac J. Manjarres
Add support for IOMMU drivers to have their own map_sg() callbacks.
This completes the path for having iommu_map_sg() invoke an IOMMU
driver's map_sg() callback, which can then invoke the io-pgtable
map_sg() callback with the entire scatter-gather list, so that it
can be processed entirely in the io-pgtable layer.

For IOMMU drivers that do not provide a callback, the default
implementation of iterating through the scatter-gather list, while
calling iommu_map() will be used.

Signed-off-by: Isaac J. Manjarres 
---
 drivers/iommu/iommu.c | 13 +
 include/linux/iommu.h |  5 +
 2 files changed, 18 insertions(+)

diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c
index 0da0687..46acd5c 100644
--- a/drivers/iommu/iommu.c
+++ b/drivers/iommu/iommu.c
@@ -2535,11 +2535,24 @@ static size_t __iommu_map_sg(struct iommu_domain 
*domain, unsigned long iova,
 struct scatterlist *sg, unsigned int nents, int 
prot,
 gfp_t gfp)
 {
+   const struct iommu_ops *ops = domain->ops;
size_t len = 0, mapped = 0;
phys_addr_t start;
unsigned int i = 0;
int ret;
 
+   if (ops->map_sg) {
+   ret = ops->map_sg(domain, iova, sg, nents, prot, gfp, &mapped);
+
+   if (ops->iotlb_sync_map)
+   ops->iotlb_sync_map(domain);
+
+   if (ret)
+   goto out_err;
+
+   return mapped;
+   }
+
while (i <= nents) {
phys_addr_t s_phys = sg_phys(sg);
 
diff --git a/include/linux/iommu.h b/include/linux/iommu.h
index 0e40a38..bac7681 100644
--- a/include/linux/iommu.h
+++ b/include/linux/iommu.h
@@ -192,6 +192,8 @@ struct iommu_iotlb_gather {
  * @attach_dev: attach device to an iommu domain
  * @detach_dev: detach device from an iommu domain
  * @map: map a physically contiguous memory region to an iommu domain
+ * @map_sg: map a scatter-gather list of physically contiguous chunks to
+ *  an iommu domain.
  * @unmap: unmap a physically contiguous memory region from an iommu domain
  * @flush_iotlb_all: Synchronously flush all hardware TLBs for this domain
  * @iotlb_sync_map: Sync mappings created recently using @map to the hardware
@@ -243,6 +245,9 @@ struct iommu_ops {
void (*detach_dev)(struct iommu_domain *domain, struct device *dev);
int (*map)(struct iommu_domain *domain, unsigned long iova,
   phys_addr_t paddr, size_t size, int prot, gfp_t gfp);
+   int (*map_sg)(struct iommu_domain *domain, unsigned long iova,
+ struct scatterlist *sg, unsigned int nents, int prot,
+ gfp_t gfp, size_t *mapped);
size_t (*unmap)(struct iommu_domain *domain, unsigned long iova,
 size_t size, struct iommu_iotlb_gather *iotlb_gather);
void (*flush_iotlb_all)(struct iommu_domain *domain);
-- 
The Qualcomm Innovation Center, Inc. is a member of the Code Aurora Forum,
a Linux Foundation Collaborative Project



[PATCH 2/5] iommu/io-pgtable-arm: Hook up map_sg()

2021-01-08 Thread Isaac J. Manjarres
Implement the map_sg io-pgtable op for the ARM LPAE io-pgtable
code, so that IOMMU drivers can call it when they need to map
a scatter-gather list.

Signed-off-by: Isaac J. Manjarres 
---
 drivers/iommu/io-pgtable-arm.c | 86 ++
 drivers/iommu/iommu.c  | 12 +++---
 include/linux/iommu.h  |  8 
 3 files changed, 101 insertions(+), 5 deletions(-)

diff --git a/drivers/iommu/io-pgtable-arm.c b/drivers/iommu/io-pgtable-arm.c
index 87def58..9c17d9d 100644
--- a/drivers/iommu/io-pgtable-arm.c
+++ b/drivers/iommu/io-pgtable-arm.c
@@ -473,6 +473,91 @@ static int arm_lpae_map(struct io_pgtable_ops *ops, 
unsigned long iova,
return ret;
 }
 
+static int arm_lpae_map_by_pgsize(struct io_pgtable_ops *ops,
+ unsigned long iova, phys_addr_t paddr,
+ size_t size, int iommu_prot, gfp_t gfp,
+ size_t *mapped)
+{
+   struct arm_lpae_io_pgtable *data = io_pgtable_ops_to_data(ops);
+   struct io_pgtable_cfg *cfg = &data->iop.cfg;
+   arm_lpae_iopte *ptep = data->pgd;
+   int ret, lvl = data->start_level;
+   arm_lpae_iopte prot = arm_lpae_prot_to_pte(data, iommu_prot);
+   unsigned int min_pagesz = 1 << __ffs(cfg->pgsize_bitmap);
+   long iaext = (s64)(iova + size) >> cfg->ias;
+   size_t pgsize;
+
+   if (!IS_ALIGNED(iova | paddr | size, min_pagesz)) {
+   pr_err("unaligned: iova 0x%lx pa %pa size 0x%zx min_pagesz 
0x%x\n",
+  iova, &paddr, size, min_pagesz);
+   return -EINVAL;
+   }
+
+   if (cfg->quirks & IO_PGTABLE_QUIRK_ARM_TTBR1)
+   iaext = ~iaext;
+   if (WARN_ON(iaext || (paddr + size) >> cfg->oas))
+   return -ERANGE;
+
+   while (size) {
+   pgsize = iommu_pgsize(cfg->pgsize_bitmap, iova | paddr, size);
+   ret = __arm_lpae_map(data, iova, paddr, pgsize, prot, lvl, ptep,
+gfp);
+   if (ret)
+   return ret;
+
+   iova += pgsize;
+   paddr += pgsize;
+   *mapped += pgsize;
+   size -= pgsize;
+   }
+
+   return 0;
+}
+
+static int arm_lpae_map_sg(struct io_pgtable_ops *ops, unsigned long iova,
+  struct scatterlist *sg, unsigned int nents,
+  int iommu_prot, gfp_t gfp, size_t *mapped)
+{
+
+   size_t len = 0;
+   unsigned int i = 0;
+   int ret;
+   phys_addr_t start;
+
+   *mapped = 0;
+
+   /* If no access, then nothing to do */
+   if (!(iommu_prot & (IOMMU_READ | IOMMU_WRITE)))
+   return 0;
+
+   while (i <= nents) {
+   phys_addr_t s_phys = sg_phys(sg);
+
+   if (len && s_phys != start + len) {
+   ret = arm_lpae_map_by_pgsize(ops, iova + *mapped, start,
+len, iommu_prot, gfp,
+mapped);
+
+   if (ret)
+   return ret;
+
+   len = 0;
+   }
+
+   if (len) {
+   len += sg->length;
+   } else {
+   len = sg->length;
+   start = s_phys;
+   }
+
+   if (++i < nents)
+   sg = sg_next(sg);
+   }
+
+   return 0;
+}
+
 static void __arm_lpae_free_pgtable(struct arm_lpae_io_pgtable *data, int lvl,
arm_lpae_iopte *ptep)
 {
@@ -750,6 +835,7 @@ arm_lpae_alloc_pgtable(struct io_pgtable_cfg *cfg)
 
data->iop.ops = (struct io_pgtable_ops) {
.map= arm_lpae_map,
+   .map_sg = arm_lpae_map_sg,
.unmap  = arm_lpae_unmap,
.iova_to_phys   = arm_lpae_iova_to_phys,
};
diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c
index ffeebda..0da0687 100644
--- a/drivers/iommu/iommu.c
+++ b/drivers/iommu/iommu.c
@@ -2346,8 +2346,8 @@ phys_addr_t iommu_iova_to_phys(struct iommu_domain 
*domain, dma_addr_t iova)
 }
 EXPORT_SYMBOL_GPL(iommu_iova_to_phys);
 
-static size_t iommu_pgsize(struct iommu_domain *domain,
-  unsigned long addr_merge, size_t size)
+size_t iommu_pgsize(unsigned long pgsize_bitmap, unsigned long addr_merge,
+   size_t size)
 {
unsigned int pgsize_idx;
size_t pgsize;
@@ -2366,7 +2366,7 @@ static size_t iommu_pgsize(struct iommu_domain *domain,
pgsize = (1UL << (pgsize_idx + 1)) - 1;
 
/* throw away page sizes not supported by the hardware */
-   pgsize &= domain->pgsize_bitmap;
+   pgsize &= pgsize_bitmap;
 
/* make sure we're still sane */
BUG_ON(!pgsize);
@@ -2412,7 +2412,8 @@ static int __iommu_map(struct iommu_domain *domain, 

[PATCH 1/5] iommu/io-pgtable: Introduce map_sg() as a page table op

2021-01-08 Thread Isaac J. Manjarres
While mapping a scatter-gather list, iommu_map_sg() calls
into the IOMMU driver through an indirect call, which can
call into the io-pgtable code through another indirect call.

This sequence of going through the IOMMU core code, the IOMMU
driver, and finally the io-pgtable code, occurs for every
element in the scatter-gather list, in the worst case, which
is not optimal.

Introduce a map_sg callback in the io-pgtable ops so that
IOMMU drivers can invoke it with the complete scatter-gather
list, so that it can be processed within the io-pgtable
code entirely, reducing the number of indirect calls, and
boosting overall iommu_map_sg() performance.

Signed-off-by: Isaac J. Manjarres 
---
 include/linux/io-pgtable.h | 6 ++
 1 file changed, 6 insertions(+)

diff --git a/include/linux/io-pgtable.h b/include/linux/io-pgtable.h
index ea727eb..6d0e731 100644
--- a/include/linux/io-pgtable.h
+++ b/include/linux/io-pgtable.h
@@ -147,6 +147,9 @@ struct io_pgtable_cfg {
  * struct io_pgtable_ops - Page table manipulation API for IOMMU drivers.
  *
  * @map:  Map a physically contiguous memory region.
+ * @map_sg:   Map a scatter-gather list of physically contiguous memory
+ *chunks. The mapped pointer argument is used to store how
+ *many bytes are mapped.
  * @unmap:Unmap a physically contiguous memory region.
  * @iova_to_phys: Translate iova to physical address.
  *
@@ -156,6 +159,9 @@ struct io_pgtable_cfg {
 struct io_pgtable_ops {
int (*map)(struct io_pgtable_ops *ops, unsigned long iova,
   phys_addr_t paddr, size_t size, int prot, gfp_t gfp);
+   int (*map_sg)(struct io_pgtable_ops *ops, unsigned long iova,
+ struct scatterlist *sg, unsigned int nents, int prot,
+ gfp_t gfp, size_t *mapped);
size_t (*unmap)(struct io_pgtable_ops *ops, unsigned long iova,
size_t size, struct iommu_iotlb_gather *gather);
phys_addr_t (*iova_to_phys)(struct io_pgtable_ops *ops,
-- 
The Qualcomm Innovation Center, Inc. is a member of the Code Aurora Forum,
a Linux Foundation Collaborative Project



Re: [PATCH v2] proc_sysctl: fix oops caused by incorrect command parameters.

2021-01-08 Thread Andrew Morton
On Fri, 8 Jan 2021 21:10:25 +0100 Michal Hocko  wrote:

> > > Why would that matter? A missing value is clearly a error path and it
> > > should be reported.
> > 
> > This test is in the correct place. I think it's just a question of the
> > return values.
> 
> I was probably not clear. The test for val is at the right place. I
> would just expect -EINVAL and have the generic code to report.

It does seem a bit screwy that process_sysctl_arg() returns zero in all
situations (parse_args() is set up to handle an error return from it). 
But this patch is consistent with all the other error handling in
process_sysctl_arg().


Re: [PATCH v1 16/17] arm64: dts: sdm845-db845c: Add CAMSS ISP node

2021-01-08 Thread Laurent Pinchart
Hi Rob,

Thank you for the patch.

The subject line doesn't match the patch.

On Fri, Jan 08, 2021 at 01:04:28PM +0100, Robert Foss wrote:
> Add regulators and camss DT node.
> 
> Signed-off-by: Robert Foss 
> ---
>  arch/arm64/boot/dts/qcom/sdm845-db845c.dts | 12 
>  1 file changed, 12 insertions(+)
> 
> diff --git a/arch/arm64/boot/dts/qcom/sdm845-db845c.dts 
> b/arch/arm64/boot/dts/qcom/sdm845-db845c.dts
> index a943b3f353ce..7bad0515345e 100644
> --- a/arch/arm64/boot/dts/qcom/sdm845-db845c.dts
> +++ b/arch/arm64/boot/dts/qcom/sdm845-db845c.dts
> @@ -294,6 +294,9 @@ vreg_s7a_1p025: smps7 {
>   regulator-max-microvolt = <1028000>;
>   };
>  
> + vdda_mipi_csi0_0p9:
> + vdda_mipi_csi1_0p9:
> + vdda_mipi_csi2_0p9:

No need for new labels, you can simply use vreg_l1a_0p875 below.

>   vreg_l1a_0p875: ldo1 {
>   regulator-min-microvolt = <88>;
>   regulator-max-microvolt = <88>;
> @@ -1106,6 +1109,15 @@  {
>   status = "okay";
>  };
>  
> + {
> + vdda-csi0-supply = <_mipi_csi0_0p9>;
> + vdda-csi1-supply = <_mipi_csi1_0p9>;
> + vdda-csi2-supply = <_mipi_csi2_0p9>;
> +
> + status = "disabled";

This isn't needed.

> +

Extra blank line.

> +};
> +
>  _i2c0 {
>   camera@10 {
>   compatible = "ovti,ov8856";

-- 
Regards,

Laurent Pinchart


Re: [PATCH v1 12/17] media: dt-bindings: media: qcom,camss: Add bindings for SDM845 camss

2021-01-08 Thread Laurent Pinchart
Hi Rob,

Thank you for the patch.

On Fri, Jan 08, 2021 at 01:04:24PM +0100, Robert Foss wrote:
> Add bindings for qcom,sdm845-camss in order to support the camera
> subsystem on SDM845.
> 
> Signed-off-by: Robert Foss 
> ---
>  .../devicetree/bindings/media/qcom,camss.txt  | 51 +++
>  1 file changed, 40 insertions(+), 11 deletions(-)
> 
> diff --git a/Documentation/devicetree/bindings/media/qcom,camss.txt 
> b/Documentation/devicetree/bindings/media/qcom,camss.txt
> index 498234629e21..276c5d0c25cb 100644
> --- a/Documentation/devicetree/bindings/media/qcom,camss.txt
> +++ b/Documentation/devicetree/bindings/media/qcom,camss.txt
> @@ -9,6 +9,7 @@ Qualcomm Camera Subsystem
>   - "qcom,msm8916-camss"
>   - "qcom,msm8996-camss"
>   - "qcom,sdm660-camss"
> + - "qcom,sdm845-camss"
>  - reg:
>   Usage: required
>   Value type: 
> @@ -18,19 +19,21 @@ Qualcomm Camera Subsystem
>   Value type: 
>   Definition: Should contain the following entries:
>   - "csiphy0"
> - - "csiphy0_clk_mux"
> + - "csiphy0_clk_mux" (not 845)
>   - "csiphy1"
> - - "csiphy1_clk_mux"
> - - "csiphy2" (8996 only)
> + - "csiphy1_clk_mux" (not 845)
> + - "csiphy2" (8996 & 845)
>   - "csiphy2_clk_mux" (8996 only)
> + - "csiphy3" (845 only)
>   - "csid0"
>   - "csid1"
> - - "csid2"   (8996 only)
> - - "csid3"   (8996 only)
> + - "csid2"   (8996 & 845)
> + - "csid3"   (8996 & 845)
>   - "ispif"
> - - "csi_clk_mux"
> + - "csi_clk_mux" (not 845)
>   - "vfe0"
> - - "vfe1"(8996 only)
> + - "vfe1"(8996 & 845)
> + - "vfe_lite"(845 only)

All this would be more readable after a conversion to YAML ;-)

>  - interrupts:
>   Usage: required
>   Value type: 
> @@ -41,14 +44,16 @@ Qualcomm Camera Subsystem
>   Definition: Should contain the following entries:
>   - "csiphy0"
>   - "csiphy1"
> - - "csiphy2" (8996 only)
> + - "csiphy2" (8996 & 845)
> + - "csiphy3" (845 only)
>   - "csid0"
>   - "csid1"
> - - "csid2"   (8996 only)
> + - "csid2"   (8996 & 845)
>   - "csid3"   (8996 only)
>   - "ispif"
>   - "vfe0"
> - - "vfe1"(8996 only)
> + - "vfe1"(8996 & 845)
> + - "vfe_lite"(845 only)
>  - power-domains:
>   Usage: required
>   Value type: 
> @@ -67,27 +72,40 @@ Qualcomm Camera Subsystem
>   - "top_ahb"
>   - "throttle_axi"(660 only)
>   - "ispif_ahb"
> + - "camnoc_axi"  (845 only)
> + - "cpas_ahb"(845 only)
> + - "cphy_rx_src" (856 only)
>   - "csiphy0_timer"
> + - "csiphy0" (845 only)
> + - "csiphy0_timer_src"   (845 only)
>   - "csiphy1_timer"
> - - "csiphy2_timer"   (8996 only)
> + - "csiphy2" (845 only)
> + - "csiphy2_timer_src"   (845 only)
> + - "csiphy2_timer"   (8996 & 845)
> + - "csiphy3" (845 only)
> + - "csiphy3_timer_src"   (845 only)
> + - "csiphy3_timer"   (845 only)
>   - "csiphy_ahb2crif" (660 only)
>   - "csi0_ahb"
>   - "csi0"
>   - "csi0_phy"
>   - "csi0_pix"
>   - "csi0_rdi"
> + - "csi0_src"(845 only)
>   - "cphy_csid0"  (660 only)
>   - "csi1_ahb"
>   - "csi1"
>   - "csi1_phy"
>   - "csi1_pix"
>   - "csi1_rdi"
> + - "csi1_src"(845 only)
>   - "cphy_csid1"  (660 only)
>   - "csi2_ahb"(8996 only)
>   - "csi2"(8996 only)
>   - "csi2_phy"(8996 only)
>   - "csi2_pix"(8996 only)
>   - "csi2_rdi"(8996 only)
> + - "csi2_src"(845 only)
>   - "cphy_csid2"  (660 only)
>   - "csi3_ahb"(8996 only)
>   - "csi3"(8996 only)
> @@ -96,14 +114,25 @@ Qualcomm Camera Subsystem
>   - "csi3_rdi"(8996 only)
>   - "cphy_csid3"  (660 only)
>   - "ahb"
> + - "slow_ahb_src" 

[PATCH v4 1/2] dt-bindings: input: Create macros for cros-ec keymap

2021-01-08 Thread Philip Chen
In Chrome OS, the keyboard matrix can be split to two groups:

The keymap for the top row keys can be customized based on OEM
preference, while the keymap for the other keys is generic/fixed
across boards.

This patch creates macros for the keymaps of these two groups, making
it easier to reuse the generic portion of keymap when we override the
keymap in the board-specific dts for custom top row design.

Signed-off-by: Philip Chen 
---

(no changes since v2)

Changes in v2:
- Rename CROS_STD_NON_TOP_ROW_KEYMAP to CROS_STD_MAIN_KEYMAP

 include/dt-bindings/input/cros-ec-keyboard.h | 103 +++
 1 file changed, 103 insertions(+)
 create mode 100644 include/dt-bindings/input/cros-ec-keyboard.h

diff --git a/include/dt-bindings/input/cros-ec-keyboard.h 
b/include/dt-bindings/input/cros-ec-keyboard.h
new file mode 100644
index 0..a37a8c5701219
--- /dev/null
+++ b/include/dt-bindings/input/cros-ec-keyboard.h
@@ -0,0 +1,103 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * This header provides the constants of the standard Chrome OS key matrix
+ * for cros-ec keyboard-controller bindings.
+ *
+ * Copyright (c) 2021 Google, Inc
+ */
+
+#ifndef _CROS_EC_KEYBOARD_H
+#define _CROS_EC_KEYBOARD_H
+
+#define CROS_STD_TOP_ROW_KEYMAP\
+   MATRIX_KEY(0x00, 0x02, KEY_F1)  \
+   MATRIX_KEY(0x03, 0x02, KEY_F2)  \
+   MATRIX_KEY(0x02, 0x02, KEY_F3)  \
+   MATRIX_KEY(0x01, 0x02, KEY_F4)  \
+   MATRIX_KEY(0x03, 0x04, KEY_F5)  \
+   MATRIX_KEY(0x02, 0x04, KEY_F6)  \
+   MATRIX_KEY(0x01, 0x04, KEY_F7)  \
+   MATRIX_KEY(0x02, 0x09, KEY_F8)  \
+   MATRIX_KEY(0x01, 0x09, KEY_F9)  \
+   MATRIX_KEY(0x00, 0x04, KEY_F10) \
+   MATRIX_KEY(0x03, 0x09, KEY_F13)
+
+#define CROS_STD_MAIN_KEYMAP   \
+   MATRIX_KEY(0x00, 0x01, KEY_LEFTMETA)\
+   MATRIX_KEY(0x00, 0x03, KEY_B)   \
+   MATRIX_KEY(0x00, 0x05, KEY_RO)  \
+   MATRIX_KEY(0x00, 0x06, KEY_N)   \
+   MATRIX_KEY(0x00, 0x08, KEY_EQUAL)   \
+   MATRIX_KEY(0x00, 0x0a, KEY_RIGHTALT)\
+   MATRIX_KEY(0x01, 0x01, KEY_ESC) \
+   MATRIX_KEY(0x01, 0x03, KEY_G)   \
+   MATRIX_KEY(0x01, 0x06, KEY_H)   \
+   MATRIX_KEY(0x01, 0x08, KEY_APOSTROPHE)  \
+   MATRIX_KEY(0x01, 0x0b, KEY_BACKSPACE)   \
+   MATRIX_KEY(0x01, 0x0c, KEY_HENKAN)  \
+   \
+   MATRIX_KEY(0x02, 0x00, KEY_LEFTCTRL)\
+   MATRIX_KEY(0x02, 0x01, KEY_TAB) \
+   MATRIX_KEY(0x02, 0x03, KEY_T)   \
+   MATRIX_KEY(0x02, 0x05, KEY_RIGHTBRACE)  \
+   MATRIX_KEY(0x02, 0x06, KEY_Y)   \
+   MATRIX_KEY(0x02, 0x07, KEY_102ND)   \
+   MATRIX_KEY(0x02, 0x08, KEY_LEFTBRACE)   \
+   MATRIX_KEY(0x02, 0x0a, KEY_YEN) \
+   \
+   MATRIX_KEY(0x03, 0x00, KEY_LEFTMETA)\
+   MATRIX_KEY(0x03, 0x01, KEY_GRAVE)   \
+   MATRIX_KEY(0x03, 0x03, KEY_5)   \
+   MATRIX_KEY(0x03, 0x06, KEY_6)   \
+   MATRIX_KEY(0x03, 0x08, KEY_MINUS)   \
+   MATRIX_KEY(0x03, 0x0b, KEY_BACKSLASH)   \
+   MATRIX_KEY(0x03, 0x0c, KEY_MUHENKAN)\
+   \
+   MATRIX_KEY(0x04, 0x00, KEY_RIGHTCTRL)   \
+   MATRIX_KEY(0x04, 0x01, KEY_A)   \
+   MATRIX_KEY(0x04, 0x02, KEY_D)   \
+   MATRIX_KEY(0x04, 0x03, KEY_F)   \
+   MATRIX_KEY(0x04, 0x04, KEY_S)   \
+   MATRIX_KEY(0x04, 0x05, KEY_K)   \
+   MATRIX_KEY(0x04, 0x06, KEY_J)   \
+   MATRIX_KEY(0x04, 0x08, KEY_SEMICOLON)   \
+   MATRIX_KEY(0x04, 0x09, KEY_L)   \
+   MATRIX_KEY(0x04, 0x0a, KEY_BACKSLASH)   \
+   MATRIX_KEY(0x04, 0x0b, KEY_ENTER)   \
+   \
+   MATRIX_KEY(0x05, 0x01, KEY_Z)   \
+   MATRIX_KEY(0x05, 0x02, KEY_C)   \
+   MATRIX_KEY(0x05, 0x03, KEY_V)   \
+   MATRIX_KEY(0x05, 0x04, KEY_X)   \
+   MATRIX_KEY(0x05, 0x05, KEY_COMMA)   \
+   MATRIX_KEY(0x05, 0x06, KEY_M)   \
+   MATRIX_KEY(0x05, 0x07, KEY_LEFTSHIFT)   \
+   MATRIX_KEY(0x05, 0x08, KEY_SLASH)   \
+   MATRIX_KEY(0x05, 0x09, KEY_DOT) \
+   MATRIX_KEY(0x05, 0x0b, KEY_SPACE)   \
+   \
+   MATRIX_KEY(0x06, 0x01, KEY_1)   \
+   MATRIX_KEY(0x06, 0x02, KEY_3)   \
+   MATRIX_KEY(0x06, 0x03, KEY_4)   \
+   MATRIX_KEY(0x06, 0x04, KEY_2)   \
+   MATRIX_KEY(0x06, 0x05, KEY_8)   \
+   MATRIX_KEY(0x06, 0x06, KEY_7)   \
+   MATRIX_KEY(0x06, 0x08, KEY_0)   \
+   MATRIX_KEY(0x06, 0x09, KEY_9)   \
+   MATRIX_KEY(0x06, 0x0a, KEY_LEFTALT) \
+   MATRIX_KEY(0x06, 0x0b, KEY_DOWN)\
+   MATRIX_KEY(0x06, 0x0c, KEY_RIGHT)   \
+ 

Covid-19 Relief F und

2021-01-08 Thread Relief Center
We know that this message may come to you as a surprise,You have been chosen  
to receive Covid-19 Relief Fund.

Reply toMr. Rei  Hoffman,  for  the release of the fund  and further 
direction immediately via  this Email:  mec...@xcontrol.it

Thanks,
Jeff  Lean


Re: [GIT PULL] Documentation fixes

2021-01-08 Thread pr-tracker-bot
The pull request you sent on Fri, 8 Jan 2021 10:31:20 -0700:

> git://git.lwn.net/linux.git tags/docs-5.11-3

has been merged into torvalds/linux.git:
https://git.kernel.org/torvalds/c/50dbd96e4f31e28fc2fcc80abaabab4fb277227c

Thank you!

-- 
Deet-doot-dot, I am a bot.
https://korg.docs.kernel.org/prtracker.html


Re: [RFC PATCH v2] pinctrl: add helper to expose pinctrl state in debugfs

2021-01-08 Thread Linus Walleij
Hi Drew,

Sorry for the belated review. The approach is so uncommon that it had me
confused.

On Thu, Dec 24, 2020 at 9:36 PM Drew Fustini  wrote:

> > > I used the compatible string "pinctrl,state-helper" but would appreciate
> > > advice on how to best name this. Should I create a new vendor prefix?
> >
> > Here is the first concern. Why does this require to be a driver with a
> > compatible string?
>
> I have not been able to figure out how to have different active pinctrl
> states for each header pins (for example P2 header pin 3) unless they
> are represented as DT nodes with their own compatible for this helper
> driver such as:
>
>  {
> P2_03_pinmux {
> compatible = "pinctrl,state-helper";
> pinctrl-names = "default", "gpio", "gpio_pu", "gpio_pd", 
> "gpio_input", "pwm";
> pinctrl-0 = <_03_default_pin>;
> pinctrl-1 = <_03_gpio_pin>;
> pinctrl-2 = <_03_gpio_pu_pin>;
> pinctrl-3 = <_03_gpio_pd_pin>;
> pinctrl-4 = <_03_gpio_input_pin>;
> pinctrl-5 = <_03_pwm_pin>;
> };
> }

I do not think the DT people are going to appreciate this pseudo-device.

Can you not just represent them as pin control hogs and have the debugfs
code with the other debugfs code in drivers/pinctrl/core.c?

Normal drivers cannot play around with the state assigned to a
hog, but debugfs can certainly do that so go ahead and patch
the core.

> I can assign pinctrl states in the pin controller DT node which has
> compatible pinctrl-single (line 301 arch/arm/boot/dts/am33xx-l4.dtsi):
>
> _pinmux {
>
> pinctrl-names = "default", "gpio", "pwm";
> pinctrl-0 =   < _03_default_pin _34_default_pin 
> _19_default_pin _24_default_pin
> _33_default_pin _22_default_pin 
> _18_default_pin _10_default_pin
> _06_default_pin _04_default_pin 
> _02_default_pin _08_default_pin
> _17_default_pin >;
> pinctrl-1 =   < _03_gpio_pin _34_gpio_pin _19_gpio_pin 
> _24_gpio_pin
> _33_gpio_pin _22_gpio_pin _18_gpio_pin 
> _10_gpio_pin
> _06_gpio_pin _04_gpio_pin _02_gpio_pin 
> _08_gpio_pin
> _17_gpio_pin >;
> pinctrl-2 =   < _03_pwm _34_pwm _19_pwm _24_pwm
> _33_pwm _22_pwm _18_pwm _10_pwm
> _06_pwm _04_pwm _02_pwm _08_pwm
> _17_pwm >;
>
> }
>
> However, there is no way to later select "gpio" for P2.03 and select
> "pwm" for P1.34 at the same time.  Thus, I can not figure out a way to
> select independent states per pin unless I make a node for each pin that
> binds to a helper driver.
>
> It feels like there may be a simpler soluation but I can't see to figure
> it out.  Suggestions welcome!

I think maybe there is no solution because you are solving a problem
that only pinctrl-single has, while trying to stay generic? The single
driver is special in that it requires all states of pins to be encoded
into the device tree, but for debugging that is kind of unfriendly
which was mentioned in its inception. For deep debugging it is good
to let the core know of all available functions and groups and
single does not IIUC.

Yours,
Linus Walleij


[PATCH v4 2/2] ARM: dts: cros-ec-keyboard: Use keymap marcos

2021-01-08 Thread Philip Chen
The common cros-ec keymap has been defined as macros. This patch uses
the macros to simplify linux,keymap in the cros-ec-keyboard.dtsi file.

This patch also creates an alias for keyboard-controller to make it
easier to override the keymap in board-specific dts later.

Signed-off-by: Philip Chen 
---

Changes in v4:
- Fix a typo

Changes in v3:
- Create an alias for keyboard-controller

Changes in v2:
- Replace CROS_STD_NON_TOP_ROW_KEYMAP with CROS_STD_MAIN_KEYMAP

 arch/arm/boot/dts/cros-ec-keyboard.dtsi | 93 ++---
 1 file changed, 4 insertions(+), 89 deletions(-)

diff --git a/arch/arm/boot/dts/cros-ec-keyboard.dtsi 
b/arch/arm/boot/dts/cros-ec-keyboard.dtsi
index 165c5bcd510e5..55c4744fa7e7a 100644
--- a/arch/arm/boot/dts/cros-ec-keyboard.dtsi
+++ b/arch/arm/boot/dts/cros-ec-keyboard.dtsi
@@ -6,103 +6,18 @@
 */
 
 #include 
+#include <dt-bindings/input/cros-ec-keyboard.h>
 
 _ec {
-   keyboard-controller {
+   keyboard_controller: keyboard-controller {
compatible = "google,cros-ec-keyb";
keypad,num-rows = <8>;
keypad,num-columns = <13>;
google,needs-ghost-filter;
 
linux,keymap = <
-   MATRIX_KEY(0x00, 0x01, KEY_LEFTMETA)
-   MATRIX_KEY(0x00, 0x02, KEY_F1)
-   MATRIX_KEY(0x00, 0x03, KEY_B)
-   MATRIX_KEY(0x00, 0x04, KEY_F10)
-   MATRIX_KEY(0x00, 0x05, KEY_RO)
-   MATRIX_KEY(0x00, 0x06, KEY_N)
-   MATRIX_KEY(0x00, 0x08, KEY_EQUAL)
-   MATRIX_KEY(0x00, 0x0a, KEY_RIGHTALT)
-
-   MATRIX_KEY(0x01, 0x01, KEY_ESC)
-   MATRIX_KEY(0x01, 0x02, KEY_F4)
-   MATRIX_KEY(0x01, 0x03, KEY_G)
-   MATRIX_KEY(0x01, 0x04, KEY_F7)
-   MATRIX_KEY(0x01, 0x06, KEY_H)
-   MATRIX_KEY(0x01, 0x08, KEY_APOSTROPHE)
-   MATRIX_KEY(0x01, 0x09, KEY_F9)
-   MATRIX_KEY(0x01, 0x0b, KEY_BACKSPACE)
-   MATRIX_KEY(0x01, 0x0c, KEY_HENKAN)
-
-   MATRIX_KEY(0x02, 0x00, KEY_LEFTCTRL)
-   MATRIX_KEY(0x02, 0x01, KEY_TAB)
-   MATRIX_KEY(0x02, 0x02, KEY_F3)
-   MATRIX_KEY(0x02, 0x03, KEY_T)
-   MATRIX_KEY(0x02, 0x04, KEY_F6)
-   MATRIX_KEY(0x02, 0x05, KEY_RIGHTBRACE)
-   MATRIX_KEY(0x02, 0x06, KEY_Y)
-   MATRIX_KEY(0x02, 0x07, KEY_102ND)
-   MATRIX_KEY(0x02, 0x08, KEY_LEFTBRACE)
-   MATRIX_KEY(0x02, 0x09, KEY_F8)
-   MATRIX_KEY(0x02, 0x0a, KEY_YEN)
-
-   MATRIX_KEY(0x03, 0x00, KEY_LEFTMETA)
-   MATRIX_KEY(0x03, 0x01, KEY_GRAVE)
-   MATRIX_KEY(0x03, 0x02, KEY_F2)
-   MATRIX_KEY(0x03, 0x03, KEY_5)
-   MATRIX_KEY(0x03, 0x04, KEY_F5)
-   MATRIX_KEY(0x03, 0x06, KEY_6)
-   MATRIX_KEY(0x03, 0x08, KEY_MINUS)
-   MATRIX_KEY(0x03, 0x09, KEY_F13)
-   MATRIX_KEY(0x03, 0x0b, KEY_BACKSLASH)
-   MATRIX_KEY(0x03, 0x0c, KEY_MUHENKAN)
-
-   MATRIX_KEY(0x04, 0x00, KEY_RIGHTCTRL)
-   MATRIX_KEY(0x04, 0x01, KEY_A)
-   MATRIX_KEY(0x04, 0x02, KEY_D)
-   MATRIX_KEY(0x04, 0x03, KEY_F)
-   MATRIX_KEY(0x04, 0x04, KEY_S)
-   MATRIX_KEY(0x04, 0x05, KEY_K)
-   MATRIX_KEY(0x04, 0x06, KEY_J)
-   MATRIX_KEY(0x04, 0x08, KEY_SEMICOLON)
-   MATRIX_KEY(0x04, 0x09, KEY_L)
-   MATRIX_KEY(0x04, 0x0a, KEY_BACKSLASH)
-   MATRIX_KEY(0x04, 0x0b, KEY_ENTER)
-
-   MATRIX_KEY(0x05, 0x01, KEY_Z)
-   MATRIX_KEY(0x05, 0x02, KEY_C)
-   MATRIX_KEY(0x05, 0x03, KEY_V)
-   MATRIX_KEY(0x05, 0x04, KEY_X)
-   MATRIX_KEY(0x05, 0x05, KEY_COMMA)
-   MATRIX_KEY(0x05, 0x06, KEY_M)
-   MATRIX_KEY(0x05, 0x07, KEY_LEFTSHIFT)
-   MATRIX_KEY(0x05, 0x08, KEY_SLASH)
-   MATRIX_KEY(0x05, 0x09, KEY_DOT)
-   MATRIX_KEY(0x05, 0x0b, KEY_SPACE)
-
-   MATRIX_KEY(0x06, 0x01, KEY_1)
-   MATRIX_KEY(0x06, 0x02, KEY_3)
-   MATRIX_KEY(0x06, 0x03, KEY_4)
-   MATRIX_KEY(0x06, 0x04, KEY_2)
-   MATRIX_KEY(0x06, 0x05, KEY_8)
-   MATRIX_KEY(0x06, 0x06, KEY_7)
-   MATRIX_KEY(0x06, 0x08, KEY_0)
-   MATRIX_KEY(0x06, 0x09, KEY_9)
-   

Re: [GIT PULL] KUnit update for Linux 5.11-rc3

2021-01-08 Thread pr-tracker-bot
The pull request you sent on Fri, 8 Jan 2021 11:57:56 -0700:

> git://git.kernel.org/pub/scm/linux/kernel/git/shuah/linux-kselftest 
> tags/linux-kselftest-kunit-fixes-5.11-rc3

has been merged into torvalds/linux.git:
https://git.kernel.org/torvalds/c/263da3330f6c0e4af603ec62f291e43eb3001f7b

Thank you!

-- 
Deet-doot-dot, I am a bot.
https://korg.docs.kernel.org/prtracker.html


Re: [GIT PULL] Kselftest fixes for Linux 5.11-rc3

2021-01-08 Thread pr-tracker-bot
The pull request you sent on Fri, 8 Jan 2021 11:14:39 -0700:

> git://git.kernel.org/pub/scm/linux/kernel/git/shuah/linux-kselftest 
> tags/linux-kselftest-next-5.11-rc3

has been merged into torvalds/linux.git:
https://git.kernel.org/torvalds/c/8f3d8491d03594823a7f7d71d5063e1bcd03c75c

Thank you!

-- 
Deet-doot-dot, I am a bot.
https://korg.docs.kernel.org/prtracker.html


Re: [PATCH RFC cpumask 4/5] cpumask: Add "last" alias for cpu list specifications

2021-01-08 Thread Peter Zijlstra
On Thu, Jan 07, 2021 at 06:47:57AM -0800, Paul E. McKenney wrote:
> > I don't really see the use of the ranges thing, CPU enumeration just
> > isn't sane like that. Also, I should really add that randomization pass
> > to the CPU enumeration :-)
> 
> Please don't!!!

Why not, the BIOS more or less already does that on a per machine basis
anyway. Doing it per boot just makes things more reliably screwy ;-)


[PATCH v3 1/2] dt-bindings: input: Create macros for cros-ec keymap

2021-01-08 Thread Philip Chen
In Chrome OS, the keyboard matrix can be split to two groups:

The keymap for the top row keys can be customized based on OEM
preference, while the keymap for the other keys is generic/fixed
across boards.

This patch creates macros for the keymaps of these two groups, making
it easier to reuse the generic portion of keymap when we override the
keymap in the board-specific dts for custom top row design.

Signed-off-by: Philip Chen 
---

(no changes since v2)

Changes in v2:
- Rename CROS_STD_NON_TOP_ROW_KEYMAP to CROS_STD_MAIN_KEYMAP

 include/dt-bindings/input/cros-ec-keyboard.h | 103 +++
 1 file changed, 103 insertions(+)
 create mode 100644 include/dt-bindings/input/cros-ec-keyboard.h

diff --git a/include/dt-bindings/input/cros-ec-keyboard.h 
b/include/dt-bindings/input/cros-ec-keyboard.h
new file mode 100644
index 0..a37a8c5701219
--- /dev/null
+++ b/include/dt-bindings/input/cros-ec-keyboard.h
@@ -0,0 +1,103 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * This header provides the constants of the standard Chrome OS key matrix
+ * for cros-ec keyboard-controller bindings.
+ *
+ * Copyright (c) 2021 Google, Inc
+ */
+
+#ifndef _CROS_EC_KEYBOARD_H
+#define _CROS_EC_KEYBOARD_H
+
+#define CROS_STD_TOP_ROW_KEYMAP\
+   MATRIX_KEY(0x00, 0x02, KEY_F1)  \
+   MATRIX_KEY(0x03, 0x02, KEY_F2)  \
+   MATRIX_KEY(0x02, 0x02, KEY_F3)  \
+   MATRIX_KEY(0x01, 0x02, KEY_F4)  \
+   MATRIX_KEY(0x03, 0x04, KEY_F5)  \
+   MATRIX_KEY(0x02, 0x04, KEY_F6)  \
+   MATRIX_KEY(0x01, 0x04, KEY_F7)  \
+   MATRIX_KEY(0x02, 0x09, KEY_F8)  \
+   MATRIX_KEY(0x01, 0x09, KEY_F9)  \
+   MATRIX_KEY(0x00, 0x04, KEY_F10) \
+   MATRIX_KEY(0x03, 0x09, KEY_F13)
+
+#define CROS_STD_MAIN_KEYMAP   \
+   MATRIX_KEY(0x00, 0x01, KEY_LEFTMETA)\
+   MATRIX_KEY(0x00, 0x03, KEY_B)   \
+   MATRIX_KEY(0x00, 0x05, KEY_RO)  \
+   MATRIX_KEY(0x00, 0x06, KEY_N)   \
+   MATRIX_KEY(0x00, 0x08, KEY_EQUAL)   \
+   MATRIX_KEY(0x00, 0x0a, KEY_RIGHTALT)\
+   MATRIX_KEY(0x01, 0x01, KEY_ESC) \
+   MATRIX_KEY(0x01, 0x03, KEY_G)   \
+   MATRIX_KEY(0x01, 0x06, KEY_H)   \
+   MATRIX_KEY(0x01, 0x08, KEY_APOSTROPHE)  \
+   MATRIX_KEY(0x01, 0x0b, KEY_BACKSPACE)   \
+   MATRIX_KEY(0x01, 0x0c, KEY_HENKAN)  \
+   \
+   MATRIX_KEY(0x02, 0x00, KEY_LEFTCTRL)\
+   MATRIX_KEY(0x02, 0x01, KEY_TAB) \
+   MATRIX_KEY(0x02, 0x03, KEY_T)   \
+   MATRIX_KEY(0x02, 0x05, KEY_RIGHTBRACE)  \
+   MATRIX_KEY(0x02, 0x06, KEY_Y)   \
+   MATRIX_KEY(0x02, 0x07, KEY_102ND)   \
+   MATRIX_KEY(0x02, 0x08, KEY_LEFTBRACE)   \
+   MATRIX_KEY(0x02, 0x0a, KEY_YEN) \
+   \
+   MATRIX_KEY(0x03, 0x00, KEY_LEFTMETA)\
+   MATRIX_KEY(0x03, 0x01, KEY_GRAVE)   \
+   MATRIX_KEY(0x03, 0x03, KEY_5)   \
+   MATRIX_KEY(0x03, 0x06, KEY_6)   \
+   MATRIX_KEY(0x03, 0x08, KEY_MINUS)   \
+   MATRIX_KEY(0x03, 0x0b, KEY_BACKSLASH)   \
+   MATRIX_KEY(0x03, 0x0c, KEY_MUHENKAN)\
+   \
+   MATRIX_KEY(0x04, 0x00, KEY_RIGHTCTRL)   \
+   MATRIX_KEY(0x04, 0x01, KEY_A)   \
+   MATRIX_KEY(0x04, 0x02, KEY_D)   \
+   MATRIX_KEY(0x04, 0x03, KEY_F)   \
+   MATRIX_KEY(0x04, 0x04, KEY_S)   \
+   MATRIX_KEY(0x04, 0x05, KEY_K)   \
+   MATRIX_KEY(0x04, 0x06, KEY_J)   \
+   MATRIX_KEY(0x04, 0x08, KEY_SEMICOLON)   \
+   MATRIX_KEY(0x04, 0x09, KEY_L)   \
+   MATRIX_KEY(0x04, 0x0a, KEY_BACKSLASH)   \
+   MATRIX_KEY(0x04, 0x0b, KEY_ENTER)   \
+   \
+   MATRIX_KEY(0x05, 0x01, KEY_Z)   \
+   MATRIX_KEY(0x05, 0x02, KEY_C)   \
+   MATRIX_KEY(0x05, 0x03, KEY_V)   \
+   MATRIX_KEY(0x05, 0x04, KEY_X)   \
+   MATRIX_KEY(0x05, 0x05, KEY_COMMA)   \
+   MATRIX_KEY(0x05, 0x06, KEY_M)   \
+   MATRIX_KEY(0x05, 0x07, KEY_LEFTSHIFT)   \
+   MATRIX_KEY(0x05, 0x08, KEY_SLASH)   \
+   MATRIX_KEY(0x05, 0x09, KEY_DOT) \
+   MATRIX_KEY(0x05, 0x0b, KEY_SPACE)   \
+   \
+   MATRIX_KEY(0x06, 0x01, KEY_1)   \
+   MATRIX_KEY(0x06, 0x02, KEY_3)   \
+   MATRIX_KEY(0x06, 0x03, KEY_4)   \
+   MATRIX_KEY(0x06, 0x04, KEY_2)   \
+   MATRIX_KEY(0x06, 0x05, KEY_8)   \
+   MATRIX_KEY(0x06, 0x06, KEY_7)   \
+   MATRIX_KEY(0x06, 0x08, KEY_0)   \
+   MATRIX_KEY(0x06, 0x09, KEY_9)   \
+   MATRIX_KEY(0x06, 0x0a, KEY_LEFTALT) \
+   MATRIX_KEY(0x06, 0x0b, KEY_DOWN)\
+   MATRIX_KEY(0x06, 0x0c, KEY_RIGHT)   \
+ 

Re: [PATCH] mmc: sdhci-iproc: Add ACPI bindings for the rpi4

2021-01-08 Thread kernel test robot
Hi Jeremy,

Thank you for the patch! Perhaps something to improve:

[auto build test WARNING on linus/master]
[also build test WARNING on v5.11-rc2 next-20210108]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch]

url:
https://github.com/0day-ci/linux/commits/Jeremy-Linton/mmc-sdhci-iproc-Add-ACPI-bindings-for-the-rpi4/20210109-051645
base:   https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git 
6279d812eab67a6df6b22fa495201db6f2305924
config: riscv-randconfig-r012-20210108 (attached as .config)
compiler: clang version 12.0.0 (https://github.com/llvm/llvm-project 
bc556e5685c0f97e79fb7b3c6f15cc5062db8e36)
reproduce (this is a W=1 build):
wget 
https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O 
~/bin/make.cross
chmod +x ~/bin/make.cross
# install riscv cross compiling tool for clang build
# apt-get install binutils-riscv64-linux-gnu
# 
https://github.com/0day-ci/linux/commit/659eacf5a5de971ea94390dd6c7443c82d53ea5e
git remote add linux-review https://github.com/0day-ci/linux
git fetch --no-tags linux-review 
Jeremy-Linton/mmc-sdhci-iproc-Add-ACPI-bindings-for-the-rpi4/20210109-051645
git checkout 659eacf5a5de971ea94390dd6c7443c82d53ea5e
# save the attached .config to linux build tree
COMPILER_INSTALL_PATH=$HOME/0day COMPILER=clang make.cross ARCH=riscv 

If you fix the issue, kindly add following tag as appropriate
Reported-by: kernel test robot 

All warnings (new ones prefixed by >>):

   In file included from drivers/mmc/host/sdhci-iproc.c:24:
   In file included from drivers/mmc/host/sdhci-pltfm.h:13:
   In file included from drivers/mmc/host/sdhci.h:13:
   In file included from include/linux/scatterlist.h:9:
   In file included from arch/riscv/include/asm/io.h:149:
   include/asm-generic/io.h:556:9: warning: performing pointer arithmetic on a 
null pointer has undefined behavior [-Wnull-pointer-arithmetic]
   return inb(addr);
  ^
   arch/riscv/include/asm/io.h:55:76: note: expanded from macro 'inb'
   #define inb(c)  ({ u8  __v; __io_pbr(); __v = 
readb_cpu((void*)(PCI_IOBASE + (c))); __io_par(__v); __v; })
   
~~ ^
   arch/riscv/include/asm/mmio.h:87:48: note: expanded from macro 'readb_cpu'
   #define readb_cpu(c)({ u8  __r = __raw_readb(c); __r; })
^
   In file included from drivers/mmc/host/sdhci-iproc.c:24:
   In file included from drivers/mmc/host/sdhci-pltfm.h:13:
   In file included from drivers/mmc/host/sdhci.h:13:
   In file included from include/linux/scatterlist.h:9:
   In file included from arch/riscv/include/asm/io.h:149:
   include/asm-generic/io.h:564:9: warning: performing pointer arithmetic on a 
null pointer has undefined behavior [-Wnull-pointer-arithmetic]
   return inw(addr);
  ^
   arch/riscv/include/asm/io.h:56:76: note: expanded from macro 'inw'
   #define inw(c)  ({ u16 __v; __io_pbr(); __v = 
readw_cpu((void*)(PCI_IOBASE + (c))); __io_par(__v); __v; })
   
~~ ^
   arch/riscv/include/asm/mmio.h:88:76: note: expanded from macro 'readw_cpu'
   #define readw_cpu(c)({ u16 __r = le16_to_cpu((__force 
__le16)__raw_readw(c)); __r; })

^
   include/uapi/linux/byteorder/little_endian.h:36:51: note: expanded from 
macro '__le16_to_cpu'
   #define __le16_to_cpu(x) ((__force __u16)(__le16)(x))
 ^
   In file included from drivers/mmc/host/sdhci-iproc.c:24:
   In file included from drivers/mmc/host/sdhci-pltfm.h:13:
   In file included from drivers/mmc/host/sdhci.h:13:
   In file included from include/linux/scatterlist.h:9:
   In file included from arch/riscv/include/asm/io.h:149:
   include/asm-generic/io.h:572:9: warning: performing pointer arithmetic on a 
null pointer has undefined behavior [-Wnull-pointer-arithmetic]
   return inl(addr);
  ^
   arch/riscv/include/asm/io.h:57:76: note: expanded from macro 'inl'
   #define inl(c)  ({ u32 __v; __io_pbr(); __v = 
readl_cpu((void*)(PCI_IOBASE + (c))); __io_par(__v); __v; })
   
~~ ^
   arch/riscv/include/asm/mmio.h:89:76: note: expanded from macro 'readl_cpu'
   #define readl_cpu(c)({ u32 __r = le32_to_cpu((__force 
__le32)__raw_readl(c)); __r; })

^
   include/uapi/linux/byteorder/little_endian.h:34:51

[PATCH v3 2/2] ARM: dts: cros-ec-keyboard: Use keymap marcos

2021-01-08 Thread Philip Chen
The common cros-ec keymap has been defined as macros. This patch uses
the macros to simplify linux,keymap in the cros-ec-keyboard.dtsi file.

This patch also creates an alias for keyboard-controller to make it
easier to override the keymap in board-specific dts later.

Signed-off-by: Philip Chen 
---

Changes in v3:
- Create an alias for keyboard-controller

Changes in v2:
- Replace CROS_STD_NON_TOP_ROW_KEYMAP with CROS_STD_MAIN_KEYMAP

 arch/arm/boot/dts/cros-ec-keyboard.dtsi | 93 ++---
 1 file changed, 4 insertions(+), 89 deletions(-)

diff --git a/arch/arm/boot/dts/cros-ec-keyboard.dtsi 
b/arch/arm/boot/dts/cros-ec-keyboard.dtsi
index 165c5bcd510e5..dae2727cec4c8 100644
--- a/arch/arm/boot/dts/cros-ec-keyboard.dtsi
+++ b/arch/arm/boot/dts/cros-ec-keyboard.dtsi
@@ -6,103 +6,18 @@
 */
 
 #include 
+#include 
 
 _ec {
-   keyboard-controller {
+   kkeyboard_controller: keyboard-controller {
compatible = "google,cros-ec-keyb";
keypad,num-rows = <8>;
keypad,num-columns = <13>;
google,needs-ghost-filter;
 
linux,keymap = <
-   MATRIX_KEY(0x00, 0x01, KEY_LEFTMETA)
-   MATRIX_KEY(0x00, 0x02, KEY_F1)
-   MATRIX_KEY(0x00, 0x03, KEY_B)
-   MATRIX_KEY(0x00, 0x04, KEY_F10)
-   MATRIX_KEY(0x00, 0x05, KEY_RO)
-   MATRIX_KEY(0x00, 0x06, KEY_N)
-   MATRIX_KEY(0x00, 0x08, KEY_EQUAL)
-   MATRIX_KEY(0x00, 0x0a, KEY_RIGHTALT)
-
-   MATRIX_KEY(0x01, 0x01, KEY_ESC)
-   MATRIX_KEY(0x01, 0x02, KEY_F4)
-   MATRIX_KEY(0x01, 0x03, KEY_G)
-   MATRIX_KEY(0x01, 0x04, KEY_F7)
-   MATRIX_KEY(0x01, 0x06, KEY_H)
-   MATRIX_KEY(0x01, 0x08, KEY_APOSTROPHE)
-   MATRIX_KEY(0x01, 0x09, KEY_F9)
-   MATRIX_KEY(0x01, 0x0b, KEY_BACKSPACE)
-   MATRIX_KEY(0x01, 0x0c, KEY_HENKAN)
-
-   MATRIX_KEY(0x02, 0x00, KEY_LEFTCTRL)
-   MATRIX_KEY(0x02, 0x01, KEY_TAB)
-   MATRIX_KEY(0x02, 0x02, KEY_F3)
-   MATRIX_KEY(0x02, 0x03, KEY_T)
-   MATRIX_KEY(0x02, 0x04, KEY_F6)
-   MATRIX_KEY(0x02, 0x05, KEY_RIGHTBRACE)
-   MATRIX_KEY(0x02, 0x06, KEY_Y)
-   MATRIX_KEY(0x02, 0x07, KEY_102ND)
-   MATRIX_KEY(0x02, 0x08, KEY_LEFTBRACE)
-   MATRIX_KEY(0x02, 0x09, KEY_F8)
-   MATRIX_KEY(0x02, 0x0a, KEY_YEN)
-
-   MATRIX_KEY(0x03, 0x00, KEY_LEFTMETA)
-   MATRIX_KEY(0x03, 0x01, KEY_GRAVE)
-   MATRIX_KEY(0x03, 0x02, KEY_F2)
-   MATRIX_KEY(0x03, 0x03, KEY_5)
-   MATRIX_KEY(0x03, 0x04, KEY_F5)
-   MATRIX_KEY(0x03, 0x06, KEY_6)
-   MATRIX_KEY(0x03, 0x08, KEY_MINUS)
-   MATRIX_KEY(0x03, 0x09, KEY_F13)
-   MATRIX_KEY(0x03, 0x0b, KEY_BACKSLASH)
-   MATRIX_KEY(0x03, 0x0c, KEY_MUHENKAN)
-
-   MATRIX_KEY(0x04, 0x00, KEY_RIGHTCTRL)
-   MATRIX_KEY(0x04, 0x01, KEY_A)
-   MATRIX_KEY(0x04, 0x02, KEY_D)
-   MATRIX_KEY(0x04, 0x03, KEY_F)
-   MATRIX_KEY(0x04, 0x04, KEY_S)
-   MATRIX_KEY(0x04, 0x05, KEY_K)
-   MATRIX_KEY(0x04, 0x06, KEY_J)
-   MATRIX_KEY(0x04, 0x08, KEY_SEMICOLON)
-   MATRIX_KEY(0x04, 0x09, KEY_L)
-   MATRIX_KEY(0x04, 0x0a, KEY_BACKSLASH)
-   MATRIX_KEY(0x04, 0x0b, KEY_ENTER)
-
-   MATRIX_KEY(0x05, 0x01, KEY_Z)
-   MATRIX_KEY(0x05, 0x02, KEY_C)
-   MATRIX_KEY(0x05, 0x03, KEY_V)
-   MATRIX_KEY(0x05, 0x04, KEY_X)
-   MATRIX_KEY(0x05, 0x05, KEY_COMMA)
-   MATRIX_KEY(0x05, 0x06, KEY_M)
-   MATRIX_KEY(0x05, 0x07, KEY_LEFTSHIFT)
-   MATRIX_KEY(0x05, 0x08, KEY_SLASH)
-   MATRIX_KEY(0x05, 0x09, KEY_DOT)
-   MATRIX_KEY(0x05, 0x0b, KEY_SPACE)
-
-   MATRIX_KEY(0x06, 0x01, KEY_1)
-   MATRIX_KEY(0x06, 0x02, KEY_3)
-   MATRIX_KEY(0x06, 0x03, KEY_4)
-   MATRIX_KEY(0x06, 0x04, KEY_2)
-   MATRIX_KEY(0x06, 0x05, KEY_8)
-   MATRIX_KEY(0x06, 0x06, KEY_7)
-   MATRIX_KEY(0x06, 0x08, KEY_0)
-   MATRIX_KEY(0x06, 0x09, KEY_9)
-   MATRIX_KEY(0x06, 0x0a, KEY_LEFTALT)
-  

[rcu:dev.2021.01.05a] BUILD SUCCESS 9affdc6963f87b92130cc76a2728e49aaf4beab8

2021-01-08 Thread kernel test robot
tree/branch: 
https://git.kernel.org/pub/scm/linux/kernel/git/paulmck/linux-rcu.git  
dev.2021.01.05a
branch HEAD: 9affdc6963f87b92130cc76a2728e49aaf4beab8  squash! x86/mce: Make 
mce_timed_out() identify holdout CPUs

elapsed time: 1409m

configs tested: 131
configs skipped: 2

The following configs have been built successfully.
More configs may be tested in the coming days.

gcc tested configs:
arm defconfig
arm64allyesconfig
arm64   defconfig
arm  allyesconfig
arm  allmodconfig
nios2allyesconfig
powerpcmvme5100_defconfig
mips  ath25_defconfig
sh shx3_defconfig
m68k  multi_defconfig
sh  lboxre2_defconfig
arc haps_hs_smp_defconfig
um   x86_64_defconfig
sparc64  alldefconfig
armoxnas_v6_defconfig
sh   se7721_defconfig
powerpc  pcm030_defconfig
powerpc mpc832x_rdb_defconfig
m68k  amiga_defconfig
m68k   bvme6000_defconfig
xtensa  iss_defconfig
powerpc  makalu_defconfig
arm   h3600_defconfig
m68k  atari_defconfig
sparc   sparc64_defconfig
c6xevmc6472_defconfig
xtensageneric_kc705_defconfig
mips  decstation_64_defconfig
shsh7785lcr_defconfig
mipsworkpad_defconfig
mipsomega2p_defconfig
shtitan_defconfig
powerpc  ppc64e_defconfig
shdreamcast_defconfig
arm socfpga_defconfig
powerpc mpc8560_ads_defconfig
powerpc taishan_defconfig
powerpc  mpc885_ads_defconfig
mipsmalta_kvm_guest_defconfig
i386 allyesconfig
powerpc   ppc64_defconfig
powerpc tqm8560_defconfig
mips cobalt_defconfig
arm at91_dt_defconfig
powerpcgamecube_defconfig
arm orion5x_defconfig
arm  collie_defconfig
arm   sunxi_defconfig
nios2 3c120_defconfig
mips bigsur_defconfig
powerpc  storcenter_defconfig
powerpcmpc7448_hpc2_defconfig
m68km5272c3_defconfig
sh ecovec24_defconfig
arm eseries_pxa_defconfig
sh  r7780mp_defconfig
powerpc  acadia_defconfig
arc  axs103_defconfig
mips   ip27_defconfig
m68km5307c3_defconfig
arm  badge4_defconfig
sh   rts7751r2dplus_defconfig
ia64 allmodconfig
ia64defconfig
ia64 allyesconfig
m68k allmodconfig
m68kdefconfig
m68k allyesconfig
nios2   defconfig
arc  allyesconfig
nds32 allnoconfig
c6x  allyesconfig
nds32   defconfig
cskydefconfig
alpha   defconfig
alphaallyesconfig
xtensa   allyesconfig
h8300allyesconfig
arc defconfig
sh   allmodconfig
parisc  defconfig
s390 allyesconfig
parisc   allyesconfig
s390defconfig
sparcallyesconfig
sparc   defconfig
i386   tinyconfig
i386defconfig
mips allyesconfig
mips allmodconfig
powerpc  allyesconfig
powerpc  allmodconfig
powerpc   allnoconfig
x86_64   randconfig-a004-20210108
x86_64   randconfig-a006-20210108
x86_64   randconfig-a001-20210108
x86_64   randconfig-a002-20210108
x86_64   randconfig-a003-20210108
x86_64   randconfig-a005-20210108
i386 randconfig-a005

Re: [PATCH RFC cpumask 4/5] cpumask: Add "last" alias for cpu list specifications

2021-01-08 Thread Peter Zijlstra
On Wed, Jan 06, 2021 at 01:16:50PM -0800, Yury Norov wrote:
> On Wed, Jan 6, 2021 at 1:50 AM Peter Zijlstra  wrote:

> > Aside from the comments Yury made, on how all this is better in
> > bitmap_parselist(), how about doing s/last/N/ here? For me something
> > like: "4-N" reads much saner than "4-last".
> >
> > Also, it might make sense to teach all this about core/node topology,
> > but that's going to be messy. Imagine something like "Core1-CoreN" or
> > "Node1-NodeN" to mean the mask all/{Core,Node}0.
> 
> If you just want to teach bitmap_parselist() to "s/Core0/0-4",  I think
> it's doable if we add a hook to a proper subsystem in bitmap_parselist().
> 
> > And that is another feature that seems to be missing from parselist,
> > all/except.
> 
> We already support groups in a range. I think it partially covers the
> proposed all/except.
> 
> Can you share examples on what you miss?

The obvious one is the "all/Core0" example above, which would be
equivalent to "Core1-CoreN".

Another case that I don't think we can do today is something like, give
me SMT0 of each core.

I don't really see the use of the ranges thing, CPU enumeration just
isn't sane like that. Also, I should really add that randomization pass
to the CPU enumeration :-)



[PATCH v9 2/2] PCI/ERR: Split the fatal and non-fatal error recovery handling

2021-01-08 Thread Kuppuswamy Sathyanarayanan
Commit bdb5ac85777d ("PCI/ERR: Handle fatal error recovery")
merged fatal and non-fatal error recovery paths, and also made
recovery code depend on hotplug handler for "remove affected
device + rescan" support. But this change also complicated the
error recovery path, which in turn led to the following
issues.

1. We depend on hotplug handler for removing the affected
devices/drivers on DLLSC LINK down event (on DPC event
trigger) and DPC handler for handling the error recovery. Since
both handlers operate on the same set of affected devices, it leads
to a race condition, which in turn leads to NULL pointer
exceptions or error recovery failures. You can find more details
about this issue in the following link.

https://lore.kernel.org/linux-pci/20201007113158.48933-1-haifeng.z...@intel.com/T/#t

2. For non-hotplug capable devices fatal (DPC) error recovery
is currently broken. Current fatal error recovery implementation
relies on PCIe hotplug (pciehp) handler for detaching and
re-enumerating the affected devices/drivers. So when dealing with
non-hotplug capable devices, recovery code does not restore the state
of the affected devices correctly. You can find more details about
this issue in the following links.

https://lore.kernel.org/linux-pci/20200527083130.4137-1-zhiqiang@nxp.com/
https://lore.kernel.org/linux-pci/12115.1588207324@famine/
https://lore.kernel.org/linux-pci/0e6f89cd6b9e4a72293cc90fafe93487d7c2d295.158584.git.sathyanarayanan.kuppusw...@linux.intel.com/

In order to fix the above two issues, we should stop relying on hotplug
handler for cleaning the affected devices/drivers and let error recovery
handler own this functionality. So this patch reverts Commit bdb5ac85777d
("PCI/ERR: Handle fatal error recovery") and re-introduces the "remove
affected device + rescan" functionality in the fatal error recovery handler.

Also holding pci_lock_rescan_remove() will prevent the race between hotplug
and DPC handler.

Fixes: bdb5ac85777d ("PCI/ERR: Handle fatal error recovery")
Signed-off-by: Kuppuswamy Sathyanarayanan 

Reviewed-by: Sinan Kaya 
---
 Changes since v8:
  * Rebased on top of v5.11-rc1.

 Changes since v7:
  * Rebased on top of v5.10-rc1.

 Changes since v6:
  * Renamed pcie_do_nonfatal_recovery() to pcie_nonfatal_recovery().
  * Renamed pcie_do_fatal_recovery() to pcie_fatal_recovery().
  * Addressed some format issues.

 Changes since v5:
  * Fixed static/non-static declaration issue.

 Changes since v4:
  * Added new interfaces for error recovery (pcie_do_fatal_recovery()
and pcie_do_nonfatal_recovery()).
 Documentation/PCI/pci-error-recovery.rst | 47 +++--
 Documentation/PCI/pcieaer-howto.rst  |  2 +-
 drivers/pci/pci.h|  7 +--
 drivers/pci/pcie/aer.c   | 10 ++--
 drivers/pci/pcie/dpc.c   |  2 +-
 drivers/pci/pcie/edr.c   |  2 +-
 drivers/pci/pcie/err.c   | 67 +---
 7 files changed, 91 insertions(+), 46 deletions(-)

diff --git a/Documentation/PCI/pci-error-recovery.rst 
b/Documentation/PCI/pci-error-recovery.rst
index 84ceebb08cac..830c8af5838b 100644
--- a/Documentation/PCI/pci-error-recovery.rst
+++ b/Documentation/PCI/pci-error-recovery.rst
@@ -115,7 +115,7 @@ The actual steps taken by a platform to recover from a PCI 
error
 event will be platform-dependent, but will follow the general
 sequence described below.
 
-STEP 0: Error Event
+STEP 0: Error Event: ERR_NONFATAL
 ---
 A PCI bus error is detected by the PCI hardware.  On powerpc, the slot
 is isolated, in that all I/O is blocked: all reads return 0x,
@@ -160,10 +160,10 @@ particular, if the platform doesn't isolate slots), and 
recovery
 proceeds to STEP 2 (MMIO Enable).
 
 If any driver requested a slot reset (by returning PCI_ERS_RESULT_NEED_RESET),
-then recovery proceeds to STEP 4 (Slot Reset).
+then recovery proceeds to STEP 3 (Slot Reset).
 
 If the platform is unable to recover the slot, the next step
-is STEP 6 (Permanent Failure).
+is STEP 5 (Permanent Failure).
 
 .. note::
 
@@ -198,7 +198,7 @@ reset or some such, but not restart operations. This 
callback is made if
 all drivers on a segment agree that they can try to recover and if no automatic
 link reset was performed by the HW. If the platform can't just re-enable IOs
 without a slot reset or a link reset, it will not call this callback, and
-instead will have gone directly to STEP 3 (Link Reset) or STEP 4 (Slot Reset)
+instead will have gone directly to STEP 3 (Slot Reset)
 
 .. note::
 
@@ -233,18 +233,12 @@ The driver should return one of the following result 
codes:
 
 The next step taken depends on the results returned by the drivers.
 If all drivers returned PCI_ERS_RESULT_RECOVERED, then the platform
-proceeds to either STEP3 (Link Reset) or to STEP 5 (Resume Operations).
+proceeds to STEP 4 (Resume Operations).
 
 If any driver returned PCI_ERS_RESULT_NEED_RESET, then the platform
-proceeds to STEP 4 

[PATCH v9 1/2] PCI/ERR: Call pci_bus_reset() before calling ->slot_reset() callback

2021-01-08 Thread Kuppuswamy Sathyanarayanan
Currently, if the report_error_detected() or report_mmio_enabled()
function requests PCI_ERS_RESULT_NEED_RESET, the current
pcie_do_recovery() implementation does not do the requested
explicit device reset, but instead just calls
report_slot_reset() on all affected devices. Notifying about the
reset via report_slot_reset() without doing the actual device
reset is incorrect. So call pci_reset_bus() before triggering the
->slot_reset() callback.

Signed-off-by: Kuppuswamy Sathyanarayanan 

Reviewed-by: Sinan Kaya 
Reviewed-by: Ashok Raj 
---
 Changes since v8:
  * Rebased on top of v5.11-rc1.

 Changes since v7:
  * Rebased on top of v5.10-rc1.

 Changes since v6:
  * None.

 Changes since v5:
  * Added Ashok's Reviewed-by tag.

 Changes since v4:
  * Added check for pci_reset_bus() return value.

 drivers/pci/pcie/err.c | 12 +++-
 1 file changed, 7 insertions(+), 5 deletions(-)

diff --git a/drivers/pci/pcie/err.c b/drivers/pci/pcie/err.c
index 510f31f0ef6d..6c19e9948232 100644
--- a/drivers/pci/pcie/err.c
+++ b/drivers/pci/pcie/err.c
@@ -177,6 +177,7 @@ pci_ers_result_t pcie_do_recovery(struct pci_dev *dev,
struct pci_dev *bridge;
pci_ers_result_t status = PCI_ERS_RESULT_CAN_RECOVER;
struct pci_host_bridge *host = pci_find_host_bridge(dev->bus);
+   int ret;
 
/*
 * If the error was detected by a Root Port, Downstream Port, RCEC,
@@ -214,11 +215,12 @@ pci_ers_result_t pcie_do_recovery(struct pci_dev *dev,
}
 
if (status == PCI_ERS_RESULT_NEED_RESET) {
-   /*
-* TODO: Should call platform-specific
-* functions to reset slot before calling
-* drivers' slot_reset callbacks?
-*/
+   ret = pci_reset_bus(bridge);
+   if (ret < 0) {
+   pci_err(dev, "Failed to reset %d\n", ret);
+   status = PCI_ERS_RESULT_DISCONNECT;
+   goto failed;
+   }
status = PCI_ERS_RESULT_RECOVERED;
pci_dbg(bridge, "broadcast slot_reset message\n");
pci_walk_bridge(bridge, report_slot_reset, );
-- 
2.25.1



Re: [PATCH 1/1] mm/madvise: replace ptrace attach requirement for process_madvise

2021-01-08 Thread David Rientjes
On Fri, 8 Jan 2021, Suren Baghdasaryan wrote:

> > > @@ -1197,12 +1197,22 @@ SYSCALL_DEFINE5(process_madvise, int, pidfd, 
> > > const struct iovec __user *, vec,
> > >   goto release_task;
> > >   }
> > >
> > > - mm = mm_access(task, PTRACE_MODE_ATTACH_FSCREDS);
> > > + /* Require PTRACE_MODE_READ to avoid leaking ASLR metadata. */
> > > + mm = mm_access(task, PTRACE_MODE_READ_FSCREDS);
> > >   if (IS_ERR_OR_NULL(mm)) {
> > >   ret = IS_ERR(mm) ? PTR_ERR(mm) : -ESRCH;
> > >   goto release_task;
> > >   }
> > >
> > > + /*
> > > +  * Require CAP_SYS_NICE for influencing process performance. Note 
> > > that
> > > +  * only non-destructive hints are currently supported.
> > > +  */
> > > + if (!capable(CAP_SYS_NICE)) {
> > > + ret = -EPERM;
> > > + goto release_task;
> >
> > mmput?
> 
> Ouch! Thanks for pointing it out! Will include in the next respin.
> 

With the fix, feel free to add:

Acked-by: David Rientjes 

Thanks Suren!


Re: [RFC PATCH v2] selinux: security: Move selinux_state to a separate page

2021-01-08 Thread Nick Desaulniers
Via:
https://lore.kernel.org/lkml/1610099389-28329-1-git-send-email-pna...@codeaurora.org/

> diff --git a/include/linux/init.h b/include/linux/init.h
> index 7b53cb3..617adcf 100644
> --- a/include/linux/init.h
> +++ b/include/linux/init.h
> @@ -300,6 +300,10 @@ void __init parse_early_options(char *cmdline);
>  /* Data marked not to be saved by software suspend */
>  #define __nosavedata __section(".data..nosave")
>  
> +#ifdef CONFIG_SECURITY_RTIC
> +#define __rticdata  __section(".bss.rtic")

if you put:

#else
#define __rticdata

here, then you wouldn't need to label each datum you put in there.

> +#endif
> +
>  #ifdef MODULE
>  #define __exit_p(x) x
>  #else

> --- a/security/selinux/hooks.c
> +++ b/security/selinux/hooks.c
> @@ -104,7 +104,11 @@
>  #include "audit.h"
>  #include "avc_ss.h"
>  
> +#ifdef CONFIG_SECURITY_RTIC
> +struct selinux_state selinux_state __rticdata;
> +#else
>  struct selinux_state selinux_state;
> +#endif

so you could then drop the if-def here.


Happy to see this resolved when building with LLD+LTO, which has been a
problem in the past.

Disabling selinux is a common attack vector on Android devices, so happy
to see some effort towards mitigation.  You might want to communicate
the feature more to existing OEMs that are using your chipsets that
support this feature.


[PATCH v2 2/3] hv_netvsc: Wait for completion on request SWITCH_DATA_PATH

2021-01-08 Thread Long Li
From: Long Li 

The completion indicates if NVSP_MSG4_TYPE_SWITCH_DATA_PATH has been
processed by the VSP. The traffic is steered to VF or synthetic after we
receive this completion.

Signed-off-by: Long Li 
Reported-by: kernel test robot 
---
Change from v1:
Fixed warnings from kernel test robot.

 drivers/net/hyperv/netvsc.c | 37 ++---
 drivers/net/hyperv/netvsc_drv.c |  1 -
 2 files changed, 34 insertions(+), 4 deletions(-)

diff --git a/drivers/net/hyperv/netvsc.c b/drivers/net/hyperv/netvsc.c
index 2350342b961f..3a3db2f0134d 100644
--- a/drivers/net/hyperv/netvsc.c
+++ b/drivers/net/hyperv/netvsc.c
@@ -37,6 +37,10 @@ void netvsc_switch_datapath(struct net_device *ndev, bool vf)
struct netvsc_device *nv_dev = rtnl_dereference(net_device_ctx->nvdev);
struct nvsp_message *init_pkt = _dev->channel_init_pkt;
 
+   /* Block sending traffic to VF if it's about to be gone */
+   if (!vf)
+   net_device_ctx->data_path_is_vf = vf;
+
memset(init_pkt, 0, sizeof(struct nvsp_message));
init_pkt->hdr.msg_type = NVSP_MSG4_TYPE_SWITCH_DATA_PATH;
if (vf)
@@ -50,8 +54,11 @@ void netvsc_switch_datapath(struct net_device *ndev, bool vf)
 
vmbus_sendpacket(dev->channel, init_pkt,
   sizeof(struct nvsp_message),
-  VMBUS_RQST_ID_NO_RESPONSE,
-  VM_PKT_DATA_INBAND, 0);
+  (unsigned long)init_pkt,
+  VM_PKT_DATA_INBAND,
+  VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED);
+   wait_for_completion(_dev->channel_init_wait);
+   net_device_ctx->data_path_is_vf = vf;
 }
 
 /* Worker to setup sub channels on initial setup
@@ -754,8 +761,31 @@ static void netvsc_send_completion(struct net_device *ndev,
   const struct vmpacket_descriptor *desc,
   int budget)
 {
-   const struct nvsp_message *nvsp_packet = hv_pkt_data(desc);
+   const struct nvsp_message *nvsp_packet;
u32 msglen = hv_pkt_datalen(desc);
+   struct nvsp_message *pkt_rqst;
+   u64 cmd_rqst;
+
+   /* First check if this is a VMBUS completion without data payload */
+   if (!msglen) {
+   cmd_rqst = vmbus_request_addr(_channel->requestor,
+ (u64)desc->trans_id);
+   if (cmd_rqst == VMBUS_RQST_ERROR) {
+   netdev_err(ndev, "Invalid transaction id\n");
+   return;
+   }
+
+   pkt_rqst = (struct nvsp_message *)(uintptr_t)cmd_rqst;
+   switch (pkt_rqst->hdr.msg_type) {
+   case NVSP_MSG4_TYPE_SWITCH_DATA_PATH:
+   complete(_device->channel_init_wait);
+   break;
+
+   default:
+   netdev_err(ndev, "Unexpected VMBUS completion!!\n");
+   }
+   return;
+   }
 
/* Ensure packet is big enough to read header fields */
if (msglen < sizeof(struct nvsp_message_header)) {
@@ -763,6 +793,7 @@ static void netvsc_send_completion(struct net_device *ndev,
return;
}
 
+   nvsp_packet = hv_pkt_data(desc);
switch (nvsp_packet->hdr.msg_type) {
case NVSP_MSG_TYPE_INIT_COMPLETE:
if (msglen < sizeof(struct nvsp_message_header) +
diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c
index 5dd4f37afa3d..64ae5f4e974e 100644
--- a/drivers/net/hyperv/netvsc_drv.c
+++ b/drivers/net/hyperv/netvsc_drv.c
@@ -2400,7 +2400,6 @@ static int netvsc_vf_changed(struct net_device *vf_netdev)
 
if (net_device_ctx->data_path_is_vf == vf_is_up)
return NOTIFY_OK;
-   net_device_ctx->data_path_is_vf = vf_is_up;
 
netvsc_switch_datapath(ndev, vf_is_up);
netdev_info(ndev, "Data path switched %s VF: %s\n",
-- 
2.27.0



[PATCH v2 3/3] hv_netvsc: Process NETDEV_GOING_DOWN on VF hot remove

2021-01-08 Thread Long Li
From: Long Li 

On VF hot remove, NETDEV_GOING_DOWN is sent to notify that the VF is about
to go down. At this time, the VF is still sending/receiving traffic and we
request the VSP to switch datapath.

On completion, the datapath is switched to synthetic and we can proceed
with VF hot remove.

Signed-off-by: Long Li 
Reviewed-by: Haiyang Zhang 
---
 drivers/net/hyperv/netvsc_drv.c | 10 +++---
 1 file changed, 7 insertions(+), 3 deletions(-)

diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c
index 64ae5f4e974e..75b4d6703cf1 100644
--- a/drivers/net/hyperv/netvsc_drv.c
+++ b/drivers/net/hyperv/netvsc_drv.c
@@ -2382,12 +2382,15 @@ static int netvsc_register_vf(struct net_device 
*vf_netdev)
  * During hibernation, if a VF NIC driver (e.g. mlx5) preserves the network
  * interface, there is only the CHANGE event and no UP or DOWN event.
  */
-static int netvsc_vf_changed(struct net_device *vf_netdev)
+static int netvsc_vf_changed(struct net_device *vf_netdev, unsigned long event)
 {
struct net_device_context *net_device_ctx;
struct netvsc_device *netvsc_dev;
struct net_device *ndev;
-   bool vf_is_up = netif_running(vf_netdev);
+   bool vf_is_up = false;
+
+   if (event != NETDEV_GOING_DOWN)
+   vf_is_up = netif_running(vf_netdev);
 
ndev = get_netvsc_byref(vf_netdev);
if (!ndev)
@@ -2716,7 +2719,8 @@ static int netvsc_netdev_event(struct notifier_block 
*this,
case NETDEV_UP:
case NETDEV_DOWN:
case NETDEV_CHANGE:
-   return netvsc_vf_changed(event_dev);
+   case NETDEV_GOING_DOWN:
+   return netvsc_vf_changed(event_dev, event);
default:
return NOTIFY_DONE;
}
-- 
2.27.0



[PATCH v2 1/3] hv_netvsc: Check VF datapath when sending traffic to VF

2021-01-08 Thread Long Li
From: Long Li 

The driver needs to check if the datapath has been switched to VF before
sending traffic to VF.

Signed-off-by: Long Li 
Reviewed-by: Haiyang Zhang 
---
 drivers/net/hyperv/netvsc_drv.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c
index f32f28311d57..5dd4f37afa3d 100644
--- a/drivers/net/hyperv/netvsc_drv.c
+++ b/drivers/net/hyperv/netvsc_drv.c
@@ -539,7 +539,8 @@ static int netvsc_xmit(struct sk_buff *skb, struct 
net_device *net, bool xdp_tx)
 */
vf_netdev = rcu_dereference_bh(net_device_ctx->vf_netdev);
if (vf_netdev && netif_running(vf_netdev) &&
-   netif_carrier_ok(vf_netdev) && !netpoll_tx_running(net))
+   netif_carrier_ok(vf_netdev) && !netpoll_tx_running(net) &&
+   net_device_ctx->data_path_is_vf)
return netvsc_vf_xmit(net, vf_netdev, skb);
 
/* We will atmost need two pages to describe the rndis
-- 
2.27.0



[PATCH v2 0/3] hv_netvsc: Prevent packet loss during VF add/remove

2021-01-08 Thread Long Li
From: Long Li 

This patch set fixes issues with packet loss on VF add/remove.

Long Li (3):
  hv_netvsc: Check VF datapath when sending traffic to VF
  hv_netvsc: Wait for completion on request SWITCH_DATA_PATH
  hv_netvsc: Process NETDEV_GOING_DOWN on VF hot remove

 drivers/net/hyperv/netvsc.c | 37 ++---
 drivers/net/hyperv/netvsc_drv.c | 14 -
 2 files changed, 43 insertions(+), 8 deletions(-)

-- 
2.27.0



[PATCH 12/13] KVM: SVM: Remove an unnecessary prototype declaration of sev_flush_asids()

2021-01-08 Thread Sean Christopherson
Remove the forward declaration of sev_flush_asids(), which is only a few
lines above the function itself.

No functional change intended.

Signed-off-by: Sean Christopherson 
---
 arch/x86/kvm/svm/sev.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c
index 1b9174a49b65..b4a9c12cf8ce 100644
--- a/arch/x86/kvm/svm/sev.c
+++ b/arch/x86/kvm/svm/sev.c
@@ -36,7 +36,6 @@ static bool sev_es_enabled = IS_ENABLED(CONFIG_AMD_MEM_ENCRYPT_ACTIVE_BY_DEFAULT
 module_param_named(sev_es, sev_es_enabled, bool, 0444);
 
 static u8 sev_enc_bit;
-static int sev_flush_asids(void);
 static DECLARE_RWSEM(sev_deactivate_lock);
 static DEFINE_MUTEX(sev_bitmap_lock);
 unsigned int max_sev_asid;
-- 
2.30.0.284.gd98b1dd5eaa7-goog



[PATCH 11/13] KVM: SVM: Drop redundant svm_sev_enabled() helper

2021-01-08 Thread Sean Christopherson
Replace calls to svm_sev_enabled() with direct checks on sev_enabled, or
in the case of svm_mem_enc_op(), simply drop the call to svm_sev_enabled().
This effectively replaces checks against a valid max_sev_asid with checks
against sev_enabled.  This is safe because sev_enabled is forced off by
sev_hardware_setup() if max_sev_asid is invalid, all call sites are
guaranteed to run after sev_hardware_setup(), and all of the checks care
about SEV being fully enabled (as opposed to intentionally handling the
scenario where max_sev_asid is valid but SEV enabling fails due to OOM).

Signed-off-by: Sean Christopherson 
---
 arch/x86/kvm/svm/sev.c | 6 +++---
 arch/x86/kvm/svm/svm.h | 5 -
 2 files changed, 3 insertions(+), 8 deletions(-)

diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c
index 8c34c467a09d..1b9174a49b65 100644
--- a/arch/x86/kvm/svm/sev.c
+++ b/arch/x86/kvm/svm/sev.c
@@ -1052,7 +1052,7 @@ int svm_mem_enc_op(struct kvm *kvm, void __user *argp)
struct kvm_sev_cmd sev_cmd;
int r;
 
-   if (!svm_sev_enabled() || !sev_enabled)
+   if (!sev_enabled)
return -ENOTTY;
 
if (!argp)
@@ -1314,7 +1314,7 @@ void __init sev_hardware_setup(void)
 
 void sev_hardware_teardown(void)
 {
-   if (!svm_sev_enabled())
+   if (!sev_enabled)
return;
 
bitmap_free(sev_asid_bitmap);
@@ -1325,7 +1325,7 @@ void sev_hardware_teardown(void)
 
 int sev_cpu_init(struct svm_cpu_data *sd)
 {
-   if (!svm_sev_enabled())
+   if (!sev_enabled)
return 0;
 
sd->sev_vmcbs = kmalloc_array(max_sev_asid + 1, sizeof(void *),
diff --git a/arch/x86/kvm/svm/svm.h b/arch/x86/kvm/svm/svm.h
index 4eb4bab0ca3e..8cb4395b58a0 100644
--- a/arch/x86/kvm/svm/svm.h
+++ b/arch/x86/kvm/svm/svm.h
@@ -569,11 +569,6 @@ void svm_vcpu_unblocking(struct kvm_vcpu *vcpu);
 
 extern unsigned int max_sev_asid;
 
-static inline bool svm_sev_enabled(void)
-{
-   return IS_ENABLED(CONFIG_KVM_AMD_SEV) ? max_sev_asid : 0;
-}
-
 void sev_vm_destroy(struct kvm *kvm);
 int svm_mem_enc_op(struct kvm *kvm, void __user *argp);
 int svm_register_enc_region(struct kvm *kvm,
-- 
2.30.0.284.gd98b1dd5eaa7-goog



[PATCH 10/13] KVM: SVM: Move SEV VMCB tracking allocation to sev.c

2021-01-08 Thread Sean Christopherson
Move the allocation of the SEV VMCB array to sev.c to help pave the way
toward encapsulating SEV enabling wholly within sev.c.

No functional change intended.

Signed-off-by: Sean Christopherson 
---
 arch/x86/kvm/svm/sev.c | 13 +
 arch/x86/kvm/svm/svm.c | 17 -
 arch/x86/kvm/svm/svm.h |  1 +
 3 files changed, 22 insertions(+), 9 deletions(-)

diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c
index 3d25d24bcb48..8c34c467a09d 100644
--- a/arch/x86/kvm/svm/sev.c
+++ b/arch/x86/kvm/svm/sev.c
@@ -1323,6 +1323,19 @@ void sev_hardware_teardown(void)
sev_flush_asids();
 }
 
+int sev_cpu_init(struct svm_cpu_data *sd)
+{
+   if (!svm_sev_enabled())
+   return 0;
+
+   sd->sev_vmcbs = kmalloc_array(max_sev_asid + 1, sizeof(void *),
+ GFP_KERNEL | __GFP_ZERO);
+   if (!sd->sev_vmcbs)
+   return -ENOMEM;
+
+   return 0;
+}
+
 /*
  * Pages used by hardware to hold guest encrypted state must be flushed before
  * returning them to the system.
diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
index bb7b99743bea..89b95fb87a0c 100644
--- a/arch/x86/kvm/svm/svm.c
+++ b/arch/x86/kvm/svm/svm.c
@@ -552,23 +552,22 @@ static void svm_cpu_uninit(int cpu)
 static int svm_cpu_init(int cpu)
 {
struct svm_cpu_data *sd;
+   int ret;
 
sd = kzalloc(sizeof(struct svm_cpu_data), GFP_KERNEL);
if (!sd)
return -ENOMEM;
sd->cpu = cpu;
sd->save_area = alloc_page(GFP_KERNEL);
-   if (!sd->save_area)
+   if (!sd->save_area) {
+   ret = -ENOMEM;
goto free_cpu_data;
+   }
clear_page(page_address(sd->save_area));
 
-   if (svm_sev_enabled()) {
-   sd->sev_vmcbs = kmalloc_array(max_sev_asid + 1,
- sizeof(void *),
- GFP_KERNEL | __GFP_ZERO);
-   if (!sd->sev_vmcbs)
-   goto free_save_area;
-   }
+   ret = sev_cpu_init(sd);
+   if (ret)
+   goto free_save_area;
 
per_cpu(svm_data, cpu) = sd;
 
@@ -578,7 +577,7 @@ static int svm_cpu_init(int cpu)
__free_page(sd->save_area);
 free_cpu_data:
kfree(sd);
-   return -ENOMEM;
+   return ret;
 
 }
 
diff --git a/arch/x86/kvm/svm/svm.h b/arch/x86/kvm/svm/svm.h
index 8e169835f52a..4eb4bab0ca3e 100644
--- a/arch/x86/kvm/svm/svm.h
+++ b/arch/x86/kvm/svm/svm.h
@@ -583,6 +583,7 @@ int svm_unregister_enc_region(struct kvm *kvm,
 void pre_sev_run(struct vcpu_svm *svm, int cpu);
 void __init sev_hardware_setup(void);
 void sev_hardware_teardown(void);
+int sev_cpu_init(struct svm_cpu_data *sd);
 void sev_free_vcpu(struct kvm_vcpu *vcpu);
 int sev_handle_vmgexit(struct vcpu_svm *svm);
 int sev_es_string_io(struct vcpu_svm *svm, int size, unsigned int port, int in);
-- 
2.30.0.284.gd98b1dd5eaa7-goog



  1   2   3   4   5   6   7   8   9   10   >