Re: [PATCH v3 04/12] ppc64/kexec_file: avoid stomping memory used by special regions

2020-07-15 Thread Thiago Jung Bauermann


Thiago Jung Bauermann  writes:

> Hari Bathini  writes:
>
>> diff --git a/arch/powerpc/include/asm/crashdump-ppc64.h 
>> b/arch/powerpc/include/asm/crashdump-ppc64.h
>> new file mode 100644
>> index 000..90deb46
>> --- /dev/null
>> +++ b/arch/powerpc/include/asm/crashdump-ppc64.h
>> @@ -0,0 +1,10 @@
>> +/* SPDX-License-Identifier: GPL-2.0-only */
>> +#ifndef _ASM_POWERPC_CRASHDUMP_PPC64_H
>> +#define _ASM_POWERPC_CRASHDUMP_PPC64_H
>> +
>> +/* min & max addresses for kdump load segments */
>> +#define KDUMP_BUF_MIN   (crashk_res.start)
>> +#define KDUMP_BUF_MAX   ((crashk_res.end < ppc64_rma_size) ? \
>> + crashk_res.end : (ppc64_rma_size - 1))
>> +
>> +#endif /* __ASM_POWERPC_CRASHDUMP_PPC64_H */
>> diff --git a/arch/powerpc/include/asm/kexec.h 
>> b/arch/powerpc/include/asm/kexec.h
>> index 7008ea1..bf47a01 100644
>> --- a/arch/powerpc/include/asm/kexec.h
>> +++ b/arch/powerpc/include/asm/kexec.h
>> @@ -100,14 +100,16 @@ void relocate_new_kernel(unsigned long 
>> indirection_page, unsigned long reboot_co
>>  #ifdef CONFIG_KEXEC_FILE
>>  extern const struct kexec_file_ops kexec_elf64_ops;
>>
>> -#ifdef CONFIG_IMA_KEXEC
>>  #define ARCH_HAS_KIMAGE_ARCH
>>
>>  struct kimage_arch {
>> +struct crash_mem *exclude_ranges;
>> +
>> +#ifdef CONFIG_IMA_KEXEC
>>  phys_addr_t ima_buffer_addr;
>>  size_t ima_buffer_size;
>> -};
>>  #endif
>> +};
>>
>>  int setup_purgatory(struct kimage *image, const void *slave_code,
>>  const void *fdt, unsigned long kernel_load_addr,
>> @@ -125,6 +127,7 @@ int setup_new_fdt_ppc64(const struct kimage *image, void 
>> *fdt,
>>  unsigned long initrd_load_addr,
>>  unsigned long initrd_len, const char *cmdline);
>>  #endif /* CONFIG_PPC64 */
>> +
>>  #endif /* CONFIG_KEXEC_FILE */
>>
>>  #else /* !CONFIG_KEXEC_CORE */
>> diff --git a/arch/powerpc/kexec/elf_64.c b/arch/powerpc/kexec/elf_64.c
>> index 23ad04c..c695f94 100644
>> --- a/arch/powerpc/kexec/elf_64.c
>> +++ b/arch/powerpc/kexec/elf_64.c
>> @@ -22,6 +22,7 @@
>>  #include 
>>  #include 
>>  #include 
>> +#include 
>>
>>  static void *elf64_load(struct kimage *image, char *kernel_buf,
>>  unsigned long kernel_len, char *initrd,
>> @@ -46,6 +47,12 @@ static void *elf64_load(struct kimage *image, char 
>> *kernel_buf,
>>  if (ret)
>>  goto out;
>>
>> +if (image->type == KEXEC_TYPE_CRASH) {
>> +/* min & max buffer values for kdump case */
>> +kbuf.buf_min = pbuf.buf_min = KDUMP_BUF_MIN;
>> +kbuf.buf_max = pbuf.buf_max = KDUMP_BUF_MAX;
>
> This is only my personal opinion and an actual maintainer may disagree,
> but just looking at the lines above, I would assume that KDUMP_BUF_MIN
> and KDUMP_BUF_MAX were constants, when in fact they aren't.
>
> I suggest using static inline functions in , for
> example:
>
> static inline resource_size_t get_kdump_buf_min(void)
> {
>   return crashk_res.start;
> }
>
> static inline resource_size_t get_kdump_buf_max(void)
> {
>   return (crashk_res.end < ppc64_rma_size) ?
>  crashk_res.end : (ppc64_rma_size - 1);
> }

I later noticed that KDUMP_BUF_MIN and KDUMP_BUF_MAX are only used here.
In this case, I think the best option is to avoid the macros and inline
functions and just use the actual expressions in the code.
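
For illustration, with the expressions from the proposed header substituted in,
the elf64_load() hunk quoted above could then read roughly:

	if (image->type == KEXEC_TYPE_CRASH) {
		/* min & max buffer values for the kdump case */
		kbuf.buf_min = pbuf.buf_min = crashk_res.start;
		kbuf.buf_max = pbuf.buf_max =
			(crashk_res.end < ppc64_rma_size) ?
			crashk_res.end : (ppc64_rma_size - 1);
	}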

-- 
Thiago Jung Bauermann
IBM Linux Technology Center


Re: [PATCH V3 3/3] platform/x86: Intel PMT Telemetry capability driver

2020-07-15 Thread Alexey Budankov


On 16.07.2020 2:59, David E. Box wrote:
> On Wed, 2020-07-15 at 10:39 +0300, Alexey Budankov wrote:
>> Hi David,
>>
>> On 14.07.2020 9:23, David E. Box wrote:
> 
> ...
> 
>>>
>>> +static int pmt_telem_open(struct inode *inode, struct file *filp)
>>> +{
>>> +   struct pmt_telem_priv *priv;
>>> +   struct pmt_telem_entry *entry;
>>> +   struct pci_driver *pci_drv;
>>> +   struct pci_dev *pci_dev;
>>> +
>>> +   if (!capable(CAP_SYS_ADMIN))
>>
>> Thanks for supplying these patches.
>> Are there any reasons not to expose this feature to CAP_PERFMON
>> privileged processes too, which currently have access to performance
>> monitoring features of the kernel without root/CAP_SYS_ADMIN
>> credentials? This could be done by a perfmon_capable() function call
>> starting from v5.8+.
> 
> The new capability is well suited for this feature. I'll make the
> change. Thanks.
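
For reference, a minimal sketch of that change in pmt_telem_open(), assuming
the check simply swaps the capability test and keeps the usual -EPERM return
(perfmon_capable() is available since v5.8):

	if (!perfmon_capable())
		return -EPERM;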

I appreciate your cooperation. Thanks!

Alexei


Re: [PATCH v4] phy: samsung: Use readl_poll_timeout function

2020-07-15 Thread Anand Moon
Hi Vinod,

On Thu, 16 Jul 2020 at 11:20, Vinod Koul  wrote:
>
> On 13-07-20, 07:42, Anand Moon wrote:
> > Instead of a busy-waiting while loop using udelay, use the
> > readl_poll_timeout function to check whether the condition is met or
> > a timeout occurs in the crport_handshake function.
> > readl_poll_timeout is called in non-atomic context, so it is safe to
> > sleep until the condition is met.
> >
> > Fixes: d8c80bb3b55b ("phy: exynos5-usbdrd: Calibrate LOS levels for 
> > exynos5420/5800")
> > Signed-off-by: Anand Moon 
> > ---
> > Changes v4:
> > Rebased on top of patch [0] https://patchwork.kernel.org/patch/11651673/
> > --Fix the commit message.
> > --Fix the error timeout condition for -ETIMEDOUT
> > ---
> > Changes v3:
> > --Fix the commit message.
> > --Drop the variable, used the value directly.
> > Changes v2:
> > --used the default timeout values.
> > --Added missing Fixed tags.
> > ---
> >  drivers/phy/samsung/phy-exynos5-usbdrd.c | 39 
> >  1 file changed, 12 insertions(+), 27 deletions(-)
> >
> > diff --git a/drivers/phy/samsung/phy-exynos5-usbdrd.c 
> > b/drivers/phy/samsung/phy-exynos5-usbdrd.c
> > index 7f6279fb4f8f..ad81aa65cdff 100644
> > --- a/drivers/phy/samsung/phy-exynos5-usbdrd.c
> > +++ b/drivers/phy/samsung/phy-exynos5-usbdrd.c
> > @@ -16,6 +16,7 @@
> >  #include 
> >  #include 
> >  #include 
> > +#include 
> >  #include 
> >  #include 
> >  #include 
> > @@ -556,41 +557,25 @@ static int exynos5_usbdrd_phy_power_off(struct phy 
> > *phy)
> >  static int crport_handshake(struct exynos5_usbdrd_phy *phy_drd,
> >   u32 val, u32 cmd)
> >  {
> > - u32 usec = 100;
> >   unsigned int result;
> > + int err;
> >
> >   writel(val | cmd, phy_drd->reg_phy + EXYNOS5_DRD_PHYREG0);
> >
> > - do {
> > - result = readl(phy_drd->reg_phy + EXYNOS5_DRD_PHYREG1);
> > - if (result & PHYREG1_CR_ACK)
> > - break;
> > -
> > - udelay(1);
> > - } while (usec-- > 0);
> > -
> > - if (!usec) {
> > - dev_err(phy_drd->dev,
> > - "CRPORT handshake timeout1 (0x%08x)\n", val);
> > - return -ETIME;
> > + err = readl_poll_timeout(phy_drd->reg_phy + EXYNOS5_DRD_PHYREG1,
> > + result, (result & PHYREG1_CR_ACK), 1, 100);
>
> pls align this line to opening brace of preceding line:
>
> err = readl_poll_timeout(phy_drd->reg_phy + EXYNOS5_DRD_PHYREG1,
>  result, (result & PHYREG1_CR_ACK), 1, 100);
>
> This is recommended way of splitting lines, see
> Documentation/process/coding-style.rst and run checkpatch.pl with
> --strict option

Ok, I will do this, just waiting for some more feedback on these changes.
>
> thanks
> --
> ~Vinod

-Anand


Re: [PATCH v2 bpf-next 1/2] bpf: separate bpf_get_[stack|stackid] for perf events BPF

2020-07-15 Thread Andrii Nakryiko
On Tue, Jul 14, 2020 at 11:08 PM Song Liu  wrote:
>
> Calling get_perf_callchain() on perf_events from PEBS entries may cause
> unwinder errors. To fix this issue, the callchain is fetched early. Such
> perf_events are marked with __PERF_SAMPLE_CALLCHAIN_EARLY.
>
> Similarly, calling bpf_get_[stack|stackid] on perf_events from PEBS may
> also cause unwinder errors. To fix this, add separate versions of these
> two helpers, bpf_get_[stack|stackid]_pe. These two helpers use the
> callchain in bpf_perf_event_data_kern->data->callchain.
>
> Signed-off-by: Song Liu 
> ---
>  include/linux/bpf.h  |   2 +
>  kernel/bpf/stackmap.c| 204 +++
>  kernel/trace/bpf_trace.c |   4 +-
>  3 files changed, 190 insertions(+), 20 deletions(-)
>

Glad this approach worked out! Few minor bugs below, though.

[...]

> +   if (unlikely(flags & ~(BPF_F_SKIP_FIELD_MASK | BPF_F_USER_STACK |
> +  BPF_F_FAST_STACK_CMP | BPF_F_REUSE_STACKID)))
> +   return -EINVAL;
> +
> +   user = flags & BPF_F_USER_STACK;
> +   kernel = !user;
> +
> +   has_kernel = !event->attr.exclude_callchain_kernel;
> +   has_user = !event->attr.exclude_callchain_user;
> +
> +   if ((kernel && !has_kernel) || (user && !has_user))
> +   return -EINVAL;
> +
> +   trace = ctx->data->callchain;
> +   if (!trace || (!has_kernel && !has_user))

(!has_kernel && !has_user) can never happen, it's checked by if above
(one of kernel or user is always true => one of has_user or has_kernel
is always true).
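
In other words, the check could presumably be reduced to:

	trace = ctx->data->callchain;
	if (!trace)
		return -EFAULT;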

> +   return -EFAULT;
> +
> +   if (has_kernel && has_user) {
> +   __u64 nr_kernel = count_kernel_ip(trace);
> +   int ret;
> +
> +   if (kernel) {
> +   __u64 nr = trace->nr;
> +
> +   trace->nr = nr_kernel;
> +   ret = __bpf_get_stackid(map, trace, flags);
> +
> +   /* restore nr */
> +   trace->nr = nr;
> +   } else { /* user */
> +   u64 skip = flags & BPF_F_SKIP_FIELD_MASK;
> +
> +   skip += nr_kernel;
> +   if (skip > ~BPF_F_SKIP_FIELD_MASK)

something fishy here: ~BPF_F_SKIP_FIELD_MASK is a really big number,
were you going to check that skip is not bigger than 255 (i.e., fits
within BPF_F_SKIP_FIELD_MASK)?
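
i.e., presumably the intended check is something like:

	if (skip > BPF_F_SKIP_FIELD_MASK)
		return -EFAULT;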

> +   return -EFAULT;
> +
> +   flags = (flags & ~BPF_F_SKIP_FIELD_MASK) |
> +   (skip  & BPF_F_SKIP_FIELD_MASK);
> +   ret = __bpf_get_stackid(map, trace, flags);
> +   }
> +   return ret;
> +   }
> +   return __bpf_get_stackid(map, trace, flags);
> +}
> +

[...]

> +
> +   has_kernel = !event->attr.exclude_callchain_kernel;
> +   has_user = !event->attr.exclude_callchain_user;
> +
> +   if ((kernel && !has_kernel) || (user && !has_user))
> +   goto clear;
> +
> +   err = -EFAULT;
> +   trace = ctx->data->callchain;
> +   if (!trace || (!has_kernel && !has_user))
> +   goto clear;

same as above for bpf_get_stackid, probably can be simplified

> +
> +   if (has_kernel && has_user) {
> +   __u64 nr_kernel = count_kernel_ip(trace);
> +   int ret;
> +
> +   if (kernel) {
> +   __u64 nr = trace->nr;
> +
> +   trace->nr = nr_kernel;
> +   ret = __bpf_get_stack(ctx->regs, NULL, trace, buf,
> + size, flags);
> +
> +   /* restore nr */
> +   trace->nr = nr;
> +   } else { /* user */
> +   u64 skip = flags & BPF_F_SKIP_FIELD_MASK;
> +
> +   skip += nr_kernel;
> +   if (skip > ~BPF_F_SKIP_FIELD_MASK)
> +   goto clear;
> +

and here

> +   flags = (flags & ~BPF_F_SKIP_FIELD_MASK) |
> +   (skip  & BPF_F_SKIP_FIELD_MASK);

actually if you check that skip <= BPF_F_SKIP_FIELD_MASK, you don't
need to mask it here, just `| skip`
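
i.e., assuming the earlier range check is fixed as above, this could just be:

	flags = (flags & ~BPF_F_SKIP_FIELD_MASK) | skip;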

> +   ret = __bpf_get_stack(ctx->regs, NULL, trace, buf,
> + size, flags);
> +   }
> +   return ret;
> +   }
> +   return __bpf_get_stack(ctx->regs, NULL, trace, buf, size, flags);
> +clear:
> +   memset(buf, 0, size);
> +   return err;
> +
> +}
> +

[...]


Re: [PATCH v4] phy: samsung: Use readl_poll_timeout function

2020-07-15 Thread Vinod Koul
On 13-07-20, 07:42, Anand Moon wrote:
> Instead of a busy-waiting while loop using udelay, use the
> readl_poll_timeout function to check whether the condition is met or
> a timeout occurs in the crport_handshake function.
> readl_poll_timeout is called in non-atomic context, so it is safe to
> sleep until the condition is met.
> 
> Fixes: d8c80bb3b55b ("phy: exynos5-usbdrd: Calibrate LOS levels for 
> exynos5420/5800")
> Signed-off-by: Anand Moon 
> ---
> Changes v4:
> Rebased on top of patch [0] https://patchwork.kernel.org/patch/11651673/
> --Fix the commit message.
> --Fix the error timeout condition for -ETIMEDOUT
> ---
> Changes v3:
> --Fix the commit message.
> --Drop the variable, used the value directly.
> Changes v2:
> --used the default timeout values.
> --Added missing Fixed tags.
> ---
>  drivers/phy/samsung/phy-exynos5-usbdrd.c | 39 
>  1 file changed, 12 insertions(+), 27 deletions(-)
> 
> diff --git a/drivers/phy/samsung/phy-exynos5-usbdrd.c 
> b/drivers/phy/samsung/phy-exynos5-usbdrd.c
> index 7f6279fb4f8f..ad81aa65cdff 100644
> --- a/drivers/phy/samsung/phy-exynos5-usbdrd.c
> +++ b/drivers/phy/samsung/phy-exynos5-usbdrd.c
> @@ -16,6 +16,7 @@
>  #include 
>  #include 
>  #include 
> +#include 
>  #include 
>  #include 
>  #include 
> @@ -556,41 +557,25 @@ static int exynos5_usbdrd_phy_power_off(struct phy *phy)
>  static int crport_handshake(struct exynos5_usbdrd_phy *phy_drd,
>   u32 val, u32 cmd)
>  {
> - u32 usec = 100;
>   unsigned int result;
> + int err;
>  
>   writel(val | cmd, phy_drd->reg_phy + EXYNOS5_DRD_PHYREG0);
>  
> - do {
> - result = readl(phy_drd->reg_phy + EXYNOS5_DRD_PHYREG1);
> - if (result & PHYREG1_CR_ACK)
> - break;
> -
> - udelay(1);
> - } while (usec-- > 0);
> -
> - if (!usec) {
> - dev_err(phy_drd->dev,
> - "CRPORT handshake timeout1 (0x%08x)\n", val);
> - return -ETIME;
> + err = readl_poll_timeout(phy_drd->reg_phy + EXYNOS5_DRD_PHYREG1,
> + result, (result & PHYREG1_CR_ACK), 1, 100);

pls align this line to opening brace of preceding line:

err = readl_poll_timeout(phy_drd->reg_phy + EXYNOS5_DRD_PHYREG1,
 result, (result & PHYREG1_CR_ACK), 1, 100);

This is recommended way of splitting lines, see
Documentation/process/coding-style.rst and run checkpatch.pl with
--strict option

thanks
-- 
~Vinod


Re: [PATCH v1] driver core: Fix scheduling while atomic warnings during device link deletion

2020-07-15 Thread Marek Szyprowski
Hi

On 16.07.2020 07:30, Guenter Roeck wrote:
> On 7/15/20 10:08 PM, Saravana Kannan wrote:
>> Marek and Guenter reported that commit 287905e68dd2 ("driver core:
>> Expose device link details in sysfs") caused sleeping/scheduling while
>> atomic warnings.
>>
>> BUG: sleeping function called from invalid context at 
>> kernel/locking/mutex.c:935
>> in_atomic(): 1, irqs_disabled(): 0, non_block: 0, pid: 12, name: kworker/0:1
>> 2 locks held by kworker/0:1/12:
>>#0: ee8074a8 ((wq_completion)rcu_gp){+.+.}-{0:0}, at: 
>> process_one_work+0x174/0x7dc
>>#1: ee921f20 ((work_completion)(>work)){+.+.}-{0:0}, at: 
>> process_one_work+0x174/0x7dc
>> Preemption disabled at:
>> [] srcu_invoke_callbacks+0xc0/0x154
>> - 8< - SNIP
>> [] (device_del) from [] (device_unregister+0x24/0x64)
>> [] (device_unregister) from [] 
>> (srcu_invoke_callbacks+0xcc/0x154)
>> [] (srcu_invoke_callbacks) from [] 
>> (process_one_work+0x234/0x7dc)
>> [] (process_one_work) from [] (worker_thread+0x44/0x51c)
>> [] (worker_thread) from [] (kthread+0x158/0x1a0)
>> [] (kthread) from [] (ret_from_fork+0x14/0x20)
>> Exception stack(0xee921fb0 to 0xee921ff8)
>>
>> This was caused by the device link device being released in the context
>> of srcu_invoke_callbacks().  There is no need to wait till the RCU
>> callback to release the device link device.  So release the device
>> earlier and revert the RCU callback code to what it was before
>> commit 287905e68dd2 ("driver core: Expose device link details in sysfs")
>>
>> Fixes: 287905e68dd2 ("driver core: Expose device link details in sysfs")
>> Reported-by: Marek Szyprowski 
>> Reported-by: Guenter Roeck 
>> Signed-off-by: Saravana Kannan 
>> ---
>> Marek and Guenter,
>>
>> I haven't had a chance to test this yet. Can one of you please test it
>> and confirm it fixes the issue?
>>
> With this patch applied, the original warning is gone, but I get lots
> of other warnings.
>
> WARNING: CPU: 0 PID: 1 at drivers/base/core.c:1790 device_release+0x94/0xa4^M
> Device 'regulators:regulator@0:50038000.ethernet' does not have a release() 
> function, it is broken and must be fixed.
>
> WARNING: CPU: 0 PID: 1 at drivers/base/core.c:1790 device_release+0x94/0xa4
> Device '53f9c000.gpio:50038000.ethernet' does not have a release() function, 
> it is broken and must be fixed.
>
> WARNING: CPU: 0 PID: 1 at drivers/base/core.c:1790 device_release+0x94/0xa4^M
> Device '5003.tscadc:50030400.tcq' does not have a release() function, it 
> is broken and must be fixed.

I confirm that I also get such warnings for every platform device in the 
system with this patch applied to linux next-20200715:

[ cut here ]
WARNING: CPU: 0 PID: 1 at drivers/base/core.c:1790 device_release+0x94/0x98
Device '10023c40.power-domain:1362.sysmmu' does not have a release() 
function, it is broken and must be fixed. See 
Documentation/core-api/kobject.rst.
Modules linked in:
CPU: 0 PID: 1 Comm: swapper/0 Not tainted 
5.8.0-rc5-next-20200715-2-g0f637964c4b0 #1270
Hardware name: Samsung Exynos (Flattened Device Tree)
[] (unwind_backtrace) from [] (show_stack+0x10/0x14)
[] (show_stack) from [] (dump_stack+0xbc/0xe8)
[] (dump_stack) from [] (__warn+0xf0/0x108)
[] (__warn) from [] (warn_slowpath_fmt+0x74/0xb8)
[] (warn_slowpath_fmt) from [] 
(device_release+0x94/0x98)
[] (device_release) from [] (kobject_put+0x104/0x288)
[] (kobject_put) from [] (__device_link_del+0x38/0xac)
[] (__device_link_del) from [] 
(device_links_driver_bound+0x260/0x26c)
[] (device_links_driver_bound) from [] 
(driver_bound+0x5c/0x110)
[] (driver_bound) from [] (really_probe+0x2d4/0x4fc)
[] (really_probe) from [] 
(driver_probe_device+0x78/0x1fc)
[] (driver_probe_device) from [] 
(bus_for_each_drv+0x74/0xb8)
[] (bus_for_each_drv) from [] 
(__device_attach+0xd4/0x16c)
[] (__device_attach) from [] 
(bus_probe_device+0x88/0x90)
[] (bus_probe_device) from [] 
(fw_devlink_resume+0xa0/0x134)
[] (fw_devlink_resume) from [] 
(of_platform_default_populate_init+0xa8/0xc0)
[] (of_platform_default_populate_init) from [] 
(do_one_initcall+0x8c/0x424)
[] (do_one_initcall) from [] 
(kernel_init_freeable+0x190/0x204)
[] (kernel_init_freeable) from [] 
(kernel_init+0x8/0x118)
[] (kernel_init) from [] (ret_from_fork+0x14/0x20)
Exception stack(0xef0dffb0 to 0xef0dfff8)
ffa0:    

ffc0:        

ffe0:     0013 
irq event stamp: 40543
hardirqs last  enabled at (40551): [] console_unlock+0x430/0x6cc
hardirqs last disabled at (40568): [] console_unlo

[PATCH] remoteproc: qcom: pil-info: Fix shift overflow

2020-07-15 Thread Bjorn Andersson
On platforms with 32-bit phys_addr_t, the shift to get the upper word of
the base address of the memory region is invalid. Cast the base to 64
bits to resolve this.

Fixes: 549b67da660d ("remoteproc: qcom: Introduce helper to store pil info in 
IMEM")
Reported-by: Lee Jones 
Reported-by: Nathan Chancellor 
Signed-off-by: Bjorn Andersson 
---
 drivers/remoteproc/qcom_pil_info.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/remoteproc/qcom_pil_info.c 
b/drivers/remoteproc/qcom_pil_info.c
index 0536e3904669..5521c4437ffa 100644
--- a/drivers/remoteproc/qcom_pil_info.c
+++ b/drivers/remoteproc/qcom_pil_info.c
@@ -108,7 +108,7 @@ int qcom_pil_info_store(const char *image, phys_addr_t 
base, size_t size)
 found_existing:
/* Use two writel() as base is only aligned to 4 bytes on odd entries */
writel(base, entry + PIL_RELOC_NAME_LEN);
-   writel(base >> 32, entry + PIL_RELOC_NAME_LEN + 4);
+   writel((u64)base >> 32, entry + PIL_RELOC_NAME_LEN + 4);
writel(size, entry + PIL_RELOC_NAME_LEN + sizeof(__le64));
mutex_unlock(_reloc_lock);
 
-- 
2.26.2



[PATCH v2 1/4] dt-bindings: media: venus: Add an optional power domain for perf voting

2020-07-15 Thread Rajendra Nayak
Add an optional power domain which when specified can be used for
setting the performance state of Venus.

Signed-off-by: Rajendra Nayak 
---
This is a resend of https://lore.kernel.org/patchwork/patch/1241077/

 Documentation/devicetree/bindings/media/qcom,sc7180-venus.yaml| 6 +-
 Documentation/devicetree/bindings/media/qcom,sdm845-venus-v2.yaml | 6 +-
 2 files changed, 10 insertions(+), 2 deletions(-)

diff --git a/Documentation/devicetree/bindings/media/qcom,sc7180-venus.yaml 
b/Documentation/devicetree/bindings/media/qcom,sc7180-venus.yaml
index 55f2d67..1e8675b 100644
--- a/Documentation/devicetree/bindings/media/qcom,sc7180-venus.yaml
+++ b/Documentation/devicetree/bindings/media/qcom,sc7180-venus.yaml
@@ -25,12 +25,16 @@ properties:
 maxItems: 1
 
   power-domains:
-maxItems: 2
+minItems: 2
+maxItems: 3
 
   power-domain-names:
+minItems: 2
+maxItems: 3
 items:
   - const: venus
   - const: vcodec0
+  - const: opp-pd
 
   clocks:
 maxItems: 5
diff --git a/Documentation/devicetree/bindings/media/qcom,sdm845-venus-v2.yaml 
b/Documentation/devicetree/bindings/media/qcom,sdm845-venus-v2.yaml
index 157dff8..437286d 100644
--- a/Documentation/devicetree/bindings/media/qcom,sdm845-venus-v2.yaml
+++ b/Documentation/devicetree/bindings/media/qcom,sdm845-venus-v2.yaml
@@ -25,13 +25,17 @@ properties:
 maxItems: 1
 
   power-domains:
-maxItems: 3
+minItems: 3
+maxItems: 4
 
   power-domain-names:
+minItems: 3
+maxItems: 4
 items:
   - const: venus
   - const: vcodec0
   - const: vcodec1
+  - const: opp-pd
 
   clocks:
 maxItems: 7
-- 
QUALCOMM INDIA, on behalf of Qualcomm Innovation Center, Inc. is a member
of Code Aurora Forum, hosted by The Linux Foundation



[PATCH v2 4/4] arm64: dts: sc7180: Add OPP tables and power-domains for venus

2020-07-15 Thread Rajendra Nayak
Add the OPP tables in order to be able to vote on the performance state
of a power-domain

Signed-off-by: Rajendra Nayak 
---
 arch/arm64/boot/dts/qcom/sc7180.dtsi | 35 +--
 1 file changed, 33 insertions(+), 2 deletions(-)

diff --git a/arch/arm64/boot/dts/qcom/sc7180.dtsi 
b/arch/arm64/boot/dts/qcom/sc7180.dtsi
index 126e2fc..c560ad2 100644
--- a/arch/arm64/boot/dts/qcom/sc7180.dtsi
+++ b/arch/arm64/boot/dts/qcom/sc7180.dtsi
@@ -2664,8 +2664,10 @@
reg = <0 0x0aa0 0 0xff000>;
interrupts = ;
power-domains = < VENUS_GDSC>,
-   < VCODEC0_GDSC>;
-   power-domain-names = "venus", "vcodec0";
+   < VCODEC0_GDSC>,
+   < SC7180_CX>;
+   power-domain-names = "venus", "vcodec0", "opp-pd";
+   operating-points-v2 = <_opp_table>;
clocks = < VIDEO_CC_VENUS_CTL_CORE_CLK>,
 < VIDEO_CC_VENUS_AHB_CLK>,
 < VIDEO_CC_VENUS_CTL_AXI_CLK>,
@@ -2686,6 +2688,35 @@
video-encoder {
compatible = "venus-encoder";
};
+
+   venus_opp_table: venus-opp-table {
+   compatible = "operating-points-v2";
+
+   opp-15000 {
+   opp-hz = /bits/ 64 <15000>;
+   required-opps = <_opp_low_svs>;
+   };
+
+   opp-27000 {
+   opp-hz = /bits/ 64 <27000>;
+   required-opps = <_opp_svs>;
+   };
+
+   opp-34000 {
+   opp-hz = /bits/ 64 <34000>;
+   required-opps = <_opp_svs_l1>;
+   };
+
+   opp-43400 {
+   opp-hz = /bits/ 64 <43400>;
+   required-opps = <_opp_nom>;
+   };
+
+   opp-5 {
+   opp-hz = /bits/ 64 <5>;
+   required-opps = <_opp_turbo>;
+   };
+   };
};
 
videocc: clock-controller@ab0 {
-- 
QUALCOMM INDIA, on behalf of Qualcomm Innovation Center, Inc. is a member
of Code Aurora Forum, hosted by The Linux Foundation



[PATCH v2 2/4] media: venus: core: Add support for opp tables/perf voting

2020-07-15 Thread Rajendra Nayak
Add support for OPP tables and perf voting on the OPP power domain.
This is needed so venus votes on the corresponding performance state
for the OPP power domain along with setting the core clock rate.

Signed-off-by: Rajendra Nayak 
Reviewed-by: Matthias Kaehlcke 
---
 drivers/media/platform/qcom/venus/core.c   | 43 +---
 drivers/media/platform/qcom/venus/core.h   |  5 +++
 drivers/media/platform/qcom/venus/pm_helpers.c | 54 --
 3 files changed, 92 insertions(+), 10 deletions(-)

diff --git a/drivers/media/platform/qcom/venus/core.c 
b/drivers/media/platform/qcom/venus/core.c
index 203c653..630f61b 100644
--- a/drivers/media/platform/qcom/venus/core.c
+++ b/drivers/media/platform/qcom/venus/core.c
@@ -12,6 +12,7 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 #include 
 #include 
@@ -216,21 +217,37 @@ static int venus_probe(struct platform_device *pdev)
if (!core->pm_ops)
return -ENODEV;
 
+   core->opp_table = dev_pm_opp_set_clkname(dev, "core");
+   if (IS_ERR(core->opp_table))
+   return PTR_ERR(core->opp_table);
+
+   if (core->res->opp_pmdomain) {
+   ret = dev_pm_opp_of_add_table(dev);
+   if (!ret) {
+   core->has_opp_table = true;
+   } else if (ret != -ENODEV) {
+   dev_err(dev, "invalid OPP table in device tree\n");
+   return ret;
+   }
+   }
+
if (core->pm_ops->core_get) {
ret = core->pm_ops->core_get(dev);
if (ret)
-   return ret;
+   goto err_opp_cleanup;
}
 
ret = dma_set_mask_and_coherent(dev, core->res->dma_mask);
if (ret)
-   return ret;
+   goto err_opp_cleanup;
 
if (!dev->dma_parms) {
dev->dma_parms = devm_kzalloc(dev, sizeof(*dev->dma_parms),
  GFP_KERNEL);
-   if (!dev->dma_parms)
-   return -ENOMEM;
+   if (!dev->dma_parms) {
+   ret = -ENOMEM;
+   goto err_opp_cleanup;
+   }
}
dma_set_max_seg_size(dev, DMA_BIT_MASK(32));
 
@@ -242,11 +259,11 @@ static int venus_probe(struct platform_device *pdev)
IRQF_TRIGGER_HIGH | IRQF_ONESHOT,
"venus", core);
if (ret)
-   return ret;
+   goto err_opp_cleanup;
 
ret = hfi_create(core, _core_ops);
if (ret)
-   return ret;
+   goto err_opp_cleanup;
 
pm_runtime_enable(dev);
 
@@ -302,6 +319,10 @@ static int venus_probe(struct platform_device *pdev)
pm_runtime_set_suspended(dev);
pm_runtime_disable(dev);
hfi_destroy(core);
+err_opp_cleanup:
+   if (core->has_opp_table)
+   dev_pm_opp_of_remove_table(dev);
+   dev_pm_opp_put_clkname(core->opp_table);
return ret;
 }
 
@@ -326,6 +347,10 @@ static int venus_remove(struct platform_device *pdev)
pm_runtime_put_sync(dev);
pm_runtime_disable(dev);
 
+   if (core->has_opp_table)
+   dev_pm_opp_of_remove_table(dev);
+   dev_pm_opp_put_clkname(core->opp_table);
+
if (pm_ops->core_put)
pm_ops->core_put(dev);
 
@@ -355,6 +380,10 @@ static __maybe_unused int venus_runtime_suspend(struct 
device *dev)
if (ret)
return ret;
 
+   /* Drop the performance state vote */
+   if (core->opp_pmdomain)
+   dev_pm_opp_set_rate(dev, 0);
+
if (pm_ops->core_power)
ret = pm_ops->core_power(dev, POWER_OFF);
 
@@ -520,6 +549,7 @@ static const struct venus_resources sdm845_res_v2 = {
.vcodec_clks_num = 2,
.vcodec_pmdomains = { "venus", "vcodec0", "vcodec1" },
.vcodec_pmdomains_num = 3,
+   .opp_pmdomain = (const char *[]) { "opp-pd", NULL },
.vcodec_num = 2,
.max_load = 3110400,/* 4096x2160@90 */
.hfi_version = HFI_VERSION_4XX,
@@ -565,6 +595,7 @@ static const struct venus_resources sc7180_res = {
.vcodec_clks_num = 2,
.vcodec_pmdomains = { "venus", "vcodec0" },
.vcodec_pmdomains_num = 2,
+   .opp_pmdomain = (const char *[]) { "opp-pd", NULL },
.vcodec_num = 1,
.hfi_version = HFI_VERSION_4XX,
.vmem_id = VIDC_RESOURCE_NONE,
diff --git a/drivers/media/platform/qcom/venus/core.h 
b/drivers/media/platform/qcom/venus/core.h
index 7118612..b0cc544 100644
--- a/drivers/media/platform/qcom/venus/core.h
+++ b/drivers/media/platform/qcom/venus/core.h
@@ -62,6 +62,7 @@ struct venus_resources {
unsigned int vcodec_clks_num;
const char * const vcodec_pmdomains[VIDC_PMDOMAINS_NUM_MAX];
unsigned int vcodec_pmdomains_num;
+   const char **opp_pmdomain;

[PATCH v2 3/4] arm64: dts: sdm845: Add OPP tables and power-domains for venus

2020-07-15 Thread Rajendra Nayak
Add the OPP tables in order to be able to vote on the performance state of
a power-domain.

Signed-off-by: Rajendra Nayak 
---
 arch/arm64/boot/dts/qcom/sdm845.dtsi | 40 ++--
 1 file changed, 38 insertions(+), 2 deletions(-)

diff --git a/arch/arm64/boot/dts/qcom/sdm845.dtsi 
b/arch/arm64/boot/dts/qcom/sdm845.dtsi
index 759cdd0..d410cda 100644
--- a/arch/arm64/boot/dts/qcom/sdm845.dtsi
+++ b/arch/arm64/boot/dts/qcom/sdm845.dtsi
@@ -3631,8 +3631,10 @@
interrupts = ;
power-domains = < VENUS_GDSC>,
< VCODEC0_GDSC>,
-   < VCODEC1_GDSC>;
-   power-domain-names = "venus", "vcodec0", "vcodec1";
+   < VCODEC1_GDSC>,
+   < SDM845_CX>;
+   power-domain-names = "venus", "vcodec0", "vcodec1", 
"opp-pd";
+   operating-points-v2 = <_opp_table>;
clocks = < VIDEO_CC_VENUS_CTL_CORE_CLK>,
 < VIDEO_CC_VENUS_AHB_CLK>,
 < VIDEO_CC_VENUS_CTL_AXI_CLK>,
@@ -3654,6 +3656,40 @@
video-core1 {
compatible = "venus-encoder";
};
+
+   venus_opp_table: venus-opp-table {
+   compatible = "operating-points-v2";
+
+   opp-1 {
+   opp-hz = /bits/ 64 <1>;
+   required-opps = <_opp_min_svs>;
+   };
+
+   opp-2 {
+   opp-hz = /bits/ 64 <2>;
+   required-opps = <_opp_low_svs>;
+   };
+
+   opp-32000 {
+   opp-hz = /bits/ 64 <32000>;
+   required-opps = <_opp_svs>;
+   };
+
+   opp-38000 {
+   opp-hz = /bits/ 64 <38000>;
+   required-opps = <_opp_svs_l1>;
+   };
+
+   opp-44400 {
+   opp-hz = /bits/ 64 <44400>;
+   required-opps = <_opp_nom>;
+   };
+
+   opp-53300 {
+   opp-hz = /bits/ 64 <53300>;
+   required-opps = <_opp_turbo>;
+   };
+   };
};
 
videocc: clock-controller@ab0 {
-- 
QUALCOMM INDIA, on behalf of Qualcomm Innovation Center, Inc. is a member
of Code Aurora Forum, hosted by The Linux Foundation



[PATCH v2 0/4] DVFS support for Venus

2020-07-15 Thread Rajendra Nayak
v2: Fixed up the labels of OPP nodes in patch 4
Included the bindings update patch as part of this series,
a resend of https://lore.kernel.org/patchwork/patch/1241077/

These patches add DVFS support for Venus

Patch 1 will need to be picked by Rob.
Patch 2 will need to be picked by Stan.
Patches 3 and 4 should land via the qcom tree.

Rajendra Nayak (4):
  dt-bindings: media: venus: Add an optional power domain for perf
voting
  media: venus: core: Add support for opp tables/perf voting
  arm64: dts: sdm845: Add OPP tables and power-domains for venus
  arm64: dts: sc7180: Add OPP tables and power-domains for venus

 .../bindings/media/qcom,sc7180-venus.yaml  |  6 ++-
 .../bindings/media/qcom,sdm845-venus-v2.yaml   |  6 ++-
 arch/arm64/boot/dts/qcom/sc7180.dtsi   | 35 +-
 arch/arm64/boot/dts/qcom/sdm845.dtsi   | 40 +++-
 drivers/media/platform/qcom/venus/core.c   | 43 ++---
 drivers/media/platform/qcom/venus/core.h   |  5 ++
 drivers/media/platform/qcom/venus/pm_helpers.c | 54 --
 7 files changed, 173 insertions(+), 16 deletions(-)

-- 
QUALCOMM INDIA, on behalf of Qualcomm Innovation Center, Inc. is a member
of Code Aurora Forum, hosted by The Linux Foundation



Re: [PATCH v3 12/12] ppc64/kexec_file: fix kexec load failure with lack of memory hole

2020-07-15 Thread Thiago Jung Bauermann


Hari Bathini  writes:

> The kexec purgatory has to run in real mode. Only the first memory
> block may be accessible in real mode. And, unlike the case with panic
> kernel, no memory is set aside for regular kexec load. Another thing
> to note is, the memory for crashkernel is reserved at an offset of
> 128MB. So, when crashkernel memory is reserved, the memory ranges to
> load kexec segments shrink further as the generic code only looks for
> memblock free memory ranges and in all likelihood only a tiny bit of
> memory from 0 to 128MB would be available to load kexec segments.
>
> With kdump being used by default in general, kexec file load is likely
> to fail almost always.

Ah. I wasn't aware of this problem.

> This can be fixed by changing the memory hole
> lookup logic for regular kexec to use the same method as kdump.

Right. It doesn't make that much sense to use memblock to find free
memory areas for the kexec kernel, because memblock tracks which memory
areas are free for the currently running kernel. But that's not what
matters for the kernel that will be kexec'd into. In this case, regions
which may be reserved for the current OS instance may well be free for a
freshly started kernel. The kdump method is better at knowing which
memory regions are actually reserved by the firmware/hardware.

> This
> would mean that most kexec segments will overlap with crashkernel
> memory region. That should still be ok as the pages, whose destination
> address isn't available while loading, are placed in an intermediate
> location till a flush to the actual destination address happens during
> kexec boot sequence.

Yes, since the kdump kernel and the "regular" kexec kernel can't be both
booted at the same time, it's not a problem if both plan to use the same
region of memory.

>
> Signed-off-by: Hari Bathini 
> Tested-by: Pingfan Liu 

Reviewed-by: Thiago Jung Bauermann 

> ---
>
> v2 -> v3:
> * Unchanged. Added Tested-by tag from Pingfan.
>
> v1 -> v2:
> * New patch to fix locating memory hole for kexec_file_load (kexec -s -l)
>   when memory is reserved for crashkernel.
>
>
>  arch/powerpc/kexec/file_load_64.c |   33 ++---
>  1 file changed, 14 insertions(+), 19 deletions(-)

-- 
Thiago Jung Bauermann
IBM Linux Technology Center


RE: [PATCH 16/24] scsi: megaraid: Fix a whole bunch of function header formatting issues

2020-07-15 Thread Chandrakanth Patil
Acked-by: Chandrakanth Patil 

-Original Message-
From: linux-scsi-ow...@vger.kernel.org 
On Behalf Of Lee Jones
Sent: Thursday, July 9, 2020 10:46 AM
To: j...@linux.ibm.com; martin.peter...@oracle.com;
linux-s...@vger.kernel.org
Cc: linux-kernel@vger.kernel.org; Lee Jones ;
Kashyap Desai ; Sumit Saxena
; Shivasharan S
; Christoph Hellwig ;
seokmann...@lsil.com; s...@lsil.com; megaraidlinux@broadcom.com
Subject: [PATCH 16/24] scsi: megaraid: Fix a whole bunch of function
header formatting issues

Plus a couple of API catch-ups.

Fixes the following W=1 kernel build warning(s):

 drivers/scsi/megaraid.c:133: warning: Function parameter or member
'adapter' not described in 'mega_setup_mailbox'
 drivers/scsi/megaraid.c:356: warning: Function parameter or member
'adapter' not described in 'mega_runpendq'
 drivers/scsi/megaraid.c:424: warning: Function parameter or member
'adapter' not described in 'mega_allocate_scb'
 drivers/scsi/megaraid.c:424: warning: Function parameter or member 'cmd'
not described in 'mega_allocate_scb'
 drivers/scsi/megaraid.c:456: warning: Function parameter or member
'adapter' not described in 'mega_get_ldrv_num'
 drivers/scsi/megaraid.c:456: warning: Function parameter or member 'cmd'
not described in 'mega_get_ldrv_num'
 drivers/scsi/megaraid.c:456: warning: Function parameter or member
'channel' not described in 'mega_get_ldrv_num'
 drivers/scsi/megaraid.c:519: warning: Function parameter or member
'adapter' not described in 'mega_build_cmd'
 drivers/scsi/megaraid.c:519: warning: Function parameter or member 'cmd'
not described in 'mega_build_cmd'
 drivers/scsi/megaraid.c:519: warning: Function parameter or member 'busy'
not described in 'mega_build_cmd'
 drivers/scsi/megaraid.c:951: warning: Function parameter or member
'adapter' not described in 'mega_prepare_passthru'
 drivers/scsi/megaraid.c:951: warning: Function parameter or member 'scb'
not described in 'mega_prepare_passthru'
 drivers/scsi/megaraid.c:951: warning: Function parameter or member 'cmd'
not described in 'mega_prepare_passthru'
 drivers/scsi/megaraid.c:951: warning: Function parameter or member
'channel' not described in 'mega_prepare_passthru'
 drivers/scsi/megaraid.c:951: warning: Function parameter or member
'target' not described in 'mega_prepare_passthru'
 drivers/scsi/megaraid.c:1016: warning: Function parameter or member
'adapter' not described in 'mega_prepare_extpassthru'
 drivers/scsi/megaraid.c:1016: warning: Function parameter or member 'scb'
not described in 'mega_prepare_extpassthru'
 drivers/scsi/megaraid.c:1016: warning: Function parameter or member 'cmd'
not described in 'mega_prepare_extpassthru'
 drivers/scsi/megaraid.c:1016: warning: Function parameter or member
'channel' not described in 'mega_prepare_extpassthru'
 drivers/scsi/megaraid.c:1016: warning: Function parameter or member
'target' not described in 'mega_prepare_extpassthru'
 drivers/scsi/megaraid.c:1097: warning: Function parameter or member
'adapter' not described in 'issue_scb'
 drivers/scsi/megaraid.c:1097: warning: Function parameter or member 'scb'
not described in 'issue_scb'
 drivers/scsi/megaraid.c:1176: warning: Function parameter or member
'adapter' not described in 'issue_scb_block'
 drivers/scsi/megaraid.c:1176: warning: Function parameter or member
'raw_mbox' not described in 'issue_scb_block'
 drivers/scsi/megaraid.c:1259: warning: Function parameter or member 'irq'
not described in 'megaraid_isr_iomapped'
 drivers/scsi/megaraid.c:1259: warning: Function parameter or member
'devp' not described in 'megaraid_isr_iomapped'
 drivers/scsi/megaraid.c:1335: warning: Function parameter or member 'irq'
not described in 'megaraid_isr_memmapped'
 drivers/scsi/megaraid.c:1335: warning: Function parameter or member
'devp' not described in 'megaraid_isr_memmapped'
 drivers/scsi/megaraid.c:1413: warning: Function parameter or member
'adapter' not described in 'mega_cmd_done'
 drivers/scsi/megaraid.c:1413: warning: Function parameter or member
'completed' not described in 'mega_cmd_done'
 drivers/scsi/megaraid.c:1413: warning: Function parameter or member
'nstatus' not described in 'mega_cmd_done'
 drivers/scsi/megaraid.c:1413: warning: Function parameter or member
'status' not described in 'mega_cmd_done'
 drivers/scsi/megaraid.c:1933: warning: Function parameter or member
'adapter' not described in 'megaraid_abort_and_reset'
 drivers/scsi/megaraid.c:1933: warning: Function parameter or member 'cmd'
not described in 'megaraid_abort_and_reset'
 drivers/scsi/megaraid.c:1933: warning: Function parameter or member 'aor'
not described in 'megaraid_abort_and_reset'
 drivers/scsi/megaraid.c:2031: warning: Function parameter or member
'dma_handle' not described in 'mega_allocate_inquiry'
 drivers/scsi/megaraid.c:2031: warning: Function parameter or member
'pdev' not described in 'mega_allocate_inquiry'
 drivers/scsi/megaraid.c:2055: warning: Function parameter or member 'm'
not described in 'proc_show_config'
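
For reference, the fix for each of these warnings is simply to describe the
parameters in kernel-doc form, e.g. (an illustrative sketch, not the exact
hunk from the patch):

/**
 * mega_setup_mailbox() - setup the 8-byte aligned mailbox memory
 * @adapter: pointer to our soft state
 */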
 

Re: [PATCH RFC don't apply] vdpa_sim: endian-ness for config space

2020-07-15 Thread Michael S. Tsirkin
On Wed, Jul 15, 2020 at 10:02:32PM +0800, Jason Wang wrote:
> 
> On 2020/7/15 9:58 PM, Michael S. Tsirkin wrote:
> > VDPA sim stores config space as native endian, but that
> > is wrong: modern guests expect LE.
> > I coded up the following to fix it up, but it is wrong too:
> > vdpasim_create is called before guest features are known.
> > 
> > So what should we do? New ioctl to specify the interface used?
> > More ideas?
> > 
> > Signed-off-by: Michael S. Tsirkin 
> 
> 
> Can we do the endian conversion in set_config/get_config()?
> 
> Thanks

That is problematic at least from static checking point of view.
It would be reasonable to do it in vdpasim_set_features, except
legacy guests might not set features at all.
So my proposal is:
- set config in vdpasim_set_features
- document that this is where devices should initialize config
- vdpa core will maintain a "features set" flag, if get/set config
  is called without set features, core will call set features
  automatically with 0 value.

Thoughts?
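
A rough sketch of what the core-side fallback could look like (the
vdpa_features_fallback() helper and the features_set flag are hypothetical
names for illustration; set_features is the existing config op):

	/* hypothetical helper in the vdpa core, called before get_config/set_config */
	static void vdpa_features_fallback(struct vdpa_device *vdev)
	{
		/*
		 * Legacy guests may never call set_features, so assume
		 * features == 0 (i.e. a legacy interface) in that case.
		 */
		if (!vdev->features_set) {
			vdev->config->set_features(vdev, 0);
			vdev->features_set = true;
		}
	}

vdpasim_set_features() would then be the place where the simulator (re)writes
config fields such as mtu and status with the endianness implied by
VIRTIO_F_VERSION_1.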


> 
> > 
> > 
> > ---
> >   drivers/vdpa/vdpa_sim/vdpa_sim.c | 22 --
> >   1 file changed, 20 insertions(+), 2 deletions(-)
> > 
> > diff --git a/drivers/vdpa/vdpa_sim/vdpa_sim.c 
> > b/drivers/vdpa/vdpa_sim/vdpa_sim.c
> > index a9bc5e0fb353..cc754ae0ec15 100644
> > --- a/drivers/vdpa/vdpa_sim/vdpa_sim.c
> > +++ b/drivers/vdpa/vdpa_sim/vdpa_sim.c
> > @@ -24,6 +24,7 @@
> >   #include 
> >   #include 
> >   #include 
> > +#include 
> >   #include 
> >   #include 
> >   #include 
> > @@ -72,6 +73,23 @@ struct vdpasim {
> > u64 features;
> >   };
> > +/* TODO: cross-endian support */
> > +static inline bool vdpasim_is_little_endian(struct vdpasim *vdpasim)
> > +{
> > +   return virtio_legacy_is_little_endian() ||
> > +   (vdpasim->features & (1ULL << VIRTIO_F_VERSION_1));
> > +}
> > +
> > +static inline u16 vdpasim16_to_cpu(struct vdpasim *vdpasim, __virtio16 val)
> > +{
> > +   return __virtio16_to_cpu(vdpasim_is_little_endian(vdpasim), val);
> > +}
> > +
> > +static inline __virtio16 cpu_to_vdpasim16(struct vdpasim *vdpasim, u16 val)
> > +{
> > +   return __cpu_to_virtio16(vdpasim_is_little_endian(vdpasim), val);
> > +}
> > +
> >   static struct vdpasim *vdpasim_dev;
> >   static struct vdpasim *vdpa_to_sim(struct vdpa_device *vdpa)
> > @@ -332,8 +350,8 @@ static struct vdpasim *vdpasim_create(void)
> > goto err_iommu;
> > config = >config;
> > -   config->mtu = 1500;
> > -   config->status = VIRTIO_NET_S_LINK_UP;
> > +   config->mtu = cpu_to_vdpasim16(vdpasim, 1500);
> > +   config->status = cpu_to_vdpasim16(vdpasim, VIRTIO_NET_S_LINK_UP);
> > eth_random_addr(config->mac);
> > vringh_set_iotlb(>vqs[0].vring, vdpasim->iommu);



Re: [PATCH 2/2] Input: elan_i2c - Modify the IAP related function for page sizes 128, 512 bytes.

2020-07-15 Thread Dmitry Torokhov
Hi Jingle,

On Tue, Jul 14, 2020 at 06:56:41AM -0400, Jingle Wu wrote:
> + if (!iap)
> + cmd = ETP_I2C_FW_VERSION_CMD;
> + else if (pattern_ver == 0)
> + cmd = ETP_I2C_IAP_VERSION_CMD_OLD;

Just to confirm, the older devices (I assume that pattern 0 means older)
have version command that is numerically higher than the one for the
newer (pattern >= 1) devices?

> + else
> + cmd = ETP_I2C_IAP_VERSION_CMD;
>  
> - error = elan_i2c_read_cmd(client,
> -   iap ? ETP_I2C_IAP_VERSION_CMD :
> - ETP_I2C_FW_VERSION_CMD,
> -   val);
> + error = elan_i2c_read_cmd(client, cmd, val);
>   if (error) {
>   dev_err(>dev, "failed to get %s version: %d\n",
>   iap ? "IAP" : "FW", error);
>   return error;
>   }
>  
> - if (pattern_ver == 0x01)
> + if (pattern_ver >= 0x01)
>   *version = iap ? val[1] : val[0];
>   else
>   *version = val[0];
> @@ -298,7 +316,7 @@ static int elan_i2c_get_sm_version(struct i2c_client 
> *client,
>   return error;
>   }
>  
> - if (pattern_ver == 0x01) {
> + if (pattern_ver >= 0x01) {
>   error = elan_i2c_read_cmd(client, ETP_I2C_IC_TYPE_CMD, val);
>   if (error) {
>   dev_err(>dev, "failed to get ic type: %d\n",
> @@ -324,7 +342,14 @@ static int elan_i2c_get_sm_version(struct i2c_client 
> *client,
>   return error;
>   }
>   *version = val[0];
> - *ic_type = val[1];
> +
> + error = elan_i2c_read_cmd(client, ETP_I2C_IAP_VERSION_CMD, val);
> + if (error) {
> + dev_err(>dev, "failed to get ic type: %d\n",
> + error);
> + return error;
> + }

Could you please tell me why this chunk is needed?

Thanks.

-- 
Dmitry


[PATCH] rtc: pl031: fix alarm support

2020-07-15 Thread Marek Szyprowski
Commit 75a472845196 ("rtc: pl031: switch to rtc_time64_to_tm/rtc_tm_to_time64")
adjusted driver to the new API, but during the conversion a call to enable
alarm irq in set_alarm() was lost. Restore it, which fixes alarm support in
the PL031 RTC driver.

Fixes: 75a472845196 ("rtc: pl031: switch to rtc_time64_to_tm/rtc_tm_to_time64")
Signed-off-by: Marek Szyprowski 
---
 drivers/rtc/rtc-pl031.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/rtc/rtc-pl031.c b/drivers/rtc/rtc-pl031.c
index 40d7450a1ce4..c6b89273feba 100644
--- a/drivers/rtc/rtc-pl031.c
+++ b/drivers/rtc/rtc-pl031.c
@@ -275,6 +275,7 @@ static int pl031_set_alarm(struct device *dev, struct 
rtc_wkalrm *alarm)
struct pl031_local *ldata = dev_get_drvdata(dev);
 
writel(rtc_tm_to_time64(>time), ldata->base + RTC_MR);
+   pl031_alarm_irq_enable(dev, alarm->enabled);
 
return 0;
 }
-- 
2.17.1



[PATCH] clk: qcom: ipq8074: Add correct index for PCIe clocks

2020-07-15 Thread Sivaprakash Murugesan
The PCIe clocks GCC_PCIE0_AXI_S_BRIDGE_CLK, GCC_PCIE0_RCHNG_CLK_SRC,
GCC_PCIE0_RCHNG_CLK are wrongly added to the gcc reset group.

Move them to the gcc clock group.

Reported-by: kernel test robot 
Signed-off-by: Sivaprakash Murugesan 
---
 include/dt-bindings/clock/qcom,gcc-ipq8074.h | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/include/dt-bindings/clock/qcom,gcc-ipq8074.h 
b/include/dt-bindings/clock/qcom,gcc-ipq8074.h
index e3e018565add..8e2bec1c91bf 100644
--- a/include/dt-bindings/clock/qcom,gcc-ipq8074.h
+++ b/include/dt-bindings/clock/qcom,gcc-ipq8074.h
@@ -230,6 +230,9 @@
 #define GCC_GP1_CLK 221
 #define GCC_GP2_CLK 222
 #define GCC_GP3_CLK 223
+#define GCC_PCIE0_AXI_S_BRIDGE_CLK 224
+#define GCC_PCIE0_RCHNG_CLK_SRC 225
+#define GCC_PCIE0_RCHNG_CLK 226
 
 #define GCC_BLSP1_BCR  0
 #define GCC_BLSP1_QUP1_BCR 1
@@ -363,8 +366,5 @@
 #define GCC_PCIE1_AHB_ARES 129
 #define GCC_PCIE1_AXI_MASTER_STICKY_ARES   130
 #define GCC_PCIE0_AXI_SLAVE_STICKY_ARES 131
-#define GCC_PCIE0_AXI_S_BRIDGE_CLK 132
-#define GCC_PCIE0_RCHNG_CLK_SRC 133
-#define GCC_PCIE0_RCHNG_CLK 134
 
 #endif
-- 
2.7.4



Re: [PATCH] drm/i915/display: Ensure that ret is always initialized in icl_combo_phy_verify_state

2020-07-15 Thread Matt Roper
On Wed, Jul 15, 2020 at 09:27:42PM -0700, Nathan Chancellor wrote:
> Clang warns:
> 
> drivers/gpu/drm/i915/display/intel_combo_phy.c:268:3: warning: variable
> 'ret' is uninitialized when used here [-Wuninitialized]
> ret &= check_phy_reg(dev_priv, phy, ICL_PORT_TX_DW8_LN0(phy),
> ^~~
> drivers/gpu/drm/i915/display/intel_combo_phy.c:261:10: note: initialize
> the variable 'ret' to silence this warning
> bool ret;
> ^
>  = 0
> 1 warning generated.
> 
> In practice, the bug this warning appears to be concerned with would not
> actually matter because ret gets initialized to the return value of
> cnl_verify_procmon_ref_values. However, that does appear to be a bug
> since it means the first hunk of the patch this fixes won't actually do
> anything (since the values of check_phy_reg won't factor into the final
> ret value). Initialize ret to true then make all of the assignments a
> bitwise AND with itself so that the function always does what it should
> do.
> 
> Fixes: 239bef676d8e ("drm/i915/display: Implement new combo phy 
> initialization step")
> Link: https://github.com/ClangBuiltLinux/linux/issues/1094
> Signed-off-by: Nathan Chancellor 

Reviewed-by: Matt Roper 

> ---
>  drivers/gpu/drm/i915/display/intel_combo_phy.c | 4 ++--
>  1 file changed, 2 insertions(+), 2 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/display/intel_combo_phy.c 
> b/drivers/gpu/drm/i915/display/intel_combo_phy.c
> index eccaa79cb4a9..a4b8aa6d0a9e 100644
> --- a/drivers/gpu/drm/i915/display/intel_combo_phy.c
> +++ b/drivers/gpu/drm/i915/display/intel_combo_phy.c
> @@ -258,7 +258,7 @@ static bool phy_is_master(struct drm_i915_private 
> *dev_priv, enum phy phy)
>  static bool icl_combo_phy_verify_state(struct drm_i915_private *dev_priv,
>  enum phy phy)
>  {
> - bool ret;
> + bool ret = true;
>   u32 expected_val = 0;
>  
>   if (!icl_combo_phy_enabled(dev_priv, phy))
> @@ -276,7 +276,7 @@ static bool icl_combo_phy_verify_state(struct 
> drm_i915_private *dev_priv,
>DCC_MODE_SELECT_CONTINUOSLY);
>   }
>  
> - ret = cnl_verify_procmon_ref_values(dev_priv, phy);
> + ret &= cnl_verify_procmon_ref_values(dev_priv, phy);
>  
>   if (phy_is_master(dev_priv, phy)) {
>   ret &= check_phy_reg(dev_priv, phy, ICL_PORT_COMP_DW8(phy),
> 
> base-commit: ca0e494af5edb59002665bf12871e94b4163a257
> -- 
> 2.28.0.rc0
> 

-- 
Matt Roper
Graphics Software Engineer
VTT-OSGC Platform Enablement
Intel Corporation
(916) 356-2795


Re: [PATCH v1] driver core: Fix scheduling while atomic warnings during device link deletion

2020-07-15 Thread Guenter Roeck
On 7/15/20 10:08 PM, Saravana Kannan wrote:
> Marek and Guenter reported that commit 287905e68dd2 ("driver core:
> Expose device link details in sysfs") caused sleeping/scheduling while
> atomic warnings.
> 
> BUG: sleeping function called from invalid context at 
> kernel/locking/mutex.c:935
> in_atomic(): 1, irqs_disabled(): 0, non_block: 0, pid: 12, name: kworker/0:1
> 2 locks held by kworker/0:1/12:
>   #0: ee8074a8 ((wq_completion)rcu_gp){+.+.}-{0:0}, at: 
> process_one_work+0x174/0x7dc
>   #1: ee921f20 ((work_completion)(>work)){+.+.}-{0:0}, at: 
> process_one_work+0x174/0x7dc
> Preemption disabled at:
> [] srcu_invoke_callbacks+0xc0/0x154
> - 8< - SNIP
> [] (device_del) from [] (device_unregister+0x24/0x64)
> [] (device_unregister) from [] 
> (srcu_invoke_callbacks+0xcc/0x154)
> [] (srcu_invoke_callbacks) from [] 
> (process_one_work+0x234/0x7dc)
> [] (process_one_work) from [] (worker_thread+0x44/0x51c)
> [] (worker_thread) from [] (kthread+0x158/0x1a0)
> [] (kthread) from [] (ret_from_fork+0x14/0x20)
> Exception stack(0xee921fb0 to 0xee921ff8)
> 
> This was caused by the device link device being released in the context
> of srcu_invoke_callbacks().  There is no need to wait till the RCU
> callback to release the device link device.  So release the device
> earlier and revert the RCU callback code to what it was before
> commit 287905e68dd2 ("driver core: Expose device link details in sysfs")
> 
> Fixes: 287905e68dd2 ("driver core: Expose device link details in sysfs")
> Reported-by: Marek Szyprowski 
> Reported-by: Guenter Roeck 
> Signed-off-by: Saravana Kannan 
> ---
> Marek and Guenter,
> 
> I haven't had a chance to test this yet. Can one of you please test it
> and confirm it fixes the issue?
> 

With this patch applied, the original warning is gone, but I get lots
of other warnings.

WARNING: CPU: 0 PID: 1 at drivers/base/core.c:1790 device_release+0x94/0xa4^M
Device 'regulators:regulator@0:50038000.ethernet' does not have a release() 
function, it is broken and must be fixed.

WARNING: CPU: 0 PID: 1 at drivers/base/core.c:1790 device_release+0x94/0xa4
Device '53f9c000.gpio:50038000.ethernet' does not have a release() function, it 
is broken and must be fixed.

WARNING: CPU: 0 PID: 1 at drivers/base/core.c:1790 device_release+0x94/0xa4^M
Device '5003.tscadc:50030400.tcq' does not have a release() function, it is 
broken and must be fixed.

and so on. I don't know if this is caused by this patch or by
some other patch in -next.

Guenter

> Thanks,
> Saravana
> 
>  drivers/base/core.c | 10 +++---
>  1 file changed, 3 insertions(+), 7 deletions(-)
> 
> diff --git a/drivers/base/core.c b/drivers/base/core.c
> index 5373ddd029f6..ccb2ce11f5b5 100644
> --- a/drivers/base/core.c
> +++ b/drivers/base/core.c
> @@ -306,16 +306,10 @@ static struct attribute *devlink_attrs[] = {
>  };
>  ATTRIBUTE_GROUPS(devlink);
>  
> -static void devlink_dev_release(struct device *dev)
> -{
> - kfree(to_devlink(dev));
> -}
> -
>  static struct class devlink_class = {
>   .name = "devlink",
>   .owner = THIS_MODULE,
>   .dev_groups = devlink_groups,
> - .dev_release = devlink_dev_release,
>  };
>  
>  static int devlink_add_symlinks(struct device *dev,
> @@ -737,7 +731,7 @@ static void device_link_free(struct device_link *link)
>  
>   put_device(link->consumer);
>   put_device(link->supplier);
> - device_unregister(>link_dev);
> + kfree(link);
>  }
>  
>  #ifdef CONFIG_SRCU
> @@ -756,6 +750,7 @@ static void __device_link_del(struct kref *kref)
>   if (link->flags & DL_FLAG_PM_RUNTIME)
>   pm_runtime_drop_link(link->consumer);
>  
> + device_unregister(>link_dev);
>   list_del_rcu(>s_node);
>   list_del_rcu(>c_node);
>   call_srcu(_links_srcu, >rcu_head, __device_link_free_srcu);
> @@ -771,6 +766,7 @@ static void __device_link_del(struct kref *kref)
>   if (link->flags & DL_FLAG_PM_RUNTIME)
>   pm_runtime_drop_link(link->consumer);
>  
> + device_unregister(>link_dev);
>   list_del(>s_node);
>   list_del(>c_node);
>   device_link_free(link);
> 



Re: [PATCH] time/sched_clock: Use raw_read_seqcount_latch()

2020-07-15 Thread Leo Yan
Hi Peter, Ahmed,

On Wed, Jul 15, 2020 at 05:58:50PM +0200, Peter Zijlstra wrote:

[...]

> > > diff --git a/kernel/time/sched_clock.c b/kernel/time/sched_clock.c
> > > index fa3f800d7d76..ea007928d681 100644
> > > --- a/kernel/time/sched_clock.c
> > > +++ b/kernel/time/sched_clock.c
> > > @@ -100,7 +100,7 @@ unsigned long long notrace sched_clock(void)
> > >   struct clock_read_data *rd;
> > > 
> > >   do {
> > > - seq = raw_read_seqcount();
> > > + seq = raw_read_seqcount_latch();
> > 
> > I understand this is doing the same thing as __ktime_get_fast_ns(), and
> > I saw Peter acked making this change.
> >
> > Just want to confirm: since this patch introduces a conflict with the
> > patch set "arm64: perf: Proper cap_user_time* support" [1], I should
> > rebase the patch set on top of this patch, right?
> 
> Or rebase this patch on top of yours and include it, either way.

I have rebased this patch and included it in the v3 patch set for
"arm64: perf: Proper cap_user_time* support" [1].

Thanks!
Leo

[1] 
https://lore.kernel.org/linux-arm-kernel/20200716051130.4359-3-leo@linaro.org/T/#u


Re: [RFC PATCH 4/7] x86: use exit_lazy_tlb rather than membarrier_mm_sync_core_before_usermode

2020-07-15 Thread Andy Lutomirski



> On Jul 15, 2020, at 9:15 PM, Nicholas Piggin  wrote:
> 
> Excerpts from Mathieu Desnoyers's message of July 14, 2020 12:13 am:
>> - On Jul 13, 2020, at 9:47 AM, Nicholas Piggin npig...@gmail.com wrote:
>> 
>>> Excerpts from Nicholas Piggin's message of July 13, 2020 2:45 pm:
 Excerpts from Andy Lutomirski's message of July 11, 2020 3:04 am:
> Also, as it stands, I can easily see in_irq() ceasing to promise to
> serialize.  There are older kernels for which it does not promise to
> serialize.  And I have plans to make it stop serializing in the
> nearish future.
 
 You mean x86's return from interrupt? Sounds fun... you'll know where to
 update the membarrier sync code, at least :)
>>> 
>>> Oh, I should actually say Mathieu recently clarified a return from
>>> interrupt doesn't fundamentally need to serialize in order to support
>>> membarrier sync core.
>> 
>> Clarification to your statement:
>> 
>> Return from interrupt to kernel code does not need to be context serializing
>> as long as kernel serializes before returning to user-space.
>> 
>> However, return from interrupt to user-space needs to be context serializing.
> 
> Hmm, I'm not sure it's enough even with the sync in the exit_lazy_tlb
> in the right places.
> 
> A kernel thread does a use_mm, then it blocks and the user process with
> the same mm runs on that CPU, and then it calls into the kernel, blocks,
> the kernel thread runs again, another CPU issues a membarrier which does
> not IPI this one because it's running a kthread, and then the kthread
> switches back to the user process (still without having unused the mm),
> and then the user process returns from syscall without having done a 
> core synchronising instruction.
> 
> The cause of the problem is you want to avoid IPI'ing kthreads. Why?
> I'm guessing it really only matters as an optimisation in case of idle
> threads. Idle thread is easy (well, easier) because it won't use_mm, so 
> you could check for rq->curr == rq->idle in your loop (in a suitable 
> sched accessor function).
> 
> But... I'm not really liking this subtlety in the scheduler for all this 
> (the scheduler still needs the barriers when switching out of idle).
> 
> Can it be improved somehow? Let me forget x86 core sync problem for now
> (that _may_ be a bit harder), and step back and look at what we're doing.
> The memory barrier case would actually suffer from the same problem as
> core sync, because in the same situation it has no implicit mmdrop in
> the scheduler switch code either.
> 
> So what are we doing with membarrier? We want any activity caused by the 
> set of CPUs/threads specified that can be observed by this thread before 
> calling membarrier is appropriately fenced from activity that can be 
> observed to happen after the call returns.
> 
> CPU0 CPU1
> 1. user stuff
> a. membarrier()  2. enter kernel
> b. read rq->curr 3. rq->curr switched to kthread
> c. is kthread, skip IPI  4. switch_to kthread
> d. return to user5. rq->curr switched to user thread
> 6. switch_to user thread
> 7. exit kernel
> 8. more user stuff
> 
> As far as I can see, the problem is CPU1 might reorder step 5 and step
> 8, so you have mmdrop of lazy mm be a mb after step 6.
> 
> But why? The membarrier call only cares that there is a full barrier
> between 1 and 8, right? Which it will get from the previous context
> switch to the kthread.
> 
> I must say the memory barrier comments in membarrier could be improved
> a bit (unless I'm missing where the main comment is). It's fine to know
> what barriers pair with one another, but we need to know which exact
> memory accesses it is ordering
> 
>   /*
> * Matches memory barriers around rq->curr modification in
> * scheduler.
> */
> 
> Sure, but it doesn't say what else is being ordered. I think it's just
> the user memory accesses, but would be nice to make that a bit more
> explicit. If we had such comments then we might know this case is safe.
> 
> I think the funny powerpc barrier is a similar case of this. If we
> ever see remote_rq->curr->flags & PF_KTHREAD, then we _know_ that
> CPU has or will have issued a memory barrier between running user
> code.
> 
> So AFAIKS all this membarrier stuff in kernel/sched/core.c could
> just go away. Except x86 because thread switch doesn't imply core
> sync, so CPU1 between 1 and 8 may never issue a core sync instruction
> the same way a context switch must be a full mb.
> 
> Before getting to x86 -- Am I right, or way off track here?

I find it hard to believe that this is x86 only. Why would thread switch imply 
core sync on any architecture?  Is x86 unique in having a stupid expensive core 
sync that is heavier than smp_mb()?

But I’m wondering if all this deferred sync stuff is wrong. In the brave new 
world of io_uring and such, perhaps 

[PATCH v3 2/2] dt-bindings: mfd: Add DT compatible string "google,cros_ec_uart"

2020-07-15 Thread Bhanu Prakash Maiya
From: Bhanu Prakash Maiya 

Add DT compatible string in
Documentation/devicetree/bindings/mfd/google,cros-ec.yaml

Signed-off-by: Bhanu Prakash Maiya 
---

Changes in v3:
- Rebased changes on google,cros-ec.yaml

Changes in v2:
- No change

 Documentation/devicetree/bindings/mfd/google,cros-ec.yaml | 6 +-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/Documentation/devicetree/bindings/mfd/google,cros-ec.yaml 
b/Documentation/devicetree/bindings/mfd/google,cros-ec.yaml
index 6a7279a85ec1c..552d1c9bf3de4 100644
--- a/Documentation/devicetree/bindings/mfd/google,cros-ec.yaml
+++ b/Documentation/devicetree/bindings/mfd/google,cros-ec.yaml
@@ -10,11 +10,12 @@ maintainers:
   - Benson Leung 
   - Enric Balletbo i Serra 
   - Guenter Roeck 
+  - Bhanu Prakash Maiya 
 
 description:
   Google's ChromeOS EC is a microcontroller which talks to the AP and
   implements various functions such as keyboard and battery charging.
-  The EC can be connected through various interfaces (I2C, SPI, and others)
+  The EC can be connected through various interfaces (I2C, SPI, UART and 
others)
   and the compatible string specifies which interface is being used.
 
 properties:
@@ -29,6 +30,9 @@ properties:
   - description:
   For implementations of the EC is connected through RPMSG.
 const: google,cros-ec-rpmsg
+  - description:
+  For implementations of the EC is connected through UART.
+const: google,cros-ec-uart
 
   google,cros-ec-spi-pre-delay:
 description:
-- 
2.26.2



[PATCH v3 1/2] platform/chrome: cros_ec_uart: Add cros-ec-uart transport layer

2020-07-15 Thread Bhanu Prakash Maiya
From: Bhanu Prakash Maiya 

This patch enables the UART transport layer for the cros_ec framework.
The cros-ec-uart driver binds with an EC device over the UART transport to
send requests and receive responses.

Signed-off-by: Bhanu Prakash Maiya 
---

Changes in v3:
- checkpatch.pl script warns about char len 80 even though we have
  a relaxed guideline for lines below 100 chars. Currently sticking
  with 80 chars in v3.
- Fixed style issues

Changes in v2:
- Fixed build error on v1.
- Changed EC timeout for response packet to 3 Sec and added comments.
- Fixed cros_ec_uart_rx_bytes function to handle rx buffer < size of response 
header.

 MAINTAINERS|   6 +
 drivers/platform/chrome/Kconfig|  10 +
 drivers/platform/chrome/Makefile   |   1 +
 drivers/platform/chrome/cros_ec_uart.c | 410 +
 4 files changed, 427 insertions(+)
 create mode 100644 drivers/platform/chrome/cros_ec_uart.c

diff --git a/MAINTAINERS b/MAINTAINERS
index 50659d76976b7..ee28a1da41c50 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -4033,6 +4033,12 @@ S:   Maintained
 F: Documentation/devicetree/bindings/sound/google,cros-ec-codec.yaml
 F: sound/soc/codecs/cros_ec_codec.*
 
+CHROMEOS EC UART DRIVER
+M: Bhanu Prakash Maiya 
+R: Enric Balletbo i Serra 
+S: Maintained
+F: drivers/platform/chrome/cros_ec_uart.c
+
 CHROMEOS EC SUBDRIVERS
 M: Benson Leung 
 M: Enric Balletbo i Serra 
diff --git a/drivers/platform/chrome/Kconfig b/drivers/platform/chrome/Kconfig
index 3822e5e111caa..2082fafe08a6a 100644
--- a/drivers/platform/chrome/Kconfig
+++ b/drivers/platform/chrome/Kconfig
@@ -125,6 +125,16 @@ config CROS_EC_SPI
  response time cannot be guaranteed, we support ignoring
  'pre-amble' bytes before the response actually starts.
 
+config CROS_EC_UART
+   tristate "ChromeOS Embedded Controller (UART)"
+   depends on CROS_EC && ACPI && SERIAL_DEV_BUS
+   help
+ If you say Y here, you get support for talking to the ChromeOS EC
+ through a UART, using a byte-level protocol.
+
+ To compile this driver as a module, choose M here: the
+ module will be called cros_ec_uart.
+
 config CROS_EC_LPC
tristate "ChromeOS Embedded Controller (LPC)"
depends on CROS_EC && ACPI && (X86 || COMPILE_TEST)
diff --git a/drivers/platform/chrome/Makefile b/drivers/platform/chrome/Makefile
index 8ed1e33033b38..fc449351fc794 100644
--- a/drivers/platform/chrome/Makefile
+++ b/drivers/platform/chrome/Makefile
@@ -12,6 +12,7 @@ obj-$(CONFIG_CROS_EC_I2C) += cros_ec_i2c.o
 obj-$(CONFIG_CROS_EC_ISHTP)+= cros_ec_ishtp.o
 obj-$(CONFIG_CROS_EC_RPMSG)+= cros_ec_rpmsg.o
 obj-$(CONFIG_CROS_EC_SPI)  += cros_ec_spi.o
+obj-$(CONFIG_CROS_EC_UART) += cros_ec_uart.o
 cros_ec_lpcs-objs  := cros_ec_lpc.o cros_ec_lpc_mec.o
 obj-$(CONFIG_CROS_EC_TYPEC)+= cros_ec_typec.o
 obj-$(CONFIG_CROS_EC_LPC)  += cros_ec_lpcs.o
diff --git a/drivers/platform/chrome/cros_ec_uart.c 
b/drivers/platform/chrome/cros_ec_uart.c
new file mode 100644
index 0..45755061370a7
--- /dev/null
+++ b/drivers/platform/chrome/cros_ec_uart.c
@@ -0,0 +1,410 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * UART interface for ChromeOS Embedded Controller
+ *
+ * Copyright 2020 Google LLC.
+ */
+
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+
+#include "cros_ec.h"
+
+/*
+ * EC sends contiguous bytes of response packet on UART AP RX.
+ * TTY driver in AP accumulates incoming bytes and calls the registered 
callback
+ * function. Byte count can range from 1 to Max count supported by TTY driver.
+ * This driver should wait for long time for all callbacks to be processed.
+ * Considering the worst case scenario, wait for ~3 secs. This timeout should
+ * account for max latency and some additional guard time.
+ * In case the packet is received in ms, wait queue will be released and packet
+ * will be processed.
+ */
+#define EC_MSG_DEADLINE_MS (300 * 10)
+
+/**
+ * struct response_info - Encapsulate EC response related
+ * information for passing between function
+ * cros_ec_uart_pkt_xfer() and cros_ec_uart_rx_bytes()
+ * callback.
+ * @data:  Copy the data received from EC here.
+ * @max_size:  Max size allocated for the @data buffer. If the
+ * received data exceeds this value, we log an error.
+ * @size:  Actual size of data received from EC. This is also
+ * used to accumulate byte count with response is received
+ * in dma chunks.
+ * @exp_len:   Expected bytes of response from EC including header.
+ * @error: 0 for success, negative error code for a failure.
+ * @received:  Set 

[PATCH v3 6/7] arm64: perf: Add cap_user_time_short

2020-07-15 Thread Leo Yan
From: Peter Zijlstra 

This completes the ARM64 cap_user_time support.

Signed-off-by: Peter Zijlstra (Intel) 
Signed-off-by: Leo Yan 
---
 arch/arm64/kernel/perf_event.c | 12 +++-
 1 file changed, 7 insertions(+), 5 deletions(-)

diff --git a/arch/arm64/kernel/perf_event.c b/arch/arm64/kernel/perf_event.c
index c016b116ae33..888bcb5d1388 100644
--- a/arch/arm64/kernel/perf_event.c
+++ b/arch/arm64/kernel/perf_event.c
@@ -1174,6 +1174,7 @@ void arch_perf_update_userpage(struct perf_event *event,
 
userpg->cap_user_time = 0;
userpg->cap_user_time_zero = 0;
+   userpg->cap_user_time_short = 0;
 
do {
	rd = sched_clock_read_begin(&seq);
@@ -1184,13 +1185,13 @@ void arch_perf_update_userpage(struct perf_event *event,
userpg->time_mult = rd->mult;
userpg->time_shift = rd->shift;
userpg->time_zero = rd->epoch_ns;
+   userpg->time_cycles = rd->epoch_cyc;
+   userpg->time_mask = rd->sched_clock_mask;
 
/*
-* This isn't strictly correct, the ARM64 counter can be
-* 'short' and then we get funnies when it wraps. The correct
-* thing would be to extend the perf ABI with a cycle and mask
-* value, but because wrapping on ARM64 is very rare in
-* practise this 'works'.
+* Subtract the cycle base, such that software that
+* doesn't know about cap_user_time_short still 'works'
+* assuming no wraps.
 */
ns = mul_u64_u32_shr(rd->epoch_cyc, rd->mult, rd->shift);
userpg->time_zero -= ns;
@@ -1216,4 +1217,5 @@ void arch_perf_update_userpage(struct perf_event *event,
 */
userpg->cap_user_time = 1;
userpg->cap_user_time_zero = 1;
+   userpg->cap_user_time_short = 1;
 }
-- 
2.17.1



[PATCH v3 7/7] tools headers UAPI: Update tools's copy of linux/perf_event.h

2020-07-15 Thread Leo Yan
To get the changes in the commit:

  "perf: Add perf_event_mmap_page::cap_user_time_short ABI"

This update is a prerequisite for adding support for the short clock
counter related ABI extension.

Signed-off-by: Leo Yan 
---
 tools/include/uapi/linux/perf_event.h | 23 ---
 1 file changed, 20 insertions(+), 3 deletions(-)

diff --git a/tools/include/uapi/linux/perf_event.h 
b/tools/include/uapi/linux/perf_event.h
index 7b2d6fc9e6ed..21a1edd08cbe 100644
--- a/tools/include/uapi/linux/perf_event.h
+++ b/tools/include/uapi/linux/perf_event.h
@@ -532,9 +532,10 @@ struct perf_event_mmap_page {
cap_bit0_is_deprecated  : 1, /* Always 1, 
signals that bit 0 is zero */
 
cap_user_rdpmc  : 1, /* The RDPMC 
instruction can be used to read counts */
-   cap_user_time   : 1, /* The time_* 
fields are used */
+   cap_user_time   : 1, /* The 
time_{shift,mult,offset} fields are used */
cap_user_time_zero  : 1, /* The time_zero 
field is used */
-   cap_res : 59;
+   cap_user_time_short : 1, /* the 
time_{cycle,mask} fields are used */
+   cap_res : 58;
};
};
 
@@ -593,13 +594,29 @@ struct perf_event_mmap_page {
 *   ((rem * time_mult) >> time_shift);
 */
__u64   time_zero;
+
__u32   size;   /* Header size up to __reserved[] 
fields. */
+   __u32   __reserved_1;
+
+   /*
+* If cap_usr_time_short, the hardware clock is less than 64bit wide
+* and we must compute the 'cyc' value, as used by cap_usr_time, as:
+*
+*   cyc = time_cycles + ((cyc - time_cycles) & time_mask)
+*
+* NOTE: this form is explicitly chosen such that cap_usr_time_short
+*   is a correction on top of cap_usr_time, and code that doesn't
+*   know about cap_usr_time_short still works under the assumption
+*   the counter doesn't wrap.
+*/
+   __u64   time_cycles;
+   __u64   time_mask;
 
/*
 * Hole for extension of the self monitor capabilities
 */
 
-   __u8__reserved[118*8+4];/* align to 1k. */
+   __u8__reserved[116*8];  /* align to 1k. */
 
/*
 * Control data for the mmap() data buffer.
-- 
2.17.1



Re: [PATCH 2/2] debugfs: Add access restriction option

2020-07-15 Thread Randy Dunlap
Hi Peter,

Here are a few more comments/corrections.

On 7/15/20 9:54 PM, Peter Enderborg wrote:
> Since debugfs includes sensitive information, it needs to be treated
> carefully. But it also has many very useful debug functions for userspace.
> With this option we can have the same configuration for systems that
> need debugfs and a way to turn it off. This gives extra protection
> against exposure on systems where user-space services with system
> access are attacked.
> 
> It is controlled by a configurable default value that can be overridden
> with a kernel command line parameter (debugfs=).
> 
> It can be on or off, but also internally on yet not seen from user-space.
> This no-mount mode does not register debugfs as a filesystem, but clients can
> still register their parts in the internal structures. This data can be read
> with a debugger or saved with a crashkernel. When it is off, clients
> get an EPERM error when accessing the functions for registering their
> components.
> 
> Signed-off-by: Peter Enderborg 
> ---
>  .../admin-guide/kernel-parameters.txt | 15 
>  fs/debugfs/inode.c| 37 +++
>  fs/debugfs/internal.h | 14 +++
>  lib/Kconfig.debug | 32 
>  4 files changed, 98 insertions(+)
> 
> diff --git a/Documentation/admin-guide/kernel-parameters.txt 
> b/Documentation/admin-guide/kernel-parameters.txt
> index fb95fad81c79..779d6cdc9627 100644
> --- a/Documentation/admin-guide/kernel-parameters.txt
> +++ b/Documentation/admin-guide/kernel-parameters.txt
> @@ -827,6 +827,21 @@
>   useful to also enable the page_owner functionality.
>   on: enable the feature
>  
> + debugfs=[KNL] This parameter enables what is exposed to 
> userspace
> + and debugfs internal clients.
> + Format: { on, no-mount, off }
> + on: All functions are enabled.
> + no-mount:
> + Filesystem is not registered but kernel clients 
> can
> + access APIs and a crashkernel can be used to 
> read
> + its content. There is nothing to mount.
> + off:Filesystem is not registered and clients
> + get a -EPERM as result when trying to register 
> files
> + or directories within debugfs.
> + This is equilivant of the runtime functionality 
> if

equivalent

> + debugfs was not enabled in the kernel at all.
> + Default value is set in build-time with a kernel 
> configuration.
> +
>   debugpat[X86] Enable PAT debugging
>  
>   decnet.addr=[HW,NET]
> diff --git a/fs/debugfs/inode.c b/fs/debugfs/inode.c
> index b7f2e971ecbc..02d08b17d0e6 100644
> --- a/fs/debugfs/inode.c
> +++ b/fs/debugfs/inode.c

> @@ -786,10 +808,25 @@ bool debugfs_initialized(void)
>  }
>  EXPORT_SYMBOL_GPL(debugfs_initialized);
>  

I would add some "else"s here:

> +static int __init debugfs_kernel(char *str)
> +{
> + if (str && !strcmp(str, "on"))
> + debugfs_allow = DEBUGFS_ALLOW_API | DEBUGFS_ALLOW_MOUNT;
else if ...

> + if (str && !strcmp(str, "no-mount"))
> + debugfs_allow = DEBUGFS_ALLOW_API;
else if ...

> + if (str && !strcmp(str, "off"))
> + debugfs_allow = 0;
> +
> + return 0;
> +}
> +early_param("debugfs", debugfs_kernel);
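
For reference, the quoted helper with the suggested else-chain applied would
read roughly as follows (an illustrative sketch, not a tested patch):

static int __init debugfs_kernel(char *str)
{
	if (str && !strcmp(str, "on"))
		debugfs_allow = DEBUGFS_ALLOW_API | DEBUGFS_ALLOW_MOUNT;
	else if (str && !strcmp(str, "no-mount"))
		debugfs_allow = DEBUGFS_ALLOW_API;
	else if (str && !strcmp(str, "off"))
		debugfs_allow = 0;

	return 0;
}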

> diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
> index 9ad9210d70a1..ebe670fdf1bd 100644
> --- a/lib/Kconfig.debug
> +++ b/lib/Kconfig.debug
> @@ -476,6 +476,38 @@ config DEBUG_FS
>  
> If unsure, say N.
>  
> +choice
> + prompt "Debugfs default access"
> + depends on DEBUG_FS
> + default DEBUG_FS_ALLOW_ALL
> + help
> +   This selects the default access restrictions for debugfs.
> +   It can be overridden with kernel command line option
> +   debugfs=[on,no-mount,off]. The restrictions apply for API access
> +   and filesystem registration. .

stray '.' there.

> +
> +config DEBUG_FS_ALLOW_ALL
> + bool "Access normal"
> + help
> +   No restrictions apply. Both API and filesystem registration
> +   is on. This is the normal default operation.
> +
> +config DEBUG_FS_DISALLOW_MOUNT
> + bool "Do not register debugfs as filesystem"
> + help
> +   The API is open but filesystem not loaded. Client can still do

  but filesystem is not loaded. Clients can still do

> +   their work and read with debug tools that do not need
> +   debugfs filesystem.
> +
> +config DEBUG_FS_ALLOW_NONE
> + bool "No access"
> + help
> +   Access is off. Clients get -PERM when trying to create nodes in
> +   debugfs tree and debugfs is not 

[PATCH v3 5/7] perf: Add perf_event_mmap_page::cap_user_time_short ABI

2020-07-15 Thread Leo Yan
From: Peter Zijlstra 

In order to support short clock counters, provide an ABI extension.

As a whole:

u64 time, delta, cyc = read_cycle_counter();

+   if (cap_user_time_short)
+   cyc = time_cycle + ((cyc - time_cycle) & time_mask);

delta = mul_u64_u32_shr(cyc, time_mult, time_shift);

if (cap_user_time_zero)
time = time_zero + delta;

delta += time_offset;
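
Illustration only (not part of this patch): a minimal sketch of how a
userspace reader might apply the new fields, assuming 'pc' points at the
event's mmap'ed struct perf_event_mmap_page (<linux/perf_event.h>) and 'cyc'
holds a raw value read from the hardware counter (e.g. CNTVCT_EL0 on arm64);
a real reader must also sample pc->lock before and after and retry if it
changed:

static __u64 mmap_cyc_to_ns(volatile struct perf_event_mmap_page *pc, __u64 cyc)
{
	__u64 quot, rem, delta;

	if (pc->cap_user_time_short)
		cyc = pc->time_cycles + ((cyc - pc->time_cycles) & pc->time_mask);

	/* delta = (cyc * time_mult) >> time_shift without 64-bit overflow */
	quot  = cyc >> pc->time_shift;
	rem   = cyc & (((__u64)1 << pc->time_shift) - 1);
	delta = quot * pc->time_mult + ((rem * pc->time_mult) >> pc->time_shift);

	if (pc->cap_user_time_zero)
		return pc->time_zero + delta;	/* absolute sched_clock time */

	return delta + pc->time_offset;		/* time relative to the event */
}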

Signed-off-by: Peter Zijlstra (Intel) 
Signed-off-by: Leo Yan 
---
 include/uapi/linux/perf_event.h | 23 ---
 1 file changed, 20 insertions(+), 3 deletions(-)

diff --git a/include/uapi/linux/perf_event.h b/include/uapi/linux/perf_event.h
index 7b2d6fc9e6ed..21a1edd08cbe 100644
--- a/include/uapi/linux/perf_event.h
+++ b/include/uapi/linux/perf_event.h
@@ -532,9 +532,10 @@ struct perf_event_mmap_page {
cap_bit0_is_deprecated  : 1, /* Always 1, 
signals that bit 0 is zero */
 
cap_user_rdpmc  : 1, /* The RDPMC 
instruction can be used to read counts */
-   cap_user_time   : 1, /* The time_* 
fields are used */
+   cap_user_time   : 1, /* The 
time_{shift,mult,offset} fields are used */
cap_user_time_zero  : 1, /* The time_zero 
field is used */
-   cap_res : 59;
+   cap_user_time_short : 1, /* the 
time_{cycle,mask} fields are used */
+   cap_res : 58;
};
};
 
@@ -593,13 +594,29 @@ struct perf_event_mmap_page {
 *   ((rem * time_mult) >> time_shift);
 */
__u64   time_zero;
+
__u32   size;   /* Header size up to __reserved[] 
fields. */
+   __u32   __reserved_1;
+
+   /*
+* If cap_usr_time_short, the hardware clock is less than 64bit wide
+* and we must compute the 'cyc' value, as used by cap_usr_time, as:
+*
+*   cyc = time_cycles + ((cyc - time_cycles) & time_mask)
+*
+* NOTE: this form is explicitly chosen such that cap_usr_time_short
+*   is a correction on top of cap_usr_time, and code that doesn't
+*   know about cap_usr_time_short still works under the assumption
+*   the counter doesn't wrap.
+*/
+   __u64   time_cycles;
+   __u64   time_mask;
 
/*
 * Hole for extension of the self monitor capabilities
 */
 
-   __u8__reserved[118*8+4];/* align to 1k. */
+   __u8__reserved[116*8];  /* align to 1k. */
 
/*
 * Control data for the mmap() data buffer.
-- 
2.17.1



[PATCH v3 4/7] arm64: perf: Only advertise cap_user_time for arch_timer

2020-07-15 Thread Leo Yan
From: Peter Zijlstra 

When sched_clock is running on anything other than arch_timer, don't
advertise cap_user_time*.

Requested-by: Will Deacon 
Signed-off-by: Peter Zijlstra (Intel) 
Signed-off-by: Leo Yan 
---
 arch/arm64/kernel/perf_event.c | 19 +--
 1 file changed, 13 insertions(+), 6 deletions(-)

diff --git a/arch/arm64/kernel/perf_event.c b/arch/arm64/kernel/perf_event.c
index 47db6c7cae6a..c016b116ae33 100644
--- a/arch/arm64/kernel/perf_event.c
+++ b/arch/arm64/kernel/perf_event.c
@@ -13,6 +13,8 @@
 #include 
 #include 
 
+#include <clocksource/arm_arch_timer.h>
+
 #include 
 #include 
 #include 
@@ -1170,16 +1172,15 @@ void arch_perf_update_userpage(struct perf_event *event,
unsigned int seq;
u64 ns;
 
-   /*
-* Internal timekeeping for enabled/running/stopped times
-* is always computed with the sched_clock.
-*/
-   userpg->cap_user_time = 1;
-   userpg->cap_user_time_zero = 1;
+   userpg->cap_user_time = 0;
+   userpg->cap_user_time_zero = 0;
 
do {
	rd = sched_clock_read_begin(&seq);
 
+   if (rd->read_sched_clock != arch_timer_read_counter)
+   return;
+
userpg->time_mult = rd->mult;
userpg->time_shift = rd->shift;
userpg->time_zero = rd->epoch_ns;
@@ -1209,4 +1210,10 @@ void arch_perf_update_userpage(struct perf_event *event,
userpg->time_mult >>= 1;
}
 
+   /*
+* Internal timekeeping for enabled/running/stopped times
+* is always computed with the sched_clock.
+*/
+   userpg->cap_user_time = 1;
+   userpg->cap_user_time_zero = 1;
 }
-- 
2.17.1



[PATCH v3 1/7] sched_clock: Expose struct clock_read_data

2020-07-15 Thread Leo Yan
From: Peter Zijlstra 

In order to support perf_event_mmap_page::cap_time features, an
architecture needs, aside from a userspace readable counter register,
to expose the exact clock data so that userspace can convert the
counter register into a correct timestamp.

Provide struct clock_read_data and two (seqcount) helpers so that
architectures (arm64 in specific) can expose the numbers to userspace.
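
An illustrative sketch (not part of this patch) of the intended read-side
pattern for the two helpers, mirroring what sched_clock() itself does below:

	struct clock_read_data *rd;
	unsigned int seq;
	u64 cyc, ns;

	do {
		rd  = sched_clock_read_begin(&seq);
		cyc = (rd->read_sched_clock() - rd->epoch_cyc) &
		      rd->sched_clock_mask;
		ns  = rd->epoch_ns + ((cyc * rd->mult) >> rd->shift);
	} while (sched_clock_read_retry(seq));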

Signed-off-by: Peter Zijlstra (Intel) 
Signed-off-by: Leo Yan 
---
 include/linux/sched_clock.h | 28 +
 kernel/time/sched_clock.c   | 41 -
 2 files changed, 41 insertions(+), 28 deletions(-)

diff --git a/include/linux/sched_clock.h b/include/linux/sched_clock.h
index 0bb04a96a6d4..528718e4ed52 100644
--- a/include/linux/sched_clock.h
+++ b/include/linux/sched_clock.h
@@ -6,6 +6,34 @@
 #define LINUX_SCHED_CLOCK
 
 #ifdef CONFIG_GENERIC_SCHED_CLOCK
+/**
+ * struct clock_read_data - data required to read from sched_clock()
+ *
+ * @epoch_ns:  sched_clock() value at last update
+ * @epoch_cyc: Clock cycle value at last update.
+ * @sched_clock_mask:   Bitmask for two's complement subtraction of non 64bit
+ * clocks.
+ * @read_sched_clock:  Current clock source (or dummy source when suspended).
+ * @mult:  Multipler for scaled math conversion.
+ * @shift: Shift value for scaled math conversion.
+ *
+ * Care must be taken when updating this structure; it is read by
+ * some very hot code paths. It occupies <=40 bytes and, when combined
+ * with the seqcount used to synchronize access, comfortably fits into
+ * a 64 byte cache line.
+ */
+struct clock_read_data {
+   u64 epoch_ns;
+   u64 epoch_cyc;
+   u64 sched_clock_mask;
+   u64 (*read_sched_clock)(void);
+   u32 mult;
+   u32 shift;
+};
+
+extern struct clock_read_data *sched_clock_read_begin(unsigned int *seq);
+extern int sched_clock_read_retry(unsigned int seq);
+
 extern void generic_sched_clock_init(void);
 
 extern void sched_clock_register(u64 (*read)(void), int bits,
diff --git a/kernel/time/sched_clock.c b/kernel/time/sched_clock.c
index fa3f800d7d76..0acaadc3156c 100644
--- a/kernel/time/sched_clock.c
+++ b/kernel/time/sched_clock.c
@@ -19,31 +19,6 @@
 
 #include "timekeeping.h"
 
-/**
- * struct clock_read_data - data required to read from sched_clock()
- *
- * @epoch_ns:  sched_clock() value at last update
- * @epoch_cyc: Clock cycle value at last update.
- * @sched_clock_mask:   Bitmask for two's complement subtraction of non 64bit
- * clocks.
- * @read_sched_clock:  Current clock source (or dummy source when suspended).
- * @mult:  Multipler for scaled math conversion.
- * @shift: Shift value for scaled math conversion.
- *
- * Care must be taken when updating this structure; it is read by
- * some very hot code paths. It occupies <=40 bytes and, when combined
- * with the seqcount used to synchronize access, comfortably fits into
- * a 64 byte cache line.
- */
-struct clock_read_data {
-   u64 epoch_ns;
-   u64 epoch_cyc;
-   u64 sched_clock_mask;
-   u64 (*read_sched_clock)(void);
-   u32 mult;
-   u32 shift;
-};
-
 /**
  * struct clock_data - all data needed for sched_clock() (including
  * registration of a new clock source)
@@ -93,6 +68,17 @@ static inline u64 notrace cyc_to_ns(u64 cyc, u32 mult, u32 
shift)
return (cyc * mult) >> shift;
 }
 
+struct clock_read_data *sched_clock_read_begin(unsigned int *seq)
+{
+   *seq = raw_read_seqcount(&cd.seq);
+   return cd.read_data + (*seq & 1);
+}
+
+int sched_clock_read_retry(unsigned int seq)
+{
+   return read_seqcount_retry(&cd.seq, seq);
+}
+
 unsigned long long notrace sched_clock(void)
 {
u64 cyc, res;
@@ -100,13 +86,12 @@ unsigned long long notrace sched_clock(void)
struct clock_read_data *rd;
 
do {
-   seq = raw_read_seqcount(&cd.seq);
-   rd = cd.read_data + (seq & 1);
+   rd = sched_clock_read_begin(&seq);
 
cyc = (rd->read_sched_clock() - rd->epoch_cyc) &
  rd->sched_clock_mask;
res = rd->epoch_ns + cyc_to_ns(cyc, rd->mult, rd->shift);
-   } while (read_seqcount_retry(&cd.seq, seq));
+   } while (sched_clock_read_retry(seq));
 
return res;
 }
-- 
2.17.1



[PATCH v3 2/7] time/sched_clock: Use raw_read_seqcount_latch()

2020-07-15 Thread Leo Yan
From: "Ahmed S. Darwish" 

sched_clock uses seqcount_t latching to switch between two storage
places protected by the sequence counter. This allows it to have
interruptible, NMI-safe, seqcount_t write side critical sections.

Since 7fc26327b756 ("seqlock: Introduce raw_read_seqcount_latch()"),
raw_read_seqcount_latch() became the standardized way for seqcount_t
latch read paths. Due to the dependent load, it also has one read
memory barrier less than the currently used raw_read_seqcount() API.

Use raw_read_seqcount_latch() for the seqcount_t latch read path.
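
For context, an illustrative sketch of the latch idiom in question (loosely
following the example documented above raw_write_seqcount_latch() in
include/linux/seqlock.h; not code from this patch). The writer updates the
two copies in turn; the reader picks the copy selected by the low sequence
bit and retries if a write raced with it:

struct latch_u64 {
	seqcount_t	seq;
	u64		data[2];
};

static void latch_update(struct latch_u64 *l, u64 val)
{
	raw_write_seqcount_latch(&l->seq);
	l->data[0] = val;
	raw_write_seqcount_latch(&l->seq);
	l->data[1] = val;
}

static u64 latch_read(struct latch_u64 *l)
{
	unsigned int seq;
	u64 val;

	do {
		seq = raw_read_seqcount_latch(&l->seq);
		val = l->data[seq & 1];
	} while (read_seqcount_retry(&l->seq, seq));

	return val;
}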

Link: 
https://lkml.kernel.org/r/20200625085745.gd117...@hirez.programming.kicks-ass.net
Link: 
https://lkml.kernel.org/r/20200715092345.ga231...@debian-buster-darwi.lab.linutronix.de
References: 1809bfa44e10 ("timers, sched/clock: Avoid deadlock during read from 
NMI")
Signed-off-by: Ahmed S. Darwish 
Signed-off-by: Leo Yan 
---
 kernel/time/sched_clock.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/kernel/time/sched_clock.c b/kernel/time/sched_clock.c
index 0acaadc3156c..0deaf4b79fb4 100644
--- a/kernel/time/sched_clock.c
+++ b/kernel/time/sched_clock.c
@@ -70,7 +70,7 @@ static inline u64 notrace cyc_to_ns(u64 cyc, u32 mult, u32 
shift)
 
 struct clock_read_data *sched_clock_read_begin(unsigned int *seq)
 {
-   *seq = raw_read_seqcount(&cd.seq);
+   *seq = raw_read_seqcount_latch(&cd.seq);
return cd.read_data + (*seq & 1);
 }
 
-- 
2.17.1



[PATCH v3 3/7] arm64: perf: Implement correct cap_user_time

2020-07-15 Thread Leo Yan
From: Peter Zijlstra 

As reported by Leo, the existing implementation is broken when the
clock and counter don't intersect at 0.

Use the sched_clock's struct clock_read_data information to correctly
implement cap_user_time and cap_user_time_zero.

Note that the ARM64 counter is architecturally only guaranteed to be
56bit wide (implementations are allowed to be wider) and the existing
perf ABI cannot deal with wrap-around.

This implementation should also be faster than the old one, since we
no longer need to recompute mult and shift all the time.

[leoyan: Use mul_u64_u32_shr() to convert cyc to ns to avoid overflow]
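
Aside, for illustration only: epoch_cyc can be well over 32 bits wide, so a
plain (epoch_cyc * mult) >> shift could overflow 64 bits. mul_u64_u32_shr()
avoids that by using a wider intermediate, conceptually:

	ns = (u64)(((unsigned __int128)rd->epoch_cyc * rd->mult) >> rd->shift);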

Reported-by: Leo Yan 
Signed-off-by: Peter Zijlstra (Intel) 
Signed-off-by: Leo Yan 
---
 arch/arm64/kernel/perf_event.c | 38 ++
 1 file changed, 29 insertions(+), 9 deletions(-)

diff --git a/arch/arm64/kernel/perf_event.c b/arch/arm64/kernel/perf_event.c
index 4d7879484cec..47db6c7cae6a 100644
--- a/arch/arm64/kernel/perf_event.c
+++ b/arch/arm64/kernel/perf_event.c
@@ -19,6 +19,7 @@
 #include 
 #include 
 #include 
+#include <linux/sched_clock.h>
 #include 
 
 /* ARMv8 Cortex-A53 specific event types. */
@@ -1165,28 +1166,47 @@ device_initcall(armv8_pmu_driver_init)
 void arch_perf_update_userpage(struct perf_event *event,
   struct perf_event_mmap_page *userpg, u64 now)
 {
-   u32 freq;
-   u32 shift;
+   struct clock_read_data *rd;
+   unsigned int seq;
+   u64 ns;
 
/*
 * Internal timekeeping for enabled/running/stopped times
 * is always computed with the sched_clock.
 */
-   freq = arch_timer_get_rate();
userpg->cap_user_time = 1;
+   userpg->cap_user_time_zero = 1;
+
+   do {
+   rd = sched_clock_read_begin(&seq);
+
+   userpg->time_mult = rd->mult;
+   userpg->time_shift = rd->shift;
+   userpg->time_zero = rd->epoch_ns;
+
+   /*
+* This isn't strictly correct, the ARM64 counter can be
+* 'short' and then we get funnies when it wraps. The correct
+* thing would be to extend the perf ABI with a cycle and mask
+* value, but because wrapping on ARM64 is very rare in
+* practise this 'works'.
+*/
+   ns = mul_u64_u32_shr(rd->epoch_cyc, rd->mult, rd->shift);
+   userpg->time_zero -= ns;
+
+   } while (sched_clock_read_retry(seq));
+
+   userpg->time_offset = userpg->time_zero - now;
 
-   clocks_calc_mult_shift(&userpg->time_mult, &shift, freq,
-   NSEC_PER_SEC, 0);
/*
 * time_shift is not expected to be greater than 31 due to
 * the original published conversion algorithm shifting a
 * 32-bit value (now specifies a 64-bit value) - refer
 * perf_event_mmap_page documentation in perf_event.h.
 */
-   if (shift == 32) {
-   shift = 31;
+   if (userpg->time_shift == 32) {
+   userpg->time_shift = 31;
userpg->time_mult >>= 1;
}
-   userpg->time_shift = (u16)shift;
-   userpg->time_offset = -now;
+
 }
-- 
2.17.1



[PATCH v2] ASoC: soc-component: Add missed return for calling soc_component_ret

2020-07-15 Thread Shengjiu Wang
Add missed return for calling soc_component_ret, otherwise the return
value is wrong.

Fixes: e2329eeba45f ("ASoC: soc-component: add soc_component_err()")
Signed-off-by: Shengjiu Wang 
---
changes in v2
- add missed return in snd_soc_pcm_component_sync_stop and
- snd_soc_pcm_component_new

 sound/soc/soc-component.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/sound/soc/soc-component.c b/sound/soc/soc-component.c
index af9909c5492f..9565a0dd7cb6 100644
--- a/sound/soc/soc-component.c
+++ b/sound/soc/soc-component.c
@@ -647,7 +647,7 @@ int snd_soc_pcm_component_sync_stop(struct 
snd_pcm_substream *substream)
ret = component->driver->sync_stop(component,
   substream);
if (ret < 0)
-   soc_component_ret(component, ret);
+   return soc_component_ret(component, ret);
}
}
 
@@ -705,7 +705,7 @@ int snd_soc_pcm_component_mmap(struct snd_pcm_substream 
*substream,
/* FIXME. it returns 1st mmap now */
for_each_rtd_components(rtd, i, component)
if (component->driver->mmap)
-   soc_component_ret(
+   return soc_component_ret(
component,
component->driver->mmap(component,
substream, vma));
@@ -723,7 +723,7 @@ int snd_soc_pcm_component_new(struct snd_soc_pcm_runtime 
*rtd)
if (component->driver->pcm_construct) {
ret = component->driver->pcm_construct(component, rtd);
if (ret < 0)
-   soc_component_ret(component, ret);
+   return soc_component_ret(component, ret);
}
}
 
-- 
2.27.0



[PATCH trivial] spi: Fix SPI NOR and SPI NAND acronyms

2020-07-15 Thread Tudor Ambarus
The industry refers to these flash types as "SPI NOR" and
"SPI NAND". Be consistent and use the same acronyms.

Signed-off-by: Tudor Ambarus 
---
 drivers/spi/Kconfig| 10 +-
 drivers/spi/spi-fsl-qspi.c |  2 +-
 drivers/spi/spi-orion.c|  2 +-
 3 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/drivers/spi/Kconfig b/drivers/spi/Kconfig
index b89d03a36cbd..c11c6c5cb442 100644
--- a/drivers/spi/Kconfig
+++ b/drivers/spi/Kconfig
@@ -169,7 +169,7 @@ config SPI_BCM_QSPI
help
  Enables support for the Broadcom SPI flash and MSPI controller.
  Select this option for any one of BRCMSTB, iProc NSP and NS2 SoCs
- based platforms. This driver works for both SPI master for spi-nor
+ based platforms. This driver works for both SPI master for SPI NOR
  flash device as well as MSPI device.
 
 config SPI_BITBANG
@@ -311,11 +311,11 @@ config SPI_FSL_QUADSPI
  supports the high-level SPI memory interface.
 
 config SPI_HISI_SFC_V3XX
-   tristate "HiSilicon SPI-NOR Flash Controller for Hi16XX chipsets"
+   tristate "HiSilicon SPI NOR Flash Controller for Hi16XX chipsets"
depends on (ARM64 && ACPI) || COMPILE_TEST
depends on HAS_IOMEM
help
- This enables support for HiSilicon v3xx SPI-NOR flash controller
+ This enables support for HiSilicon v3xx SPI NOR flash controller
  found in hi16xx chipsets.
 
 config SPI_NXP_FLEXSPI
@@ -477,9 +477,9 @@ config SPI_MTK_NOR
depends on ARCH_MEDIATEK || COMPILE_TEST
help
  This enables support for SPI NOR controller found on MediaTek
- ARM SoCs. This is a controller specifically for SPI-NOR flash.
+ ARM SoCs. This is a controller specifically for SPI NOR flash.
  It can perform generic SPI transfers up to 6 bytes via generic
- SPI interface as well as several SPI-NOR specific instructions
+ SPI interface as well as several SPI NOR specific instructions
  via SPI MEM interface.
 
 config SPI_NPCM_FIU
diff --git a/drivers/spi/spi-fsl-qspi.c b/drivers/spi/spi-fsl-qspi.c
index 6766262d7e75..9851551ebbe0 100644
--- a/drivers/spi/spi-fsl-qspi.c
+++ b/drivers/spi/spi-fsl-qspi.c
@@ -15,7 +15,7 @@
  * Yogesh Gaur 
  * Suresh Gupta 
  *
- * Based on the original fsl-quadspi.c spi-nor driver:
+ * Based on the original fsl-quadspi.c SPI NOR driver:
  * Author: Freescale Semiconductor, Inc.
  *
  */
diff --git a/drivers/spi/spi-orion.c b/drivers/spi/spi-orion.c
index 43f73db22f21..b57b8b3cc26e 100644
--- a/drivers/spi/spi-orion.c
+++ b/drivers/spi/spi-orion.c
@@ -708,7 +708,7 @@ static int orion_spi_probe(struct platform_device *pdev)
/*
 * Only map one page for direct access. This is enough for the
 * simple TX transfer which only writes to the first word.
-* This needs to get extended for the direct SPI-NOR / SPI-NAND
+* This needs to get extended for the direct SPI NOR / SPI NAND
 * support, once this gets implemented.
 */
	dir_acc = &spi->child[cs].direct_access;
-- 
2.25.1



Re: [PATCH v12 2/2] phy: samsung-ufs: add UFS PHY driver for samsung SoC

2020-07-15 Thread Vinod Koul
Hi Alim,

On 16-07-20, 06:47, Alim Akhtar wrote:

> > > +static int samsung_ufs_phy_symbol_clk_init(struct samsung_ufs_phy
> > > +*phy) {
> > > + int ret = 0;
> > 
> > superfluous init, am sure I flagged it before as well
> > 
> Yes, you did, but 0-DAY CI kernel test gave warning [1], so I kept this as
> it is.
> [1] https://lkml.org/lkml/2020/7/3/81

But you moved away from the early returns below to gotos, so that is no
longer the case. The first use of ret is the clk_prepare_enable() call below,
which is not conditional, hence this initialization is superfluous. Earlier it
was needed because the first use was conditional.

> 
> > > +
> > > + phy->tx0_symbol_clk = devm_clk_get(phy->dev, "tx0_symbol_clk");
> > > + if (IS_ERR(phy->tx0_symbol_clk)) {
> > > + dev_err(phy->dev, "failed to get tx0_symbol_clk clock\n");
> > > + goto out;
> > > + }
> > > +
> > > + phy->rx0_symbol_clk = devm_clk_get(phy->dev, "rx0_symbol_clk");
> > > + if (IS_ERR(phy->rx0_symbol_clk)) {
> > > + dev_err(phy->dev, "failed to get rx0_symbol_clk clock\n");
> > > + goto out;
> > > + }
> > > +
> > > + phy->rx1_symbol_clk = devm_clk_get(phy->dev, "rx1_symbol_clk");
> > > + if (IS_ERR(phy->rx0_symbol_clk)) {
> > > + dev_err(phy->dev, "failed to get rx1_symbol_clk clock\n");
> > > + goto out;
> > > + }
> > > +
> > > + ret = clk_prepare_enable(phy->tx0_symbol_clk);
> > > + if (ret) {
> > > + dev_err(phy->dev, "%s: tx0_symbol_clk enable failed %d\n",
> > __func__, ret);
> > > + goto out;
> > > + }
-- 
~Vinod


[PATCH v3 0/7] arm64: perf: Proper cap_user_time* support

2020-07-15 Thread Leo Yan
This patch set rebases Peter's patches to support the
cap_user_time/cap_user_time_short ABI for Arm64 and to export Arm arch
timer counter related parameters from the kernel to the Perf tool.

After getting feedback from Ahmed, this patch set contains Ahmed's new patch
to refine sched clock data access with raw_read_seqcount_latch().

This patch set has been rebased on mainline kernel with the latest
commit 994e99a96c9b ("Merge tag 'platform-drivers-x86-v5.8-2' of
git://git.infradead.org/linux-platform-drivers-x86 into master"); it
has been verified with Perf tool for Arm SPE timestamp enabling.


Changes from v2:
- Included Ahmed's patch to use raw_read_seqcount_latch() for
  sched_clock's seqcount latching;
- Changed to use mul_u64_u32_shr() for converting counter to ns
  in Arm64's arch_perf_update_userpage() (PeterZ).


Ahmed S. Darwish (1):
  time/sched_clock: Use raw_read_seqcount_latch()

Leo Yan (1):
  tools headers UAPI: Update tools's copy of linux/perf_event.h

Peter Zijlstra (5):
  sched_clock: Expose struct clock_read_data
  arm64: perf: Implement correct cap_user_time
  arm64: perf: Only advertise cap_user_time for arch_timer
  perf: Add perf_event_mmap_page::cap_user_time_short ABI
  arm64: perf: Add cap_user_time_short

 arch/arm64/kernel/perf_event.c| 57 ---
 include/linux/sched_clock.h   | 28 +
 include/uapi/linux/perf_event.h   | 23 +--
 kernel/time/sched_clock.c | 41 ++-
 tools/include/uapi/linux/perf_event.h | 23 +--
 5 files changed, 124 insertions(+), 48 deletions(-)

-- 
2.17.1



RE: [PATCH v2] powercap: Add Power Limit4 support

2020-07-15 Thread Pawnikar, Sumeet R

> -Original Message-
> From: Srinivas Pandruvada 
> Sent: Wednesday, July 15, 2020 5:44 AM
> To: Rafael J. Wysocki ; Pawnikar, Sumeet R
> 
> Cc: Rafael J. Wysocki ; Zhang, Rui
> ; Linux PM ; Linux Kernel
> Mailing List ; Shevchenko, Andriy
> 
> Subject: Re: [PATCH v2] powercap: Add Power Limit4 support
> 
> On Tue, 2020-07-14 at 15:21 +0200, Rafael J. Wysocki wrote:
> > >
> 
> [...]
> 
> > On Tue, Jul 14, 2020 at 10:22 AM Sumeet Pawnikar
> >  wrote:
> >
> > Srinivas, does the patch look good to you?
> 
> Some minor comments then Summet can add my
> 
> Reviewed-and-tested-by: Srinivas Pandruvada <
> srinivas.pandruv...@linux.intel.com>
> 

Thanks Srinivas for testing this. 
I will address your below comment
and submit v3 with above tag. 

> > > ---
> > > Changes in v2:
> > >  - Addressed review comments from Rafael.
> > >  - Made the commit message more clearer.
> > >  - Updated powercap documentation.
> > > ---
> > >
> 
> [...]
> 
> > > 0,1).
> > > +Depending on different power zones, the Intel RAPL technology
> > > allows
> > > +one or multiple constraints like short term, long term and peak
> > > power,
> > > +with different time windows to be applied to each power zone.
> I think better to spell out that time window is not applicable to "peak 
> power".
> Otherwise someone will send a bug report.
> 
> 
> [...]
> 
> > >  static int rapl_msr_probe(struct platform_device *pdev)  {
> > > int ret;
> > > +   const struct x86_cpu_id *id =
> > > x86_match_cpu(pl4_support_ids);
> 
> To match coding style in this file:
>   const struct x86_cpu_id *id = x86_match_cpu(pl4_support_ids);
> int ret;
> 

Thanks for review.  

Regards,
Sumeet.

> > > rapl_msr_priv.read_raw = rapl_msr_read_raw;
> 
> Thanks,
> Srinivas



[PATCH v1] driver core: Fix scheduling while atomic warnings during device link deletion

2020-07-15 Thread Saravana Kannan
Marek and Guenter reported that commit 287905e68dd2 ("driver core:
Expose device link details in sysfs") caused sleeping/scheduling while
atomic warnings.

BUG: sleeping function called from invalid context at kernel/locking/mutex.c:935
in_atomic(): 1, irqs_disabled(): 0, non_block: 0, pid: 12, name: kworker/0:1
2 locks held by kworker/0:1/12:
  #0: ee8074a8 ((wq_completion)rcu_gp){+.+.}-{0:0}, at: 
process_one_work+0x174/0x7dc
  #1: ee921f20 ((work_completion)(>work)){+.+.}-{0:0}, at: 
process_one_work+0x174/0x7dc
Preemption disabled at:
[] srcu_invoke_callbacks+0xc0/0x154
- 8< - SNIP
[] (device_del) from [] (device_unregister+0x24/0x64)
[] (device_unregister) from [] 
(srcu_invoke_callbacks+0xcc/0x154)
[] (srcu_invoke_callbacks) from [] 
(process_one_work+0x234/0x7dc)
[] (process_one_work) from [] (worker_thread+0x44/0x51c)
[] (worker_thread) from [] (kthread+0x158/0x1a0)
[] (kthread) from [] (ret_from_fork+0x14/0x20)
Exception stack(0xee921fb0 to 0xee921ff8)

This was caused by the device link device being released in the context
of srcu_invoke_callbacks().  There is no need to wait till the RCU
callback to release the device link device.  So release the device
earlier and revert the RCU callback code to what it was before
commit 287905e68dd2 ("driver core: Expose device link details in sysfs")

Fixes: 287905e68dd2 ("driver core: Expose device link details in sysfs")
Reported-by: Marek Szyprowski 
Reported-by: Guenter Roeck 
Signed-off-by: Saravana Kannan 
---
Marek and Guenter,

I haven't had a chance to test this yet. Can one of you please test it
and confirm it fixes the issue?

Thanks,
Saravana

 drivers/base/core.c | 10 +++---
 1 file changed, 3 insertions(+), 7 deletions(-)

diff --git a/drivers/base/core.c b/drivers/base/core.c
index 5373ddd029f6..ccb2ce11f5b5 100644
--- a/drivers/base/core.c
+++ b/drivers/base/core.c
@@ -306,16 +306,10 @@ static struct attribute *devlink_attrs[] = {
 };
 ATTRIBUTE_GROUPS(devlink);
 
-static void devlink_dev_release(struct device *dev)
-{
-   kfree(to_devlink(dev));
-}
-
 static struct class devlink_class = {
.name = "devlink",
.owner = THIS_MODULE,
.dev_groups = devlink_groups,
-   .dev_release = devlink_dev_release,
 };
 
 static int devlink_add_symlinks(struct device *dev,
@@ -737,7 +731,7 @@ static void device_link_free(struct device_link *link)
 
put_device(link->consumer);
put_device(link->supplier);
-   device_unregister(&link->link_dev);
+   kfree(link);
 }
 
 #ifdef CONFIG_SRCU
@@ -756,6 +750,7 @@ static void __device_link_del(struct kref *kref)
if (link->flags & DL_FLAG_PM_RUNTIME)
pm_runtime_drop_link(link->consumer);
 
+   device_unregister(&link->link_dev);
	list_del_rcu(&link->s_node);
	list_del_rcu(&link->c_node);
	call_srcu(&device_links_srcu, &link->rcu_head, __device_link_free_srcu);
@@ -771,6 +766,7 @@ static void __device_link_del(struct kref *kref)
if (link->flags & DL_FLAG_PM_RUNTIME)
pm_runtime_drop_link(link->consumer);
 
+   device_unregister(&link->link_dev);
	list_del(&link->s_node);
	list_del(&link->c_node);
device_link_free(link);
-- 
2.28.0.rc0.105.gf9edc3c819-goog



Re: [External] Re: [PATCH v2] mm: memcg/slab: fix memory leak at non-root kmem_cache destroy

2020-07-15 Thread Muchun Song
On Thu, Jul 16, 2020 at 1:54 AM Roman Gushchin  wrote:
>
> On Thu, Jul 16, 2020 at 12:50:22AM +0800, Muchun Song wrote:
> > If the kmem_cache refcount is greater than one, we should not
> > mark the root kmem_cache as dying. If we mark the root kmem_cache
> > dying incorrectly, the non-root kmem_cache can never be destroyed.
> > It resulted in memory leak when memcg was destroyed. We can use the
> > following steps to reproduce.
> >
> >   1) Use kmem_cache_create() to create a new kmem_cache named A.
> >   2) Coincidentally, the kmem_cache A is an alias for kmem_cache B,
> >  so the refcount of B is just increased.
> >   3) Use kmem_cache_destroy() to destroy the kmem_cache A, just
> >  decrease the B's refcount but mark the B as dying.
> >   4) Create a new memory cgroup and alloc memory from the kmem_cache
> >  B. It leads to create a non-root kmem_cache for allocating memory.
> >   5) When destroy the memory cgroup created in the step 4), the
> >  non-root kmem_cache can never be destroyed.
> >
> > If we repeat steps 4) and 5), this will cause a lot of memory leak.
> > So only when refcount reach zero, we mark the root kmem_cache as dying.
> >
> > Fixes: 92ee383f6daa ("mm: fix race between kmem_cache destroy, create and 
> > deactivate")
> > Signed-off-by: Muchun Song 
> > Reviewed-by: Shakeel Butt 
> > ---
> >
> > changelog in v2:
> >  1) Fix a confusing typo in the commit log.
>
> Ok, now I see the problem. Thank you for fixing the commit log!
>
> >  2) Remove flush_memcg_workqueue() for !CONFIG_MEMCG_KMEM.
> >  3) Introduce a new helper memcg_set_kmem_cache_dying() to fix a race
> > condition between flush_memcg_workqueue() and slab_unmergeable().
> >
> >  mm/slab_common.c | 54 
> > +++---
> >  1 file changed, 47 insertions(+), 7 deletions(-)
> >
> > diff --git a/mm/slab_common.c b/mm/slab_common.c
> > index 8c1ffbf7de45..c4958116e3fd 100644
> > --- a/mm/slab_common.c
> > +++ b/mm/slab_common.c
> > @@ -258,6 +258,11 @@ static void memcg_unlink_cache(struct kmem_cache *s)
> >   list_del(>memcg_params.kmem_caches_node);
> >   }
> >  }
> > +
> > +static inline bool memcg_kmem_cache_dying(struct kmem_cache *s)
> > +{
> > + return is_root_cache(s) && s->memcg_params.dying;
> > +}
> >  #else
> >  static inline int init_memcg_params(struct kmem_cache *s,
> >   struct kmem_cache *root_cache)
> > @@ -272,6 +277,11 @@ static inline void destroy_memcg_params(struct 
> > kmem_cache *s)
> >  static inline void memcg_unlink_cache(struct kmem_cache *s)
> >  {
> >  }
> > +
> > +static inline bool memcg_kmem_cache_dying(struct kmem_cache *s)
> > +{
> > + return false;
> > +}
> >  #endif /* CONFIG_MEMCG_KMEM */
> >
> >  /*
> > @@ -326,6 +336,13 @@ int slab_unmergeable(struct kmem_cache *s)
> >   if (s->refcount < 0)
> >   return 1;
> >
> > + /*
> > +  * If the kmem_cache is dying. We should also skip this
> > +  * kmem_cache.
> > +  */
> > + if (memcg_kmem_cache_dying(s))
> > + return 1;
> > +
> >   return 0;
> >  }
> >
> > @@ -886,12 +903,15 @@ static int shutdown_memcg_caches(struct kmem_cache *s)
> >   return 0;
> >  }
> >
> > -static void flush_memcg_workqueue(struct kmem_cache *s)
> > +static void memcg_set_kmem_cache_dying(struct kmem_cache *s)
> >  {
> >   spin_lock_irq(&memcg_kmem_wq_lock);
> >   s->memcg_params.dying = true;
> >   spin_unlock_irq(&memcg_kmem_wq_lock);
> > +}
> >
> > +static void flush_memcg_workqueue(struct kmem_cache *s)
> > +{
> >   /*
> >* SLAB and SLUB deactivate the kmem_caches through call_rcu. Make
> >* sure all registered rcu callbacks have been invoked.
> > @@ -923,10 +943,6 @@ static inline int shutdown_memcg_caches(struct 
> > kmem_cache *s)
> >  {
> >   return 0;
> >  }
> > -
> > -static inline void flush_memcg_workqueue(struct kmem_cache *s)
> > -{
> > -}
> >  #endif /* CONFIG_MEMCG_KMEM */
> >
> >  void slab_kmem_cache_release(struct kmem_cache *s)
> > @@ -944,8 +960,6 @@ void kmem_cache_destroy(struct kmem_cache *s)
> >   if (unlikely(!s))
> >   return;
> >
> > - flush_memcg_workqueue(s);
> > -
> >   get_online_cpus();
> >   get_online_mems();
> >
> > @@ -955,6 +969,32 @@ void kmem_cache_destroy(struct kmem_cache *s)
> >   if (s->refcount)
> >   goto out_unlock;
> >
> > +#ifdef CONFIG_MEMCG_KMEM
> > + memcg_set_kmem_cache_dying(s);
> > +
> > + mutex_unlock(&slab_mutex);
>
> Hm, but in theory s->refcount can be increased here?

I have tried my best to read all the code that operates on s->refcount.
There is only one place that increases s->refcount, and that is
__kmem_cache_alias(). If the kmem cache is dying, __kmem_cache_alias() can
never pick it up (and so never increase its refcount), because
slab_unmergeable() now reports it as unmergeable and both paths run under
the same slab_mutex. So I think there is no problem here, right?
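
For reference, a much-simplified sketch of the flow being discussed (not a
literal excerpt from mm/): __kmem_cache_alias() is the only path that bumps
the refcount, it skips caches that slab_unmergeable() reports as unmergeable,
and kmem_cache_destroy() drops and checks the refcount under the same
slab_mutex:

	/* kmem_cache_create() merge path, called with slab_mutex held */
	struct kmem_cache *__kmem_cache_alias(const char *name, unsigned int size,
					      unsigned int align, slab_flags_t flags,
					      void (*ctor)(void *))
	{
		struct kmem_cache *s;

		/* find_mergeable() skips caches where slab_unmergeable() != 0 */
		s = find_mergeable(size, align, flags, name, ctor);
		if (s)
			s->refcount++;	/* the only place the refcount grows */

		return s;
	}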

> So it doesn't solve the problem completely, but makes it 

RE: [PATCH v2] powercap: Add Power Limit4 support

2020-07-15 Thread Pawnikar, Sumeet R

> -Original Message-
> From: linux-pm-ow...@vger.kernel.org 
> On Behalf Of Rafael J. Wysocki
> Sent: Tuesday, July 14, 2020 6:51 PM
> To: Pawnikar, Sumeet R ; Srinivas
> Pandruvada 
> Cc: Rafael J. Wysocki ; Zhang, Rui
> ; Linux PM ; Linux Kernel
> Mailing List ; Shevchenko, Andriy
> 
> Subject: Re: [PATCH v2] powercap: Add Power Limit4 support
> 
> On Tue, Jul 14, 2020 at 10:22 AM Sumeet Pawnikar
>  wrote:
> >
> > Modern Intel Mobile platforms support power limit4 (PL4), which is the
> > SoC package level maximum power limit (in Watts). It can be used to
> > preemptively limits potential SoC power to prevent power spikes from
> > tripping the power adapter and battery over-current protection.
> > This patch enables this feature by exposing package level peak power
> > capping control to userspace via RAPL sysfs interface. With this,
> > application like DTPF can modify PL4 power limit, the similar way of
> > other package power limit (PL1).
> > As this feature is not tested on previous generations, here it is
> > enabled only for the platform that has been verified to work, for
> > safety concerns.
> 
> Thanks for the better changelog!
> 
> > Signed-off-by: Sumeet Pawnikar 
> > Signed-off-by: Zhang Rui 
> 
> But why is the Rui's S-o-b under your patch?
> 
> Is it there because Rui has contributed to the patch?  If so, you should have
> used the Co-developed-by tag instead.
> 

Sure, I will update this accordingly. 

Thanks,
Sumeet.

> Srinivas, does the patch look good to you?
> 
> > ---
> > Changes in v2:
> >  - Addressed review comments from Rafael.
> >  - Made the commit message more clearer.
> >  - Updated powercap documentation.
> > ---
> >  Documentation/power/powercap/powercap.rst |   14 +---
> >  drivers/powercap/intel_rapl_common.c  |   54
> +++--
> >  drivers/powercap/intel_rapl_msr.c |   15 
> >  include/linux/intel_rapl.h|5 ++-
> >  4 files changed, 80 insertions(+), 8 deletions(-)
> >
> > diff --git a/Documentation/power/powercap/powercap.rst
> > b/Documentation/power/powercap/powercap.rst
> > index 7ae3b44c7624..b3af059b6d5d 100644
> > --- a/Documentation/power/powercap/powercap.rst
> > +++ b/Documentation/power/powercap/powercap.rst
> > @@ -167,11 +167,12 @@ For example::
> >  package-0
> >  -
> >
> > -The Intel RAPL technology allows two constraints, short term and long
> > term, -with two different time windows to be applied to each power
> > zone.  Thus for -each zone there are 2 attributes representing the
> > constraint names, 2 power -limits and 2 attributes representing the
> > sizes of the time windows. Such that,
> > -constraint_j_* attributes correspond to the jth constraint (j = 0,1).
> > +Depending on different power zones, the Intel RAPL technology allows
> > +one or multiple constraints like short term, long term and peak
> > +power, with different time windows to be applied to each power zone.
> > +All the zones contain attributes representing the constraint names,
> > +power limits and the sizes of the time windows. Such that,
> > +constraint_j_* attributes correspond to the jth constraint (j = 0,1,2).
> >
> >  For example::
> >
> > @@ -181,6 +182,9 @@ For example::
> > constraint_1_name
> > constraint_1_power_limit_uw
> > constraint_1_time_window_us
> > +   constraint_2_name
> > +   constraint_2_power_limit_uw
> > +   constraint_2_time_window_us
> >
> >  Power Zone Attributes
> >  =
> > diff --git a/drivers/powercap/intel_rapl_common.c
> > b/drivers/powercap/intel_rapl_common.c
> > index 61a63a16b5e7..a8bcc58d61f0 100644
> > --- a/drivers/powercap/intel_rapl_common.c
> > +++ b/drivers/powercap/intel_rapl_common.c
> > @@ -39,6 +39,8 @@
> >  #define POWER_HIGH_LOCK BIT_ULL(63)
> >  #define POWER_LOW_LOCK  BIT(31)
> >
> > +#define POWER_LIMIT4_MASK  0x1FFF
> > +
> >  #define TIME_WINDOW1_MASK   (0x7FULL<<17)
> >  #define TIME_WINDOW2_MASK   (0x7FULL<<49)
> >
> > @@ -82,6 +84,7 @@ enum unit_type {
> >
> >  static const char pl1_name[] = "long_term";  static const char
> > pl2_name[] = "short_term";
> > +static const char pl4_name[] = "peak_power";
> >
> >  #define power_zone_to_rapl_domain(_zone) \
> > container_of(_zone, struct rapl_domain, power_zone) @@ -337,6
> > +340,9 @@ static int set_power_limit(struct powercap_zone *power_zone,
> int cid,
> > case PL2_ENABLE:
> > rapl_write_data_raw(rd, POWER_LIMIT2, power_limit);
> > break;
> > +   case PL4_ENABLE:
> > +   rapl_write_data_raw(rd, POWER_LIMIT4, power_limit);
> > +   break;
> > default:
> > ret = -EINVAL;
> > }
> > @@ -371,6 +377,9 @@ static int get_current_power_limit(struct
> powercap_zone *power_zone, int cid,
> > case PL2_ENABLE:
> > prim = POWER_LIMIT2;
> > break;
> > +   case PL4_ENABLE:
> 

Re: [PATCH v7 2/5] remoteproc: qcom: Introduce helper to store pil info in IMEM

2020-07-15 Thread Nathan Chancellor
On Mon, Jun 22, 2020 at 12:19:39PM -0700, Bjorn Andersson wrote:
> A region in IMEM is used to communicate load addresses of remoteproc to
> post mortem debug tools. Implement a helper function that can be used to
> store this information in order to enable these tools to process
> collected ramdumps.
> 
> Reviewed-by: Mathieu Poirier 
> Reviewed-by: Vinod Koul 
> Signed-off-by: Bjorn Andersson 
> ---
> 
> Changes since v6:
> - Replaced entry struct and usage of offset_of with a comment and defined 
> offsets
> - Renamed pil_reloc_lock
> - Write out upper 32 bits of the address
> - Include header from implementation
> - Add linux/types.h to the header file
> 
>  drivers/remoteproc/Kconfig |   3 +
>  drivers/remoteproc/Makefile|   1 +
>  drivers/remoteproc/qcom_pil_info.c | 129 +
>  drivers/remoteproc/qcom_pil_info.h |   9 ++
>  4 files changed, 142 insertions(+)
>  create mode 100644 drivers/remoteproc/qcom_pil_info.c
>  create mode 100644 drivers/remoteproc/qcom_pil_info.h
> 
> diff --git a/drivers/remoteproc/Kconfig b/drivers/remoteproc/Kconfig
> index c4d1731295eb..f4bd96d1a1a3 100644
> --- a/drivers/remoteproc/Kconfig
> +++ b/drivers/remoteproc/Kconfig
> @@ -116,6 +116,9 @@ config KEYSTONE_REMOTEPROC
> It's safe to say N here if you're not interested in the Keystone
> DSPs or just want to use a bare minimum kernel.
>  
> +config QCOM_PIL_INFO
> + tristate
> +
>  config QCOM_RPROC_COMMON
>   tristate
>  
> diff --git a/drivers/remoteproc/Makefile b/drivers/remoteproc/Makefile
> index e8b886e511f0..fe398f82d550 100644
> --- a/drivers/remoteproc/Makefile
> +++ b/drivers/remoteproc/Makefile
> @@ -16,6 +16,7 @@ obj-$(CONFIG_OMAP_REMOTEPROC)   += 
> omap_remoteproc.o
>  obj-$(CONFIG_WKUP_M3_RPROC)  += wkup_m3_rproc.o
>  obj-$(CONFIG_DA8XX_REMOTEPROC)   += da8xx_remoteproc.o
>  obj-$(CONFIG_KEYSTONE_REMOTEPROC)+= keystone_remoteproc.o
> +obj-$(CONFIG_QCOM_PIL_INFO)  += qcom_pil_info.o
>  obj-$(CONFIG_QCOM_RPROC_COMMON)  += qcom_common.o
>  obj-$(CONFIG_QCOM_Q6V5_COMMON)   += qcom_q6v5.o
>  obj-$(CONFIG_QCOM_Q6V5_ADSP) += qcom_q6v5_adsp.o
> diff --git a/drivers/remoteproc/qcom_pil_info.c 
> b/drivers/remoteproc/qcom_pil_info.c
> new file mode 100644
> index ..0536e3904669
> --- /dev/null
> +++ b/drivers/remoteproc/qcom_pil_info.c
> @@ -0,0 +1,129 @@
> +// SPDX-License-Identifier: GPL-2.0-only
> +/*
> + * Copyright (c) 2019-2020 Linaro Ltd.
> + */
> +#include 
> +#include 
> +#include 
> +#include 
> +#include "qcom_pil_info.h"
> +
> +/*
> + * The PIL relocation information region is used to communicate memory 
> regions
> + * occupied by co-processor firmware for post mortem crash analysis.
> + *
> + * It consists of an array of entries with an 8 byte textual identifier of 
> the
> + * region followed by a 64 bit base address and 32 bit size, both little
> + * endian.
> + */
> +#define PIL_RELOC_NAME_LEN   8
> +#define PIL_RELOC_ENTRY_SIZE (PIL_RELOC_NAME_LEN + sizeof(__le64) + 
> sizeof(__le32))
> +
> +struct pil_reloc {
> + void __iomem *base;
> + size_t num_entries;
> +};
> +
> +static struct pil_reloc _reloc __read_mostly;
> +static DEFINE_MUTEX(pil_reloc_lock);
> +
> +static int qcom_pil_info_init(void)
> +{
> + struct device_node *np;
> + struct resource imem;
> + void __iomem *base;
> + int ret;
> +
> + /* Already initialized? */
> + if (_reloc.base)
> + return 0;
> +
> + np = of_find_compatible_node(NULL, NULL, "qcom,pil-reloc-info");
> + if (!np)
> + return -ENOENT;
> +
> + ret = of_address_to_resource(np, 0, &imem);
> + of_node_put(np);
> + if (ret < 0)
> + return ret;
> +
> + base = ioremap(imem.start, resource_size(&imem));
> + if (!base) {
> + pr_err("failed to map PIL relocation info region\n");
> + return -ENOMEM;
> + }
> +
> + memset_io(base, 0, resource_size(&imem));
> +
> + _reloc.base = base;
> + _reloc.num_entries = resource_size(&imem) / PIL_RELOC_ENTRY_SIZE;
> +
> + return 0;
> +}
> +
> +/**
> + * qcom_pil_info_store() - store PIL information of image in IMEM
> + * @image:   name of the image
> + * @base:base address of the loaded image
> + * @size:size of the loaded image
> + *
> + * Return: 0 on success, negative errno on failure
> + */
> +int qcom_pil_info_store(const char *image, phys_addr_t base, size_t size)
> +{
> + char buf[PIL_RELOC_NAME_LEN];
> + void __iomem *entry;
> + int ret;
> + int i;
> +
> + mutex_lock(&pil_reloc_lock);
> + ret = qcom_pil_info_init();
> + if (ret < 0) {
> + mutex_unlock(&pil_reloc_lock);
> + return ret;
> + }
> +
> + for (i = 0; i < _reloc.num_entries; i++) {
> + entry = _reloc.base + i * PIL_RELOC_ENTRY_SIZE;
> +
> + memcpy_fromio(buf, entry, PIL_RELOC_NAME_LEN);
> +
> + /*
> +

Re: [PATCH v4 7/9] bus: mhi: core: Introduce debugfs entries and counters for MHI

2020-07-15 Thread Manivannan Sadhasivam
On Thu, Jul 09, 2020 at 12:33:02PM -0700, bbh...@codeaurora.org wrote:
> On 2020-07-04 08:41, Manivannan Sadhasivam wrote:
> > On Mon, Jun 29, 2020 at 09:39:40AM -0700, Bhaumik Bhatt wrote:
> > > Introduce debugfs entries to show state, register, channel, and event
> > > ring information. Add MHI state counters to keep track of the state
> > > changes on the device. Also, allow the host to trigger a device reset,
> > > issue votes, and change the MHI timeout to help in debug.
> > > 
> > > Signed-off-by: Bhaumik Bhatt 
> > > ---
> > >  drivers/bus/mhi/Kconfig |   8 +
> > >  drivers/bus/mhi/core/Makefile   |   5 +-
> > >  drivers/bus/mhi/core/debugfs.c  | 444
> > > 
> > >  drivers/bus/mhi/core/init.c |   7 +
> > >  drivers/bus/mhi/core/internal.h |  24 +++
> > >  drivers/bus/mhi/core/pm.c   |   4 +
> > >  include/linux/mhi.h |   4 +
> > >  7 files changed, 493 insertions(+), 3 deletions(-)
> > >  create mode 100644 drivers/bus/mhi/core/debugfs.c
> > > 
> > > diff --git a/drivers/bus/mhi/Kconfig b/drivers/bus/mhi/Kconfig
> > > index a8bd9bd..6a217ff 100644
> > > --- a/drivers/bus/mhi/Kconfig
> > > +++ b/drivers/bus/mhi/Kconfig
> > > @@ -12,3 +12,11 @@ config MHI_BUS
> > >communication protocol used by the host processors to control
> > >and communicate with modem devices over a high speed peripheral
> > >bus or shared memory.
> > > +
> > > +config MHI_BUS_DEBUG
> > > + bool "Debugfs support for the MHI bus"
> > > + depends on MHI_BUS && DEBUG_FS
> > > + help
> > > +  Enable debugfs support for use with the MHI transport. Allows
> > > +  reading and/or modifying some values within the MHI controller
> > > +  for debug and test purposes.
> > > diff --git a/drivers/bus/mhi/core/Makefile
> > > b/drivers/bus/mhi/core/Makefile
> > > index 66e2700..460a548 100644
> > > --- a/drivers/bus/mhi/core/Makefile
> > > +++ b/drivers/bus/mhi/core/Makefile
> > > @@ -1,3 +1,2 @@
> > > -obj-$(CONFIG_MHI_BUS) := mhi.o
> > > -
> > > -mhi-y := init.o main.o pm.o boot.o
> > > +obj-$(CONFIG_MHI_BUS) := init.o main.o pm.o boot.o
> > > +obj-$(CONFIG_MHI_BUS_DEBUG) += debugfs.o
> > > diff --git a/drivers/bus/mhi/core/debugfs.c
> > > b/drivers/bus/mhi/core/debugfs.c
> > > new file mode 100644
> > > index 000..266cbf0
> > > --- /dev/null
> > > +++ b/drivers/bus/mhi/core/debugfs.c
> > > @@ -0,0 +1,444 @@
> > > +static int mhi_debugfs_device_vote_show(struct seq_file *m, void *d)
> > > +{

[...]

> > 
> > The term 'vote' is confusing here. Can you come up with something which
> > portrays
> > device power mode here?
> > 
> I was hoping vote would be appropriate as we would like to "vote" for the
> device to be
> in active state by doing a "get" on it. If a vote is present, it would mean
> that the
> device is in its active/M0 power state which can be seen on the "states"
> debugfs entry.
> 
> Let me know what you think. I will consult Hemant on this as well once more.

If you want to go with the term 'vote', then please add a prefix like power.
This makes it more explicit. Simply saying 'device vote' doesn't mean power
state.

> > > + struct mhi_controller *mhi_cntrl = m->private;
> > > + struct mhi_device *mhi_dev = mhi_cntrl->mhi_dev;
> > > +
> > > + if (!mhi_is_active(mhi_cntrl)) {
> > > + seq_puts(m, "Device not ready\n");
> > > + return -ENODEV;
> > > + }
> > > +
> > > + seq_printf(m,
> > > +"Votes: %d\n%s\n", mhi_dev->dev_wake,
> > > +"Usage: echo get/put > device_vote for vote/unvote");
> > > +
> > > + return 0;
> > > +}
> > > +
> > > +static ssize_t mhi_debugfs_device_vote_write(struct file *file,
> > > +  const char __user *ubuf,
> > > +  size_t count, loff_t *ppos)
> > > +{
> > > + struct seq_file *m = file->private_data;
> > > + struct mhi_controller *mhi_cntrl = m->private;
> > > + struct mhi_device *mhi_dev = mhi_cntrl->mhi_dev;
> > > + char buf[32];
> > > + int ret = -EINVAL;
> > > +
> > > + if (copy_from_user(&buf, ubuf, min_t(size_t, sizeof(buf) - 1,
> > > count)))
> > > + return -EFAULT;
> > > +
> > > + if (!strncmp(buf, "get", 3)) {
> > 
> > Hmm, but the buffer size is 32?
> > 
> Yes, I referred to some other driver while writing this and buffer size was
> chosen
> to be more than enough for comparison purpose. Any other way to handle this?

AFAIK the requirement for the src buffer is to be 16-byte aligned so that the
load and store won't cross the cache boundary. So just use 16 in that case.
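
A minimal sketch with the 16-byte buffer (names reused from the quoted patch;
not the actual v2 code):

static ssize_t mhi_debugfs_device_vote_write(struct file *file,
					     const char __user *ubuf,
					     size_t count, loff_t *ppos)
{
	struct seq_file *m = file->private_data;
	struct mhi_controller *mhi_cntrl = m->private;
	struct mhi_device *mhi_dev = mhi_cntrl->mhi_dev;
	char buf[16] = {};	/* plenty for "get"/"put" */
	int ret = -EINVAL;

	if (copy_from_user(buf, ubuf, min_t(size_t, sizeof(buf) - 1, count)))
		return -EFAULT;

	if (!strncmp(buf, "get", 3)) {
		ret = mhi_device_get_sync(mhi_dev);
	} else if (!strncmp(buf, "put", 3)) {
		mhi_device_put(mhi_dev);
		ret = 0;
	}

	return ret ? ret : count;
}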

> > > + ret = mhi_device_get_sync(mhi_dev);
> > > + } else if (!strncmp(buf, "put", 3)) {
> > > + mhi_device_put(mhi_dev);
> > > + ret = 0;
> > > + }
> > > +
> > > + return ret ? ret : count;
> > > +}
> > > +
> > > +static int mhi_debugfs_timeout_ms_show(struct seq_file *m, void *d)
> > > +{
> > > + struct mhi_controller *mhi_cntrl = m->private;
> > > +
> > > + seq_printf(m, "%u ms\n", mhi_cntrl->timeout_ms);
> > 

Re: WARNING in submit_bio_checks

2020-07-15 Thread syzbot
syzbot has bisected this issue to:

commit 449325b52b7a6208f65ed67d3484fd7b7184477b
Author: Alexei Starovoitov 
Date:   Tue May 22 02:22:29 2018 +

umh: introduce fork_usermode_blob() helper

bisection log:  https://syzkaller.appspot.com/x/bisect.txt?x=10fc4b0090
start commit:   9e50b94b Add linux-next specific files for 20200703
git tree:   linux-next
final oops: https://syzkaller.appspot.com/x/report.txt?x=12fc4b0090
console output: https://syzkaller.appspot.com/x/log.txt?x=14fc4b0090
kernel config:  https://syzkaller.appspot.com/x/.config?x=f99cc0faa1476ed6
dashboard link: https://syzkaller.appspot.com/bug?extid=4c50ac32e5b10e4133e1
syz repro:  https://syzkaller.appspot.com/x/repro.syz?x=fb6d10
C reproducer:   https://syzkaller.appspot.com/x/repro.c?x=1218fa1f10

Reported-by: syzbot+4c50ac32e5b10e413...@syzkaller.appspotmail.com
Fixes: 449325b52b7a ("umh: introduce fork_usermode_blob() helper")

For information about bisection process see: https://goo.gl/tpsmEJ#bisection


[PATCH 2/2] debugfs: Add access restriction option

2020-07-15 Thread Peter Enderborg
Since debugfs includes sensitive information, it needs to be treated
carefully. But it also has many very useful debug functions for userspace.
With this option we can use the same configuration for systems that need
debugfs and still have a way to turn it off. This gives extra protection
against exposure on systems where user-space services with system
access are attacked.

It is controlled by a configurable default value that can be overridden
with a kernel command line parameter (debugfs=).

It can be on or off, but there is also a mode that is internally on but
not visible from user-space. This no-mount mode does not register debugfs
as a filesystem, but clients can register their parts in the internal
structures. That data can be read with a debugger or saved with a
crashkernel. When it is off, clients get an EPERM error when accessing
the functions for registering their components.
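
For illustration, the command-line handling could be an early_param handler
along these lines (only DEBUGFS_ALLOW_API appears in the hunks below;
DEBUGFS_ALLOW_MOUNT is assumed here as its mount-permission counterpart):

/* Sketch only; bit names beyond DEBUGFS_ALLOW_API are assumptions. */
static int __init debugfs_kernel(char *str)
{
	if (!str)
		return 0;

	if (!strcmp(str, "on"))
		debugfs_allow = DEBUGFS_ALLOW_API | DEBUGFS_ALLOW_MOUNT;
	else if (!strcmp(str, "no-mount"))
		debugfs_allow = DEBUGFS_ALLOW_API;
	else if (!strcmp(str, "off"))
		debugfs_allow = 0;

	return 0;
}
early_param("debugfs", debugfs_kernel);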

Signed-off-by: Peter Enderborg 
---
 .../admin-guide/kernel-parameters.txt | 15 
 fs/debugfs/inode.c| 37 +++
 fs/debugfs/internal.h | 14 +++
 lib/Kconfig.debug | 32 
 4 files changed, 98 insertions(+)

diff --git a/Documentation/admin-guide/kernel-parameters.txt 
b/Documentation/admin-guide/kernel-parameters.txt
index fb95fad81c79..779d6cdc9627 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -827,6 +827,21 @@
useful to also enable the page_owner functionality.
on: enable the feature
 
+   debugfs=[KNL] This parameter enables what is exposed to
+   userspace and debugfs internal clients.
+   Format: { on, no-mount, off }
+   on: All functions are enabled.
+   no-mount:
+   Filesystem is not registered but kernel clients can
+   access APIs and a crashkernel can be used to read
+   its content. There is nothing to mount.
+   off:Filesystem is not registered and clients
+   get a -EPERM as result when trying to register files
+   or directories within debugfs.
+   This is equivalent to the runtime functionality if
+   debugfs was not enabled in the kernel at all.
+   Default value is set at build time with a kernel
+   configuration.
+
debugpat[X86] Enable PAT debugging
 
decnet.addr=[HW,NET]
diff --git a/fs/debugfs/inode.c b/fs/debugfs/inode.c
index b7f2e971ecbc..02d08b17d0e6 100644
--- a/fs/debugfs/inode.c
+++ b/fs/debugfs/inode.c
@@ -35,6 +35,7 @@
 static struct vfsmount *debugfs_mount;
 static int debugfs_mount_count;
 static bool debugfs_registered;
+static unsigned int debugfs_allow = DEFAULT_DEBUGFS_ALLOW_BITS;
 
 /*
  * Don't allow access attributes to be changed whilst the kernel is locked down
@@ -266,6 +267,9 @@ static struct dentry *debug_mount(struct file_system_type 
*fs_type,
int flags, const char *dev_name,
void *data)
 {
+   if (!(debugfs_allow & DEBUGFS_ALLOW_API))
+   return ERR_PTR(-EPERM);
+
return mount_single(fs_type, flags, data, debug_fill_super);
 }
 
@@ -311,6 +315,9 @@ static struct dentry *start_creating(const char *name, 
struct dentry *parent)
struct dentry *dentry;
int error;
 
+   if (!(debugfs_allow & DEBUGFS_ALLOW_API))
+   return ERR_PTR(-EPERM);
+
pr_debug("creating file '%s'\n", name);
 
if (IS_ERR(parent))
@@ -385,6 +392,11 @@ static struct dentry *__debugfs_create_file(const char 
*name, umode_t mode,
if (IS_ERR(dentry))
return dentry;
 
+   if (!(debugfs_allow & DEBUGFS_ALLOW_API)) {
+   failed_creating(dentry);
+   return ERR_PTR(-EPERM);
+   }
+
inode = debugfs_get_inode(dentry->d_sb);
if (unlikely(!inode)) {
pr_err("out of free dentries, can not create file '%s'\n",
@@ -541,6 +553,11 @@ struct dentry *debugfs_create_dir(const char *name, struct 
dentry *parent)
if (IS_ERR(dentry))
return dentry;
 
+   if (!(debugfs_allow & DEBUGFS_ALLOW_API)) {
+   failed_creating(dentry);
+   return ERR_PTR(-EPERM);
+   }
+
inode = debugfs_get_inode(dentry->d_sb);
if (unlikely(!inode)) {
pr_err("out of free dentries, can not create directory '%s'\n",
@@ -583,6 +600,11 @@ struct dentry *debugfs_create_automount(const char *name,
if (IS_ERR(dentry))
return dentry;
 
+   if (!(debugfs_allow & DEBUGFS_ALLOW_API)) {
+   failed_creating(dentry);
+   return ERR_PTR(-EPERM);
+   }
+
inode = 

[PATCH v7 0/2] debugfs: Add access restriction option

2020-07-15 Thread Peter Enderborg
Since debugfs includes sensitive information, it needs to be treated
carefully. But it also has many very useful debug functions for userspace.
With this option we can use the same configuration for systems that need
debugfs and still have a way to turn it off. This gives extra protection
against exposure on systems where user-space services with system
access are attacked.

v2. Removed MOUNT as part of restrictions. Added API's restrictions as
separate restriction.
v3  Updated Documentation after Randy Dunlap reviews and suggestions.
v4  Removed #ifdefs from inode.c and using internal.h for configuration
and now using BIT() for that. Functionality is now always on, and is
instead selected by a built-in default or a command line parameter.
Changed return value on debug_mount
Reported-by: kernel test robot 
I'm not sure that it is right
v5  Added notes to config help suggested by GregKH.
Removed _BIT from names, white-space and tab.
(checkpatch did not complain).
v6  Using ALLOW instead of ACCESS as name on BIT's. Change the fs to
mount to make it clear and easy to understand.
v7  Updated Kconfig.debug with Randy Dunlap corrections.




[PATCH 1/2] tracefs: Remove unnecessary debug_fs checks.

2020-07-15 Thread Peter Enderborg
This is a preparation for debugfs restricted mode.
We don't need debugfs to trace; the removed check stops tracefs from
working if debugfs is not initialised. Instead we try to automount
within debugfs and rely on its handling. The code path exists for
backward compatibility from when tracefs was part of debugfs; tracefs is
now standalone and does not need debugfs. When debugfs is restricted it
is compiled in but not active and returns EPERM to clients, so tracefs
won't work if it assumes debugfs is active just because it is compiled
into the kernel.

Reported-by: kernel test robot 
Signed-off-by: Peter Enderborg 
Reviewed-by: Greg Kroah-Hartman 
Acked-by: Steven Rostedt (VMware) 
---
 kernel/trace/trace.c | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index bb62269724d5..848f67a5f16d 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -8945,9 +8945,7 @@ struct dentry *tracing_init_dentry(void)
if (tr->dir)
return NULL;
 
-   if (WARN_ON(!tracefs_initialized()) ||
-   (IS_ENABLED(CONFIG_DEBUG_FS) &&
-WARN_ON(!debugfs_initialized())))
+   if (WARN_ON(!tracefs_initialized()))
return ERR_PTR(-ENODEV);
 
/*
-- 
2.17.1



Re: [PATCH v3] powerpc/pseries: detect secure and trusted boot state of the system.

2020-07-15 Thread Michael Ellerman
Daniel Axtens  writes:
> Hi Nayna,
>
> Looks good to me.
>
> Sorry for not noticing this before, but I think
>> +#include 

> is now superfluous (I think it's leftover from the machine_is
> version?). Maybe mpe will take pity on you and remove it when he picks
> up your patch.

Yeah I did that.

cheers


Re: [PATCH] ASoC: soc-component: Add missed return for snd_soc_pcm_component_mmap

2020-07-15 Thread Shengjiu Wang
On Thu, Jul 16, 2020 at 12:19 PM Kuninori Morimoto
 wrote:
>
>
> Hi Shengjiu
>
> > Add missed return for snd_soc_pcm_component_mmap, otherwise it always
> > returns -EINVAL.
> >
> > Fixes: e2329eeba45f ("ASoC: soc-component: add soc_component_err()")
> > Signed-off-by: Shengjiu Wang 
> > ---
>
> Oops, indeed.
> Thank you for the patch.
>
> > But it seems these functions are also missing a "return":
> > snd_soc_pcm_component_new()
> > snd_soc_pcm_component_sync_stop()
> >
> > Can you please take care of these, too?
Ok, will send v2

best regards
wang shengjiu
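
For reference, the v2 change for snd_soc_pcm_component_new() would presumably
follow the same pattern as the mmap fix, i.e. actually returning the
soc_component_ret() value; a sketch (the loop shape is an assumption, not
copied from the tree):

int snd_soc_pcm_component_new(struct snd_soc_pcm_runtime *rtd)
{
	struct snd_soc_component *component;
	int i, ret;

	for_each_rtd_components(rtd, i, component) {
		if (!component->driver->pcm_construct)
			continue;

		ret = component->driver->pcm_construct(component, rtd);
		if (ret < 0)
			/* propagate the error instead of dropping it */
			return soc_component_ret(component, ret);
	}

	return 0;
}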


[PATCH] igc: Do not use link uninitialized in igc_check_for_copper_link

2020-07-15 Thread Nathan Chancellor
Clang warns:

drivers/net/ethernet/intel/igc/igc_mac.c:374:6: warning: variable 'link'
is used uninitialized whenever 'if' condition is true
[-Wsometimes-uninitialized]
if (!mac->get_link_status) {
^
drivers/net/ethernet/intel/igc/igc_mac.c:424:33: note: uninitialized use
occurs here
ret_val = igc_set_ltr_i225(hw, link);
   ^~~~
drivers/net/ethernet/intel/igc/igc_mac.c:374:2: note: remove the 'if' if
its condition is always false
if (!mac->get_link_status) {
^~~~
drivers/net/ethernet/intel/igc/igc_mac.c:367:11: note: initialize the
variable 'link' to silence this warning
bool link;
 ^
  = 0
1 warning generated.

It is not wrong, link is only uninitialized after this through
igc_phy_has_link. Presumably, if we skip the majority of this function
when get_link_status is false, we should skip calling igc_set_ltr_i225
as well. Just directly return 0 in this case, rather than bothering with
adding another label or initializing link in the if statement.

Fixes: 707abf069548 ("igc: Add initial LTR support")
Link: https://github.com/ClangBuiltLinux/linux/issues/1095
Signed-off-by: Nathan Chancellor 
---
 drivers/net/ethernet/intel/igc/igc_mac.c | 6 ++
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/drivers/net/ethernet/intel/igc/igc_mac.c 
b/drivers/net/ethernet/intel/igc/igc_mac.c
index b47e7b0a6398..26e3c56a4a8b 100644
--- a/drivers/net/ethernet/intel/igc/igc_mac.c
+++ b/drivers/net/ethernet/intel/igc/igc_mac.c
@@ -371,10 +371,8 @@ s32 igc_check_for_copper_link(struct igc_hw *hw)
 * get_link_status flag is set upon receiving a Link Status
 * Change or Rx Sequence Error interrupt.
 */
-   if (!mac->get_link_status) {
-   ret_val = 0;
-   goto out;
-   }
+   if (!mac->get_link_status)
+   return 0;
 
/* First we want to see if the MII Status Register reports
 * link.  If so, then we want to get the current speed/duplex

base-commit: ca0e494af5edb59002665bf12871e94b4163a257
-- 
2.28.0.rc0



RE: [PATCH v2] panic: prevent panic_timeout * 1000 from overflow

2020-07-15 Thread charley.ashbringer
> > Since panic_timeout is an integer passed-in through sysctl,
> > the loop boundary panic_timeout * 1000 could overflow and
> > result in a zero-delay panic when panic_timeout is greater
> > than INT_MAX/1000.
> >
> > Fix this by moving 1000 to the left, also in case i/1000
> > might never be greater than panic_timeout, change i to
> > long long so that it strictly has more bits.
> >
> > ...
> >
> > --- a/kernel/panic.c
> > +++ b/kernel/panic.c
> > @@ -178,7 +178,8 @@ void panic(const char *fmt, ...)
> >  {
> > static char buf[1024];
> > va_list args;
> > -   long i, i_next = 0, len;
> > +   long long i;
> > +   long i_next = 0, len;
> > int state = 0;
> > int old_cpu, this_cpu;
> > bool _crash_kexec_post_notifiers = crash_kexec_post_notifiers;
> > @@ -315,7 +316,7 @@ void panic(const char *fmt, ...)
> >  */
> > pr_emerg("Rebooting in %d seconds..\n", panic_timeout);
> >
> > -   for (i = 0; i < panic_timeout * 1000; i += PANIC_TIMER_STEP) {
> > +   for (i = 0; i / 1000 < panic_timeout; i += PANIC_TIMER_STEP) {
> 
> Problem is, 32-bit machines generally cannot perform 64-bit divides.
> So a call is emitted to the library function __divsi64() (I forget the exact
> name) which Linux doesn't implement (because it's so slow, and we don't
> want to be calling it by accident).
> 

That's good to know, thanks for explaining why 64-bit division
is slow while 64-bit multiplication is fast; doing so many
64-bit divisions would hurt performance a lot and should be avoided.

> So a fix would be to call do_div() or something from
> include/linux/div64.h but it's all a great mess.
> 
> However we can do native 64-bit multiplication on 32-bit!  So how about
> something like
> 
> --- a/kernel/panic.c~a
> +++ a/kernel/panic.c
> @@ -313,13 +313,16 @@ void panic(const char *fmt, ...)
>* Delay timeout seconds before rebooting the machine.
>* We can't use the "normal" timers since we just panicked.
>*/
> + u64 timeout = panic_timeout * 1000; /* avoid overflow */
> + u64 timer;
>   pr_emerg("Rebooting in %d seconds..\n", panic_timeout);
> - for (i = 0; i < panic_timeout * 1000; i += PANIC_TIMER_STEP) {
> + for (timer = 0; timer < timeout; timer += PANIC_TIMER_STEP) {

If we use u64 as the loop boundary, would it be a problem if
panic_timeout is negative? In the current code, if
panic_timeout is negative, the loop is not executed;
whereas in the patched code, the loop boundary would become a huge
unsigned value. I guess s64 should do?

If it's not a problem, I'll submit another patch enforcing
the change, including the changes suggested by Matthew here:

> > +   u64 timeout = panic_timeout * 1000; /* avoid overflow */
> 1000ULL to not truncate before the assignment.

> > +   u64 timer;
> ... as you implied later: u64 timer, timer_next;
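
Putting the suggestions together (64-bit multiply, no 64-bit division, and
skipping the loop when panic_timeout is not positive), a minimal sketch of
what I have in mind, with the loop body elided:

	if (panic_timeout > 0) {
		/* 64-bit multiply is cheap even on 32-bit; no division needed */
		u64 timeout = (u64)panic_timeout * 1000;
		u64 timer;

		pr_emerg("Rebooting in %d seconds..\n", panic_timeout);

		for (timer = 0; timer < timeout; timer += PANIC_TIMER_STEP) {
			/* existing per-step work: watchdog touch, blink, mdelay() */
		}
	}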


Thank you guys so much for your valuable feedback, I learned a lot!

Best,
Changming



Re: [RFC PATCH 4/7] x86: use exit_lazy_tlb rather than membarrier_mm_sync_core_before_usermode

2020-07-15 Thread Nicholas Piggin
Excerpts from Nicholas Piggin's message of July 16, 2020 2:15 pm:
> Excerpts from Mathieu Desnoyers's message of July 14, 2020 12:13 am:
>> - On Jul 13, 2020, at 9:47 AM, Nicholas Piggin npig...@gmail.com wrote:
>> 
>>> Excerpts from Nicholas Piggin's message of July 13, 2020 2:45 pm:
 Excerpts from Andy Lutomirski's message of July 11, 2020 3:04 am:
> Also, as it stands, I can easily see in_irq() ceasing to promise to
> serialize.  There are older kernels for which it does not promise to
> serialize.  And I have plans to make it stop serializing in the
> nearish future.
 
 You mean x86's return from interrupt? Sounds fun... you'll know where to
 update the membarrier sync code, at least :)
>>> 
>>> Oh, I should actually say Mathieu recently clarified a return from
>>> interrupt doesn't fundamentally need to serialize in order to support
>>> membarrier sync core.
>> 
>> Clarification to your statement:
>> 
>> Return from interrupt to kernel code does not need to be context serializing
>> as long as kernel serializes before returning to user-space.
>> 
>> However, return from interrupt to user-space needs to be context serializing.
> 
> Hmm, I'm not sure it's enough even with the sync in the exit_lazy_tlb
> in the right places.
> 
> A kernel thread does a use_mm, then it blocks and the user process with
> the same mm runs on that CPU, and then it calls into the kernel, blocks,
> the kernel thread runs again, another CPU issues a membarrier which does
> not IPI this one because it's running a kthread, and then the kthread
> switches back to the user process (still without having unused the mm),
> and then the user process returns from syscall without having done a 
> core synchronising instruction.
> 
> The cause of the problem is you want to avoid IPI'ing kthreads. Why?
> I'm guessing it really only matters as an optimisation in case of idle
> threads. Idle thread is easy (well, easier) because it won't use_mm, so 
> you could check for rq->curr == rq->idle in your loop (in a suitable 
> sched accessor function).
> 
> But... I'm not really liking this subtlety in the scheduler for all this 
> (the scheduler still needs the barriers when switching out of idle).
> 
> Can it be improved somehow? Let me forget x86 core sync problem for now
> (that _may_ be a bit harder), and step back and look at what we're doing.
> The memory barrier case would actually suffer from the same problem as
> core sync, because in the same situation it has no implicit mmdrop in
> the scheduler switch code either.
> 
> So what are we doing with membarrier? We want any activity caused by the 
> set of CPUs/threads specified that can be observed by this thread before 
> calling membarrier is appropriately fenced from activity that can be 
> observed to happen after the call returns.
> 
> CPU0 CPU1
>  1. user stuff
> a. membarrier()  2. enter kernel
> b. read rq->curr 3. rq->curr switched to kthread
> c. is kthread, skip IPI  4. switch_to kthread
> d. return to user5. rq->curr switched to user thread
>6. switch_to user thread
>7. exit kernel
>  8. more user stuff
> 
> As far as I can see, the problem is CPU1 might reorder step 5 and step
> 8, so you have mmdrop of lazy mm be a mb after step 6.
> 
> But why? The membarrier call only cares that there is a full barrier
> between 1 and 8, right? Which it will get from the previous context
> switch to the kthread.

I should be more complete here, especially since I was complaining
about unclear barrier comment :)


CPU0 CPU1
a. user stuff1. user stuff
b. membarrier()  2. enter kernel
c. smp_mb()  3. smp_mb__after_spinlock(); // in __schedule
d. read rq->curr 4. rq->curr switched to kthread
e. is kthread, skip IPI  5. switch_to kthread
f. return to user6. rq->curr switched to user thread
g. user stuff7. switch_to user thread
 8. exit kernel
 9. more user stuff

What you're really ordering is a, g vs 1, 9 right?

In other words, 9 must see a if it sees g, g must see 1 if it saw 9,
etc.

Userspace does not care where the barriers are exactly or what kernel 
memory accesses might be being ordered by them, so long as there is a
mb somewhere between a and g, and 1 and 9. Right?


RE: [PATCH 2/9] iommu/ipmmu-vmsa: Hook up R8A774E1 DT matching code

2020-07-15 Thread Yoshihiro Shimoda
Hi Geert-san,

> From: Geert Uytterhoeven, Sent: Tuesday, July 14, 2020 9:40 PM
> 
> Hi Shimoda-san,
> 
> On Tue, Jul 14, 2020 at 1:42 PM Yoshihiro Shimoda
>  wrote:
> > > From: Geert Uytterhoeven, Sent: Tuesday, July 14, 2020 5:42 PM
> > > On Tue, Jul 14, 2020 at 10:30 AM Lad, Prabhakar
> > >  wrote:
> > > > On Tue, Jul 14, 2020 at 9:09 AM Geert Uytterhoeven 
> > > >  wrote:
> > > > > On Mon, Jul 13, 2020 at 11:35 PM Lad Prabhakar
> > > > Also the recent patch to add
> > > > "r8a77961" just adds to soc_rcar_gen3_whitelist.
> > >
> > > Oops, commit 17fe16181639801b ("iommu/renesas: Add support for r8a77961")
> > > did it wrong, too.
> >
> > Thank you for pointing it out. We should add r8a77961 to the 
> > soc_rcar_gen3[].
> > However, I don't know why I could not realize this issue...
> > So, I investigated this a little and then, IIUC, glob_match() which
> > soc_device_match() uses seems to return true, if *pat = "r8a7796" and *str 
> > = "r8a77961".
> 
> Are you sure about this?

I'm very sorry. I completely misunderstood the glob_match() behavior.
And, now I understood why the current code can use IPMMU on r8a77961...
# Since the first soc_device_match() will return false, ipmmu_slave_whitelist()
# will return true and then the ipmmu_of_xlate() will succeed.

> I enabled CONFIG_GLOB_SELFTEST, and globtest succeeded.
> It does test glob_match("a", "aa"), which is a similar test.
> 
> To be 100% sure, I added:
> 
> --- a/lib/globtest.c
> +++ b/lib/globtest.c
> @@ -59,6 +59,7 @@ static char const glob_tests[] __initconst =
> "1" "a\0" "a\0"
> "0" "a\0" "b\0"
> "0" "a\0" "aa\0"
> +   "0" "r8a7796\0" "r8a77961\0"
> "0" "a\0" "\0"
> "1" "\0" "\0"
> "0" "\0" "a\0"
> 
> and it still succeeded.
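
For reference, glob_match() (which soc_device_match() uses on the soc_id
strings) matches against the entire string, so "r8a7796" does not match
"r8a77961" without a wildcard:

#include <linux/glob.h>
#include <linux/printk.h>

/* Illustration only: glob_match(pattern, string) matches the whole string. */
static void __maybe_unused glob_match_examples(void)
{
	bool a = glob_match("r8a7796",  "r8a77961");	/* false: trailing '1' unmatched */
	bool b = glob_match("r8a7796*", "r8a77961");	/* true */
	bool c = glob_match("a",        "aa");		/* false, as in the selftest */

	pr_info("glob: %d %d %d\n", a, b, c);		/* prints: glob: 0 1 0 */
}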

I'm very sorry to waste your time about this...

Best regards,
Yoshihiro Shimoda



[PATCH] riscv: Add SiFive drivers to rv32_defconfig

2020-07-15 Thread Bin Meng
From: Bin Meng 

This adds SiFive drivers to rv32_defconfig, to keep in sync with the
64-bit config. This is useful when testing 32-bit kernel with QEMU
'sifive_u' 32-bit machine.

Signed-off-by: Bin Meng 
---

 arch/riscv/configs/rv32_defconfig | 5 +
 1 file changed, 5 insertions(+)

diff --git a/arch/riscv/configs/rv32_defconfig 
b/arch/riscv/configs/rv32_defconfig
index 05bbf52..8759501 100644
--- a/arch/riscv/configs/rv32_defconfig
+++ b/arch/riscv/configs/rv32_defconfig
@@ -14,6 +14,7 @@ CONFIG_CHECKPOINT_RESTORE=y
 CONFIG_BLK_DEV_INITRD=y
 CONFIG_EXPERT=y
 CONFIG_BPF_SYSCALL=y
+CONFIG_SOC_SIFIVE=y
 CONFIG_SOC_VIRT=y
 CONFIG_ARCH_RV32I=y
 CONFIG_SMP=y
@@ -61,6 +62,8 @@ CONFIG_HVC_RISCV_SBI=y
 CONFIG_VIRTIO_CONSOLE=y
 CONFIG_HW_RANDOM=y
 CONFIG_HW_RANDOM_VIRTIO=y
+CONFIG_SPI=y
+CONFIG_SPI_SIFIVE=y
 # CONFIG_PTP_1588_CLOCK is not set
 CONFIG_POWER_RESET=y
 CONFIG_DRM=y
@@ -76,6 +79,8 @@ CONFIG_USB_OHCI_HCD=y
 CONFIG_USB_OHCI_HCD_PLATFORM=y
 CONFIG_USB_STORAGE=y
 CONFIG_USB_UAS=y
+CONFIG_MMC=y
+CONFIG_MMC_SPI=y
 CONFIG_RTC_CLASS=y
 CONFIG_VIRTIO_PCI=y
 CONFIG_VIRTIO_BALLOON=y
-- 
2.7.4



[PATCH] spi: atmel-quadspi: Use optimized memcpy_fromio()/memcpy_toio()

2020-07-15 Thread Tudor Ambarus
Optimized mem*io operations are defined for LE platforms, use them.

The ARM and !ARCH_EBSA110 dependencies for COMPILE_TEST were added
only for the _memcpy_fromio()/_memcpy_toio() functions. Drop these
dependencies.

Tested unaligned accesses on both sama5d2 and sam9x60 QSPI controllers
using SPI NOR flashes, everything works ok. The following performance
improvement can be seen when running mtd_speedtest:

sama5d2_xplained (mx25l25635e)
- before:
mtd_speedtest: eraseblock write speed is 983 KiB/s
mtd_speedtest: eraseblock read speed is 6150 KiB/s
- after:
mtd_speedtest: eraseblock write speed is 1055 KiB/s
mtd_speedtest: eraseblock read speed is 20144 KiB/s

sam9x60ek (sst26vf064b)
- before:
mtd_speedtest: eraseblock write speed is 4770 KiB/s
mtd_speedtest: eraseblock read speed is 8062 KiB/s
- after:
mtd_speedtest: eraseblock write speed is 4524 KiB/s
mtd_speedtest: eraseblock read speed is 21186 KiB/s

Signed-off-by: Tudor Ambarus 
---
 drivers/spi/Kconfig | 2 +-
 drivers/spi/atmel-quadspi.c | 8 
 2 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/drivers/spi/Kconfig b/drivers/spi/Kconfig
index fd64c865f6ef..b89d03a36cbd 100644
--- a/drivers/spi/Kconfig
+++ b/drivers/spi/Kconfig
@@ -103,7 +103,7 @@ config SPI_AT91_USART
 
 config SPI_ATMEL_QUADSPI
tristate "Atmel Quad SPI Controller"
-   depends on ARCH_AT91 || (ARM && COMPILE_TEST && !ARCH_EBSA110)
+   depends on ARCH_AT91 || COMPILE_TEST
depends on OF && HAS_IOMEM
help
  This enables support for the Quad SPI controller in master mode.
diff --git a/drivers/spi/atmel-quadspi.c b/drivers/spi/atmel-quadspi.c
index a898755fb41e..8c009c175f2c 100644
--- a/drivers/spi/atmel-quadspi.c
+++ b/drivers/spi/atmel-quadspi.c
@@ -430,11 +430,11 @@ static int atmel_qspi_exec_op(struct spi_mem *mem, const 
struct spi_mem_op *op)
 
/* Send/Receive data */
if (op->data.dir == SPI_MEM_DATA_IN)
-   _memcpy_fromio(op->data.buf.in, aq->mem + offset,
-  op->data.nbytes);
+   memcpy_fromio(op->data.buf.in, aq->mem + offset,
+ op->data.nbytes);
else
-   _memcpy_toio(aq->mem + offset, op->data.buf.out,
-op->data.nbytes);
+   memcpy_toio(aq->mem + offset, op->data.buf.out,
+   op->data.nbytes);
 
/* Release the chip-select */
atmel_qspi_write(QSPI_CR_LASTXFER, aq, QSPI_CR);
-- 
2.25.1



[PATCH] drm/i915/display: Ensure that ret is always initialized in icl_combo_phy_verify_state

2020-07-15 Thread Nathan Chancellor
Clang warns:

drivers/gpu/drm/i915/display/intel_combo_phy.c:268:3: warning: variable
'ret' is uninitialized when used here [-Wuninitialized]
ret &= check_phy_reg(dev_priv, phy, ICL_PORT_TX_DW8_LN0(phy),
^~~
drivers/gpu/drm/i915/display/intel_combo_phy.c:261:10: note: initialize
the variable 'ret' to silence this warning
bool ret;
^
 = 0
1 warning generated.

In practice, the bug this warning appears to be concerned with would not
actually matter because ret gets initialized to the return value of
cnl_verify_procmon_ref_values. However, that does appear to be a bug
since it means the first hunk of the patch this fixes won't actually do
anything (since the values of check_phy_reg won't factor into the final
ret value). Initialize ret to true then make all of the assignments a
bitwise AND with itself so that the function always does what it should
do.

Fixes: 239bef676d8e ("drm/i915/display: Implement new combo phy initialization 
step")
Link: https://github.com/ClangBuiltLinux/linux/issues/1094
Signed-off-by: Nathan Chancellor 
---
 drivers/gpu/drm/i915/display/intel_combo_phy.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/i915/display/intel_combo_phy.c 
b/drivers/gpu/drm/i915/display/intel_combo_phy.c
index eccaa79cb4a9..a4b8aa6d0a9e 100644
--- a/drivers/gpu/drm/i915/display/intel_combo_phy.c
+++ b/drivers/gpu/drm/i915/display/intel_combo_phy.c
@@ -258,7 +258,7 @@ static bool phy_is_master(struct drm_i915_private 
*dev_priv, enum phy phy)
 static bool icl_combo_phy_verify_state(struct drm_i915_private *dev_priv,
   enum phy phy)
 {
-   bool ret;
+   bool ret = true;
u32 expected_val = 0;
 
if (!icl_combo_phy_enabled(dev_priv, phy))
@@ -276,7 +276,7 @@ static bool icl_combo_phy_verify_state(struct 
drm_i915_private *dev_priv,
 DCC_MODE_SELECT_CONTINUOSLY);
}
 
-   ret = cnl_verify_procmon_ref_values(dev_priv, phy);
+   ret &= cnl_verify_procmon_ref_values(dev_priv, phy);
 
if (phy_is_master(dev_priv, phy)) {
ret &= check_phy_reg(dev_priv, phy, ICL_PORT_COMP_DW8(phy),

base-commit: ca0e494af5edb59002665bf12871e94b4163a257
-- 
2.28.0.rc0



[PATCH] can: m_can: Set device to software init mode before closing

2020-07-15 Thread Faiz Abbas
There might be some requests pending in the buffer when the
interface close sequence occurs. In some devices, these
pending requests might lead to the module not shutting down
properly when m_can_clk_stop() is called.

Therefore, move the device to init state before potentially
powering it down.

Signed-off-by: Faiz Abbas 
---
 drivers/net/can/m_can/m_can.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/net/can/m_can/m_can.c b/drivers/net/can/m_can/m_can.c
index 02c5795b7393..d0c458f7f6e1 100644
--- a/drivers/net/can/m_can/m_can.c
+++ b/drivers/net/can/m_can/m_can.c
@@ -1414,6 +1414,9 @@ static void m_can_stop(struct net_device *dev)
/* disable all interrupts */
m_can_disable_all_interrupts(cdev);
 
+   /* Set init mode to disengage from the network */
+   m_can_config_endisable(cdev, true);
+
/* set the state as STOPPED */
cdev->can.state = CAN_STATE_STOPPED;
 }
-- 
2.17.1



[PATCH v3 3/4] regulator: core: Add basic enable/disable support for sync_state() callbacks

2020-07-15 Thread Saravana Kannan
Consider the following example:
- regulator-X is provided by device-X.
- regulator-X is a supplier to device-A, device-B and device-C.
- device-A is off/inactive from boot.
- device-B and device-C are left on/active by the bootloader
- regulator-X is left on boot by the bootloader at 2000 mV to supply
  device-B and device-C.

Example boot sequence:
1. device-X is probed successfully.
2. device-A is probed by driver-A
   a. driver-A gets regulator-X
   b. driver-A votes on regulator-X
   c. driver-A initializes device-A
   d. driver-A votes off regulator-X
   e. regulator-X is turned off.
3. System crashes or device-B and device-C become unreliable because
   regulator-X was turned off without following the proper quiescing
   steps for device-B and device-C.

There are Android devices that exhibit the issue in the example where
regulator-X is an LDO, device-A is a camera device and device-B and
device-C are UFS and USB. To avoid this, they have their own downstream
changes to the regulator framework.

This patch addresses the problem in the example by:

1. When a regulator is registered,
   a. The sync_state() callback for the regulator's device is set to
  regulator_sync_state(). The sync_state() callback is called when
  all the consumers of the regulator's device have probed
  successfully.
   b. If the regulator is ON at boot, a BOOT-LIMITS consumer is created
  for the regulator and an enable vote is made.

2. When the regulator_sync_state() callback comes, all the boot-on
   regulators registered by that device will have the BOOT-LIMITS enable
   vote removed and the BOOT-LIMITS consumer freed.

If an exclusive get is ever attempted on a boot-on regulator with an
active BOOT-LIMITS vote, the regulator is handed off to the new consumer
(and the BOOT-LIMITS freed) without affecting the regulator state. This
ensures, consumers doing exclusive gets continue to work after this
commit.

To maintain backward compatibility with systems where some consumers of
the device might never probe, a new regulator_cleanup_timeout kernel
commandline argument is added and defaulted to 30 seconds. When the
timeout is a non-zero value and it expires, all BOOT-LIMITS consumer
votes are removed even if the sync_state() callbacks haven't been
called.

In systems where all the consumers are expected to probe, the
regulator_cleanup_timeout can be set to 0. When that's done, the
BOOT-LIMITS consumer votes for a regulator are removed only when all the
consumers of the regulator's device have probed.

So with this patch and regulator_cleanup_timeout=0, the example will work
as follows:

1. device-X is probed successfully.
   a. regulator-X is registered.
   b. Since regulator-X is on, an enable vote is made by the BOOT-LIMITS
  consumer.
2. device-A is probed by driver-A
   a. driver-A gets regulator-X
   b. driver-A votes on regulator-X
   c. driver-A initializes device-A
   d. driver-A votes off regulator-X
   e. regulator-X is NOT turned off due to BOOT-LIMITS consumer.
3. device-B is probed by driver-B
   a. driver-B gets regulator-X
   b. driver-B votes on regulator-X
   c. driver-B initializes device-B.
   d. driver-B votes off regulator-X because device-B enters runtime
  idle.
   e. regulator-X is NOT turned off due to BOOT-LIMITS consumer.
4. device-C is probed by driver-C
   a. Does stuff similar to device-B and votes off regulator-X.
5. Since all consumers of device-X have probed, device-X gets
   sync_state() callback which is a call to regulator_sync_state():
   a. BOOT-LIMITS removes enable vote for regulator-X
   b. regulator-X is turned off.
   c. BOOT-LIMITS consumer is freed.
6. The system is stable because regulator-X is only turned off after
   device-B and device-C get to initialize and quiesce properly.

OR

Same steps for 1 - 3 as above.
4. device-C is never probed because driver-C isn't available.
5. Since all consumers of device-X have NOT probed, device-X doesn't get
   sync_state() callback.
   a. BOOT-LIMITS votes continue to be enforced for regulator-X
6. device-C continues to work in the mode the boot loader left it in and
   the system remains usable. For example, device C is
   - A display backlight that doesn't have a driver
   - An interconnect that doesn't have an interconnect provider driver

Signed-off-by: Saravana Kannan 
---
 drivers/regulator/core.c | 155 ++-
 include/linux/regulator/driver.h |   2 +
 2 files changed, 154 insertions(+), 3 deletions(-)

diff --git a/drivers/regulator/core.c b/drivers/regulator/core.c
index c9615d3530c7..f10ef6ec1af1 100644
--- a/drivers/regulator/core.c
+++ b/drivers/regulator/core.c
@@ -1769,6 +1769,115 @@ static struct regulator_dev 
*regulator_dev_lookup(struct device *dev,
return ERR_PTR(-ENODEV);
 }
 
+/**
+ * regulator_add_boot_limits - Set up boot limits for a regulator device.
+ * @rdev: regulator device to set up boot limits for.
+ *
+ * Makes requests on the regulator device 

[PATCH v3 4/4] regulator: core: Add voltage support for sync_state() callbacks

2020-07-15 Thread Saravana Kannan
Consider the following example:
- regulator-X is provided by device-X.
- regulator-X is a supplier to device-A, device-B and device-C.
- device-A is off/inactive from boot.
- device-B and device-C are left on/active by the bootloader
- regulator-X is left on boot by the bootloader at 2000 mV to supply
  device-B and device-C.

Example boot sequence:
1. device-X is probed successfully.
2. device-B is probed by driver-B
   a. driver-B gets regulator-X
   b. driver-B votes on regulator-X
   c. driver-B lowers device-B performance point.
   d. driver-B lowers voltage vote to 1000 mV.
   e. regulator-X voltage is lowered to 1000 mV.
3. System crashes or device-C becomes unreliable because regulator-X
   voltage was lowered to 1000 mV when device-C still needed it at 2000 mV

The issue reported by Marek Szyprowski [1] between vdd_int and vdd_arm
is very similar to example 2, except driver-B lowers the voltage of
device-C due to a regulator coupling instead of a direct request from
driver-B.

This patch addresses the problem in the example by:
1. When a boot-on regulator is registered, a minimum voltage limit that
   matches the boot time voltage is placed on the regulator.

2. When the regulator_sync_state() callback comes, the minimum voltage
   limit is removed along with the rest of the boot limits.

So with this patch and regulator_cleanup_timeout=0, the example will
work as follows:

1. device-X is probed successfully.
   a. regulator-X is registered.
   b. Since regulator-X is on, a minimum voltage of 2000 mV is made by
  the BOOT-LIMITS consumer.
   c. Since regulator-X is on, an enable vote is made by the BOOT-LIMITS
  consumer.
2. device-B is probed by driver-B
   a. driver-B gets regulator-X
   b. driver-B votes on regulator-X
   c. driver-B lowers device-B performance point.
   d. driver-B lowers voltage vote to 1000 mV.
   e. regulator-X voltage is NOT lowered to 1000 mV.
3. device-C is probed by driver-C
   a. Does stuff similar to device-B.
4. Since all consumers of device-X have probed, device-X gets
   sync_state() callback which is a call to regulator_sync_state():
   a. BOOT-LIMITS removes enable vote for regulator-X
   b. regulator-X remains on because device-B and device-C have their
  enable votes in.
   c. BOOT-LIMITS consumer is freed.
   d. regulator-X voltage is lowered to 1000 mV.
5. The system is stable because regulator-X voltage is NOT lowered to
   1000 mV when device-C still needed it at 2000 mV.

[1] - 
https://lore.kernel.org/lkml/20200605063724.9030-1-m.szyprow...@samsung.com/#t
Signed-off-by: Saravana Kannan 
---
 drivers/regulator/core.c | 15 +--
 1 file changed, 13 insertions(+), 2 deletions(-)

diff --git a/drivers/regulator/core.c b/drivers/regulator/core.c
index f10ef6ec1af1..9b70295820f3 100644
--- a/drivers/regulator/core.c
+++ b/drivers/regulator/core.c
@@ -1783,6 +1783,8 @@ static struct regulator_dev *regulator_dev_lookup(struct 
device *dev,
  */
 static void regulator_add_boot_limits(struct regulator_dev *rdev)
 {
+   int boot_uV;
+
/* We already set up boot limits. */
if (rdev->boot_limits)
return;
@@ -1815,6 +1817,13 @@ static void regulator_add_boot_limits(struct 
regulator_dev *rdev)
}
rdev->open_count++;
 
+   if (regulator_ops_is_valid(rdev, REGULATOR_CHANGE_VOLTAGE)) {
+   boot_uV = regulator_get_voltage_rdev(rdev);
+   if (boot_uV > 0)
+   regulator_set_voltage(rdev->boot_limits, boot_uV,
+INT_MAX);
+   }
+
if (regulator_enable(rdev->boot_limits)) {
dev_err(&rdev->dev, "Unable to set boot limits\n");
destroy_regulator(rdev->boot_limits);
@@ -1847,10 +1856,12 @@ static int regulator_del_boot_limits(struct 
regulator_dev *rdev, bool handoff)
return 0;
 
rdev_info(rdev, "removing boot limits\n");
-   if (!handoff)
+   if (!handoff) {
regulator_disable(rdev->boot_limits);
-   else
+   regulator_set_voltage(rdev->boot_limits, 0, INT_MAX);
+   } else {
rdev->use_count--;
+   }
destroy_regulator(rdev->boot_limits);
/*
 * Set it to an error value so that boot limits can't be set again once
-- 
2.28.0.rc0.105.gf9edc3c819-goog



[PATCH v3 1/4] driver core: Add dev_set_drv_sync_state()

2020-07-15 Thread Saravana Kannan
This can be used by frameworks to set the sync_state() helper functions
for drivers that don't already have them set.

Signed-off-by: Saravana Kannan 
---
 include/linux/device.h | 12 
 1 file changed, 12 insertions(+)

diff --git a/include/linux/device.h b/include/linux/device.h
index 15460a5ac024..2f56afdd9107 100644
--- a/include/linux/device.h
+++ b/include/linux/device.h
@@ -806,6 +806,18 @@ static inline bool dev_has_sync_state(struct device *dev)
return false;
 }
 
+static inline int dev_set_drv_sync_state(struct device *dev,
+void (*fn)(struct device *dev))
+{
+   if (!dev || !dev->driver)
+   return 0;
+   if (dev->driver->sync_state && dev->driver->sync_state != fn)
+   return -EBUSY;
+   if (!dev->driver->sync_state)
+   dev->driver->sync_state = fn;
+   return 0;
+}
+
 /*
  * High level routines for use by the bus drivers
  */
-- 
2.28.0.rc0.105.gf9edc3c819-goog
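
A sketch of how a framework could use this helper from its registration path
(the foo_* names are hypothetical; the regulator core in patch 3/4 is the
intended first user):

#include <linux/device.h>

/* Hypothetical framework callback: drop boot-time state held for this device. */
static void foo_sync_state(struct device *dev)
{
	/* ... release constraints taken on behalf of dev at boot ... */
}

static int foo_register_provider(struct device *dev)
{
	int ret;

	/* Install the framework's sync_state() unless the driver has its own. */
	ret = dev_set_drv_sync_state(dev, foo_sync_state);
	if (ret)
		return ret;

	/* ... rest of the registration ... */
	return 0;
}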



[PATCH v3 2/4] regulator: core: Add destroy_regulator()

2020-07-15 Thread Saravana Kannan
Part of the regulator_get() code is already factored out into
create_regulator(). This patch factors out some of the regulator_put()
code into destroy_regulator() so that create_regulator() has a
corresponding unwind function. Subsequent patches will use this
function.

Signed-off-by: Saravana Kannan 
---
 drivers/regulator/core.c | 34 +-
 1 file changed, 21 insertions(+), 13 deletions(-)

diff --git a/drivers/regulator/core.c b/drivers/regulator/core.c
index 03154f5b939f..c9615d3530c7 100644
--- a/drivers/regulator/core.c
+++ b/drivers/regulator/core.c
@@ -105,6 +105,7 @@ static int regulator_balance_voltage(struct regulator_dev 
*rdev,
 static struct regulator *create_regulator(struct regulator_dev *rdev,
  struct device *dev,
  const char *supply_name);
+static void destroy_regulator(struct regulator *regulator);
 static void _regulator_put(struct regulator *regulator);
 
 const char *rdev_get_name(struct regulator_dev *rdev)
@@ -2034,20 +2035,9 @@ struct regulator *regulator_get_optional(struct device 
*dev, const char *id)
 }
 EXPORT_SYMBOL_GPL(regulator_get_optional);
 
-/* regulator_list_mutex lock held by regulator_put() */
-static void _regulator_put(struct regulator *regulator)
+static void destroy_regulator(struct regulator *regulator)
 {
-   struct regulator_dev *rdev;
-
-   if (IS_ERR_OR_NULL(regulator))
-   return;
-
-   lockdep_assert_held_once(&regulator_list_mutex);
-
-   /* Docs say you must disable before calling regulator_put() */
-   WARN_ON(regulator->enable_count);
-
-   rdev = regulator->rdev;
+   struct regulator_dev *rdev = regulator->rdev;
 
debugfs_remove_recursive(regulator->debugfs);
 
@@ -2068,6 +2058,24 @@ static void _regulator_put(struct regulator *regulator)
 
kfree_const(regulator->supply_name);
kfree(regulator);
+}
+
+/* regulator_list_mutex lock held by regulator_put() */
+static void _regulator_put(struct regulator *regulator)
+{
+   struct regulator_dev *rdev;
+
+   if (IS_ERR_OR_NULL(regulator))
+   return;
+
+   lockdep_assert_held_once(&regulator_list_mutex);
+
+   /* Docs say you must disable before calling regulator_put() */
+   WARN_ON(regulator->enable_count);
+
+   rdev = regulator->rdev;
+
+   destroy_regulator(regulator);
 
module_put(rdev->owner);
put_device(&rdev->dev);
-- 
2.28.0.rc0.105.gf9edc3c819-goog



[PATCH v3 0/4] regulator_sync_state() support

2020-07-15 Thread Saravana Kannan
Consider the following example:
- regulator-X is provided by device-X.
- regulator-X is a supplier to device-A, device-B and device-C.
- device-A is off/inactive from boot.
- device-B and device-C are left on/active by the bootloader
- regulator-X is left on boot by the bootloader at 2000 mV to supply
  device-B and device-C.

Example boot sequence 1:
1. device-X is probed successfully.
2. device-A is probed by driver-A
   a. driver-A gets regulator-X
   b. driver-A votes on regulator-X
   c. driver-A initializes device-A
   d. driver-A votes off regulator-X
   e. regulator-X is turned off.
3. System crashes or device-B and device-C become unreliable because
   regulator-X was turned off without following the proper quiescing
   steps for device-B and device-C.

Example boot sequence 2:
1. device-X is probed successfully.
2. device-B is probed by driver-B
   a. driver-B gets regulator-X
   b. driver-B votes on regulator-X
   c. driver-B lowers device-B performance point.
   d. driver-B lowers voltage vote to 1000 mV.
   e. regulator-X voltage is lowered to 1000 mV.
3. System crashes or device-C becomes unreliable because regulator-X
   voltage was lowered to 1000 mV when device-C still needed it at 2000 mV

This patch series makes sure these examples are handled correctly and
system crash or device instability is avoided and the system remains
usable.

More details provided in the commit texts.

v2->v3:
Patch 2/4 - No functional change. Simple refactor.
Patch 3/4
- Was Patch 2/2 in v2.
- Rewrote commit text to hopefully address all previous points.
- Renamed variable/functions. Hope it's clearer.
- Added more comments.
- Added logging
- Fixed timeout functionality.
- Handle exclusive consumers properly
- Handle coupled regulators properly
Patch 4/4 - Prevents voltage from going too low during boot.

v1->v2:
Patch 1/2
- New patch
Patch 2/2
- This was the only patch in v1
- Made the late_initcall_sync timeout a commandline param
- If timeout is set, we also give up waiting for all consumers after
  the timeout expires.
- Made every regulator driver add sync_state() support

Saravana Kannan (4):
  driver core: Add dev_set_drv_sync_state()
  regulator: core: Add destroy_regulator()
  regulator: core: Add basic enable/disable support for sync_state()
callbacks
  regulator: core: Add voltage support for sync_state() callbacks

 drivers/regulator/core.c | 200 ---
 include/linux/device.h   |  12 ++
 include/linux/regulator/driver.h |   2 +
 3 files changed, 198 insertions(+), 16 deletions(-)

-- 
2.28.0.rc0.105.gf9edc3c819-goog



Re: kernel BUG at net/core/dev.c:LINE! (3)

2020-07-15 Thread syzbot
syzbot has found a reproducer for the following issue on:

HEAD commit:4ff91fa0 Merge branch 'udp_tunnel-NIC-RX-port-offload-infr..
git tree:   net-next
console output: https://syzkaller.appspot.com/x/log.txt?x=1777b9bf10
kernel config:  https://syzkaller.appspot.com/x/.config?x=8a8f9de6c9d911de
dashboard link: https://syzkaller.appspot.com/bug?extid=af23e7f3e0a7e10c8b67
compiler:   gcc (GCC) 10.1.0-syz 20200507
syz repro:  https://syzkaller.appspot.com/x/repro.syz?x=13a7c4f710

IMPORTANT: if you fix the issue, please add the following tag to the commit:
Reported-by: syzbot+af23e7f3e0a7e10c8...@syzkaller.appspotmail.com

bond1 (unregistering): (slave wireguard2): Releasing backup interface
bond1 (unregistering): (slave wireguard1): Releasing backup interface
bond1 (unregistering): (slave wireguard0): Releasing backup interface
device wireguard0 left promiscuous mode
bond1 (unregistering): Destroying bond
[ cut here ]
kernel BUG at net/core/dev.c:8948!
invalid opcode:  [#1] PREEMPT SMP KASAN
CPU: 0 PID: 129 Comm: kworker/u4:3 Not tainted 5.8.0-rc4-syzkaller #0
Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 
01/01/2011
Workqueue: netns cleanup_net
RIP: 0010:rollback_registered_many+0x2be/0xf60 net/core/dev.c:8948
Code: 4c 89 e8 48 c1 e8 03 42 80 3c 20 00 0f 85 91 0c 00 00 48 b8 22 01 00 00 
00 00 ad de 48 89 43 70 e9 b9 fe ff ff e8 82 19 3d fb <0f> 0b 4c 8d 7b 68 4c 8d 
6b 70 eb a5 e8 71 19 3d fb 48 8b 74 24 10
RSP: 0018:c9e976b0 EFLAGS: 00010293
RAX:  RBX: 8880a2bb RCX: 86369018
RDX: 8880a8dcc2c0 RSI: 8636916e RDI: 0001
RBP: c9e97770 R08:  R09: 8a7b9707
R10: 0001 R11:  R12: dc00
R13: 8880a2bb0068 R14: c9e97718 R15: 0002
FS:  () GS:8880ae60() knlGS:
CS:  0010 DS:  ES:  CR0: 80050033
CR2: 00e07978 CR3: 93d17000 CR4: 001406f0
DR0:  DR1:  DR2: 
DR3:  DR6: fffe0ff0 DR7: 0400
Call Trace:
 rollback_registered net/core/dev.c:9022 [inline]
 unregister_netdevice_queue+0x2dd/0x570 net/core/dev.c:10103
 unregister_netdevice include/linux/netdevice.h:2762 [inline]
 bond_release_and_destroy drivers/net/bonding/bond_main.c:2212 [inline]
 bond_slave_netdev_event drivers/net/bonding/bond_main.c:3285 [inline]
 bond_netdev_event.cold+0xc1/0x10e drivers/net/bonding/bond_main.c:3398
 notifier_call_chain+0xb5/0x200 kernel/notifier.c:83
 call_netdevice_notifiers_info+0xb5/0x130 net/core/dev.c:2033
 call_netdevice_notifiers_extack net/core/dev.c:2045 [inline]
 call_netdevice_notifiers net/core/dev.c:2059 [inline]
 rollback_registered_many+0x665/0xf60 net/core/dev.c:8977
 unregister_netdevice_many.part.0+0x1a/0x2f0 net/core/dev.c:10122
 unregister_netdevice_many net/core/dev.c:10121 [inline]
 default_device_exit_batch+0x30c/0x3d0 net/core/dev.c:10605
 ops_exit_list+0x10d/0x160 net/core/net_namespace.c:189
 cleanup_net+0x4ea/0xa00 net/core/net_namespace.c:603
 process_one_work+0x94c/0x1670 kernel/workqueue.c:2269
 worker_thread+0x64c/0x1120 kernel/workqueue.c:2415
 kthread+0x3b5/0x4a0 kernel/kthread.c:291
 ret_from_fork+0x1f/0x30 arch/x86/entry/entry_64.S:293
Modules linked in:
---[ end trace c01e039c1ee3796a ]---
RIP: 0010:rollback_registered_many+0x2be/0xf60 net/core/dev.c:8948
Code: 4c 89 e8 48 c1 e8 03 42 80 3c 20 00 0f 85 91 0c 00 00 48 b8 22 01 00 00 
00 00 ad de 48 89 43 70 e9 b9 fe ff ff e8 82 19 3d fb <0f> 0b 4c 8d 7b 68 4c 8d 
6b 70 eb a5 e8 71 19 3d fb 48 8b 74 24 10
RSP: 0018:c9e976b0 EFLAGS: 00010293
RAX:  RBX: 8880a2bb RCX: 86369018
RDX: 8880a8dcc2c0 RSI: 8636916e RDI: 0001
RBP: c9e97770 R08:  R09: 8a7b9707
R10: 0001 R11:  R12: dc00
R13: 8880a2bb0068 R14: c9e97718 R15: 0002
FS:  () GS:8880ae60() knlGS:
CS:  0010 DS:  ES:  CR0: 80050033
CR2: 7f091b46f018 CR3: 97f54000 CR4: 001406f0
DR0:  DR1:  DR2: 
DR3:  DR6: fffe0ff0 DR7: 0400



Re: [PATCH] ASoC: soc-component: Add missed return for snd_soc_pcm_component_mmap

2020-07-15 Thread Kuninori Morimoto


Hi Shengjiu

> Add missed return for snd_soc_pcm_component_mmap, otherwise it always
> return -EINVAL.
> 
> Fixes: e2329eeba45f ("ASoC: soc-component: add soc_component_err()")
> Signed-off-by: Shengjiu Wang 
> ---

Oops, indeed.
Thank you for the patch.

But it seems these functions are also missing a "return":
snd_soc_pcm_component_new()
snd_soc_pcm_component_sync_stop()

Can you please take care of these, too?

Thank you for your help !!

Best regards
---
Kuninori Morimoto


Re: [PATCH 3/3 v3] usb: typec: tcpm: Stay in BIST mode till hardreset or unattached

2020-07-15 Thread Guenter Roeck
On 7/15/20 8:41 PM, Badhri Jagan Sridharan wrote:
> Port starts to toggle when transitioning to unattached state.
> This is incorrect while in BIST mode.
> 
> 6.4.3.1 BIST Carrier Mode
> Upon receipt of a BIST Message, with a BIST Carrier Mode BIST Data Object,
> the UUT Shall send out a continuous string of BMC encoded alternating "1"s
> and “0”s. The UUT Shall exit the Continuous BIST Mode within
> tBISTContMode of this Continuous BIST Mode being enabled(see
> Section 6.6.7.2).
> 
> 6.4.3.2 BIST Test Data
> Upon receipt of a BIST Message, with a BIST Test Data BIST Data Object,
> the UUT Shall return a GoodCRC Message and Shall enter a test mode in which
> it sends no further Messages except for GoodCRC Messages in response to
> received Messages. See Section 5.9.2 for the definition of the Test Data
> Frame. The test Shall be ended by sending Hard Reset Signaling to reset the
> UUT.
> 
> Signed-off-by: Badhri Jagan Sridharan 

Reviewed-by: Guenter Roeck 

> ---
> Version history:
> Changes since V1:
> - None
> 
> Changes since V2:(Guenter's suggestions)
> - Fixed formatting error
> - Reduced timeout to 50ms
> 
> ---
>  drivers/usb/typec/tcpm/tcpm.c | 7 +--
>  include/linux/usb/pd.h| 1 +
>  2 files changed, 6 insertions(+), 2 deletions(-)
> 
> diff --git a/drivers/usb/typec/tcpm/tcpm.c b/drivers/usb/typec/tcpm/tcpm.c
> index 379fcab9dbd973..38b958a9650104 100644
> --- a/drivers/usb/typec/tcpm/tcpm.c
> +++ b/drivers/usb/typec/tcpm/tcpm.c
> @@ -3559,6 +3559,8 @@ static void run_state_machine(struct tcpm_port *port)
>   switch (BDO_MODE_MASK(port->bist_request)) {
>   case BDO_MODE_CARRIER2:
>   tcpm_pd_transmit(port, TCPC_TX_BIST_MODE_2, NULL);
> + tcpm_set_state(port, unattached_state(port),
> +PD_T_BIST_CONT_MODE);
>   break;
>   case BDO_MODE_TESTDATA:
>   if (port->tcpc->set_bist_data) {
> @@ -3569,8 +3571,6 @@ static void run_state_machine(struct tcpm_port *port)
>   default:
>   break;
>   }
> - /* Always switch to unattached state */
> - tcpm_set_state(port, unattached_state(port), 0);
>   break;
>   case GET_STATUS_SEND:
>   tcpm_pd_send_control(port, PD_CTRL_GET_STATUS);
> @@ -3960,6 +3960,9 @@ static void _tcpm_pd_vbus_off(struct tcpm_port *port)
>  static void _tcpm_pd_hard_reset(struct tcpm_port *port)
>  {
>   tcpm_log_force(port, "Received hard reset");
> + if (port->bist_request == BDO_MODE_TESTDATA && 
> port->tcpc->set_bist_data)
> + port->tcpc->set_bist_data(port->tcpc, false);
> +
>   /*
>* If we keep receiving hard reset requests, executing the hard reset
>* must have failed. Revert to error recovery if that happens.
> diff --git a/include/linux/usb/pd.h b/include/linux/usb/pd.h
> index a665d7f211424d..b6c233e79bd457 100644
> --- a/include/linux/usb/pd.h
> +++ b/include/linux/usb/pd.h
> @@ -483,4 +483,5 @@ static inline unsigned int rdo_max_power(u32 rdo)
>  #define PD_N_CAPS_COUNT  (PD_T_NO_RESPONSE / 
> PD_T_SEND_SOURCE_CAP)
>  #define PD_N_HARD_RESET_COUNT2
>  
> +#define PD_T_BIST_CONT_MODE  50 /* 30 - 60 ms */
>  #endif /* __LINUX_USB_PD_H */
> 



Re: [RFC PATCH 4/7] x86: use exit_lazy_tlb rather than membarrier_mm_sync_core_before_usermode

2020-07-15 Thread Nicholas Piggin
Excerpts from Mathieu Desnoyers's message of July 14, 2020 12:13 am:
> - On Jul 13, 2020, at 9:47 AM, Nicholas Piggin npig...@gmail.com wrote:
> 
>> Excerpts from Nicholas Piggin's message of July 13, 2020 2:45 pm:
>>> Excerpts from Andy Lutomirski's message of July 11, 2020 3:04 am:
 Also, as it stands, I can easily see in_irq() ceasing to promise to
 serialize.  There are older kernels for which it does not promise to
 serialize.  And I have plans to make it stop serializing in the
 nearish future.
>>> 
>>> You mean x86's return from interrupt? Sounds fun... you'll know where to
>>> update the membarrier sync code, at least :)
>> 
>> Oh, I should actually say Mathieu recently clarified a return from
>> interrupt doesn't fundamentally need to serialize in order to support
>> membarrier sync core.
> 
> Clarification to your statement:
> 
> Return from interrupt to kernel code does not need to be context serializing
> as long as kernel serializes before returning to user-space.
> 
> However, return from interrupt to user-space needs to be context serializing.

Hmm, I'm not sure it's enough even with the sync in the exit_lazy_tlb
in the right places.

A kernel thread does a use_mm, then it blocks and the user process with
the same mm runs on that CPU, and then it calls into the kernel, blocks,
the kernel thread runs again, another CPU issues a membarrier which does
not IPI this one because it's running a kthread, and then the kthread
switches back to the user process (still without having unused the mm),
and then the user process returns from syscall without having done a 
core synchronising instruction.

The cause of the problem is you want to avoid IPI'ing kthreads. Why?
I'm guessing it really only matters as an optimisation in case of idle
threads. Idle thread is easy (well, easier) because it won't use_mm, so 
you could check for rq->curr == rq->idle in your loop (in a suitable 
sched accessor function).

But... I'm not really liking this subtlety in the scheduler for all this 
(the scheduler still needs the barriers when switching out of idle).

Can it be improved somehow? Let me forget x86 core sync problem for now
(that _may_ be a bit harder), and step back and look at what we're doing.
The memory barrier case would actually suffer from the same problem as
core sync, because in the same situation it has no implicit mmdrop in
the scheduler switch code either.

So what are we doing with membarrier? We want any activity caused by the 
set of CPUs/threads specified that can be observed by this thread before 
calling membarrier is appropriately fenced from activity that can be 
observed to happen after the call returns.

CPU0 CPU1
 1. user stuff
a. membarrier()  2. enter kernel
b. read rq->curr 3. rq->curr switched to kthread
c. is kthread, skip IPI  4. switch_to kthread
d. return to user5. rq->curr switched to user thread
 6. switch_to user thread
 7. exit kernel
 8. more user stuff

As far as I can see, the problem is CPU1 might reorder step 5 and step
8, so you have mmdrop of lazy mm be a mb after step 6.

But why? The membarrier call only cares that there is a full barrier
between 1 and 8, right? Which it will get from the previous context
switch to the kthread.

I must say the memory barrier comments in membarrier could be improved
a bit (unless I'm missing where the main comment is). It's fine to know
what barriers pair with one another, but we need to know which exact
memory accesses it is ordering

   /*
 * Matches memory barriers around rq->curr modification in
 * scheduler.
 */

Sure, but it doesn't say what else is being ordered. I think it's just
the user memory accesses, but would be nice to make that a bit more
explicit. If we had such comments then we might know this case is safe.

I think the funny powerpc barrier is a similar case of this. If we
ever see remote_rq->curr->flags & PF_KTHREAD, then we _know_ that
CPU has or will have issued a memory barrier between running user
code.

So AFAIKS all this membarrier stuff in kernel/sched/core.c could
just go away. Except x86 because thread switch doesn't imply core
sync, so CPU1 between 1 and 8 may never issue a core sync instruction
the same way a context switch must be a full mb.

Before getting to x86 -- Am I right, or way off track here? 

Thanks,
Nick


Re: [PATCH v8 6/7] arm64: dts: add dts nodes for MT6779

2020-07-15 Thread Hanks Chen
On Tue, 2020-07-14 at 20:14 +0200, Matthias Brugger wrote:
> 
> On 14/07/2020 11:20, Hanks Chen wrote:
> > this adds initial MT6779 dts settings for board support,
> > including cpu, gic, timer, ccf, pinctrl, uart, sysirq...etc.
> > 
> > Signed-off-by: Hanks Chen 
> > ---
> >   arch/arm64/boot/dts/mediatek/Makefile   |   1 +
> >   arch/arm64/boot/dts/mediatek/mt6779-evb.dts |  31 +++
> >   arch/arm64/boot/dts/mediatek/mt6779.dtsi| 271 
> >   3 files changed, 303 insertions(+)
> >   create mode 100644 arch/arm64/boot/dts/mediatek/mt6779-evb.dts
> >   create mode 100644 arch/arm64/boot/dts/mediatek/mt6779.dtsi
> > 
> [...]
> > +
> > +   uart0: serial@11002000 {
> > +   compatible = "mediatek,mt6779-uart",
> > +"mediatek,mt6577-uart";
> > +   reg = <0 0x11002000 0 0x400>;
> > +   interrupts = ;
> > +   clocks = <>, <_ao CLK_INFRA_UART0>;
> > +   clock-names = "baud", "bus";
> > +   status = "disabled";
> > +   };
> > +
> > +   uart1: serial@11003000 {
> > +   compatible = "mediatek,mt6779-uart",
> > +"mediatek,mt6577-uart";
> > +   reg = <0 0x11003000 0 0x400>;
> > +   interrupts = ;
> > +   clocks = <>, <_ao CLK_INFRA_UART1>;
> > +   clock-names = "baud", "bus";
> > +   status = "disabled";
> > +   };
> > +
> > +   uart2: serial@11004000 {
> > +   compatible = "mediatek,mt6779-uart",
> > +"mediatek,mt6577-uart";
> > +   reg = <0 0x11004000 0 0x400>;
> > +   interrupts = ;
> > +   clocks = <>, <_ao CLK_INFRA_UART2>;
> > +   clock-names = "baud", "bus";
> > +   status = "disabled";
> > +   };
> 
> Devicetree describes the HW we have. As far as I know, we have 4 UARTs on 
> MT6779. So we should list them all here.
> 

Actually, we have only 3 UART HW instances on MT6779, but there are 4 UART
clocks in the clk header file.
CLK_INFRA_UART3 is a dummy clk interface; it has no effect on the operation
of read/write instructions.

If you think this is not appropriate, I can remove it from the clk header file.

Thanks

> Regards,
> Matthias



[PATCH] ASoC: soc-component: Add missed return for snd_soc_pcm_component_mmap

2020-07-15 Thread Shengjiu Wang
Add the missing return for snd_soc_pcm_component_mmap(); otherwise it
always returns -EINVAL.

Fixes: e2329eeba45f ("ASoC: soc-component: add soc_component_err()")
Signed-off-by: Shengjiu Wang 
---
 sound/soc/soc-component.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/sound/soc/soc-component.c b/sound/soc/soc-component.c
index af9909c5492f..cde7b9c609bb 100644
--- a/sound/soc/soc-component.c
+++ b/sound/soc/soc-component.c
@@ -705,7 +705,7 @@ int snd_soc_pcm_component_mmap(struct snd_pcm_substream 
*substream,
/* FIXME. it returns 1st mmap now */
for_each_rtd_components(rtd, i, component)
if (component->driver->mmap)
-   soc_component_ret(
+   return soc_component_ret(
component,
component->driver->mmap(component,
substream, vma));
-- 
2.27.0



[PATCH] net: smc91x: Fix possible memory leak in smc_drv_probe()

2020-07-15 Thread Wang Hai
If try_toggle_control_gpio() fails in smc_drv_probe(), free_netdev(ndev)
should be called to free the ndev created earlier. Otherwise, a memory leak
will occur.

Fixes: 7d2911c43815 ("net: smc91x: Fix gpios for device tree based booting")
Reported-by: Hulk Robot 
Signed-off-by: Wang Hai 
---
 drivers/net/ethernet/smsc/smc91x.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/smsc/smc91x.c 
b/drivers/net/ethernet/smsc/smc91x.c
index 90410f9d3b1a..1c4fea9c3ec4 100644
--- a/drivers/net/ethernet/smsc/smc91x.c
+++ b/drivers/net/ethernet/smsc/smc91x.c
@@ -2274,7 +2274,7 @@ static int smc_drv_probe(struct platform_device *pdev)
ret = try_toggle_control_gpio(>dev, >power_gpio,
  "power", 0, 0, 100);
if (ret)
-   return ret;
+   goto out_free_netdev;
 
/*
 * Optional reset GPIO configured? Minimum 100 ns reset needed
@@ -2283,7 +2283,7 @@ static int smc_drv_probe(struct platform_device *pdev)
ret = try_toggle_control_gpio(>dev, >reset_gpio,
  "reset", 0, 0, 100);
if (ret)
-   return ret;
+   goto out_free_netdev;
 
/*
 * Need to wait for optional EEPROM to load, max 750 us 
according
-- 
2.17.1



Re: [PATCH v12 2/2] PCI: hip: Add handling of HiSilicon HIP PCIe controller errors

2020-07-15 Thread Yicong Yang
Hi Bjorn,

Thanks for the comments.


On 2020/7/15 5:10, Bjorn Helgaas wrote:
> [+cc Lorenzo]
>
> On Mon, Jul 13, 2020 at 03:10:19PM +0100, Shiju Jose wrote:
>> From: Yicong Yang 
>>
>> The HiSilicon HIP PCIe controller is capable of handling errors
>> on root port and perform port reset separately at each root port.
> s/perform/performing/ (to match "handling")
>
>> The driver placed in the drivers/pci/controller/ because the
>> HIP PCIe controller does not use DWC ip.
> s/ip/IP/

will fix these.


> +#define HISI_PCIE_LOCAL_VALID_ERR_MISC   9
> +
> +static guid_t hisi_pcie_sec_guid =
> + GUID_INIT(0xB2889FC9, 0xE7D7, 0x4F9D,
> +   0xA8, 0x67, 0xAF, 0x42, 0xE9, 0x8B, 0xE7, 0x72);
> +
> +/*
> + * We pass core id and core port id to the ACPI reset method to identify
> + * certain root port to reset, while the firmware reports sockets port
> + * id which occurs an error. Use the macros here to do the conversion
> Maybe: 
>
>   Firmware reports the socket port ID where the error occurred.  These
>   macros convert that to the core ID and core port ID required by the
>   ACPI reset method.
>
> But even that doesn't quite make sense because you apparently get two
> values (edata->core_id, edata->port_id) from firmware.

will reword the comments.

Actually, we get the socket_id from the firmware and use it to find the
correct error handler device on the same socket in hisi_pcie_notify_error().
As for the port id and core port id: the driver gets the port id indexed per
socket, but the firmware needs the port id indexed per core to locate the
right register, so we need these macros to do the conversion.

>> + */
>> +#define HISI_PCIE_CORE_ID(v) ((v) >> 3)
>> +#define HISI_PCIE_PORT_ID(core, v)   (((v) >> 1) + ((core) << 3))
>> +#define HISI_PCIE_CORE_PORT_ID(v)(((v) & 7) << 1)
> These would make more sense reordered and with HISI_PCIE_PORT_ID()
> rewritten like this:
>
>   #define HISI_PCIE_PORT_ID(core, v)   (((core) << 3) | ((v) >> 1))
>   #define HISI_PCIE_CORE_ID(v) ((v) >> 3)
>   #define HISI_PCIE_CORE_PORT_ID(v)(((v) & 7) << 1)

will reorder these.
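
To double-check that the three definitions stay mutually consistent after
the reorder (splitting a socket port id into core id + core port id and
recombining gives back the original value), here is a small stand-alone
sketch (user-space only; the value 13 is just an example):

#include <assert.h>
#include <stdint.h>

#define HISI_PCIE_PORT_ID(core, v)      (((core) << 3) | ((v) >> 1))
#define HISI_PCIE_CORE_ID(v)            ((v) >> 3)
#define HISI_PCIE_CORE_PORT_ID(v)       (((v) & 7) << 1)

int main(void)
{
	uint32_t socket_port_id = 13;	/* example value only */
	uint32_t core = HISI_PCIE_CORE_ID(socket_port_id);		/* 1 */
	uint32_t core_port = HISI_PCIE_CORE_PORT_ID(socket_port_id);	/* 10 */

	/* Recombining the two pieces gives back the socket port id. */
	assert(HISI_PCIE_PORT_ID(core, core_port) == socket_port_id);
	return 0;
}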

Regards,
Yicong


>
>> +
>> +struct hisi_pcie_error_data {
>> +u64 val_bits;
>> +u8  version;
>> +u8  soc_id;
>> +u8  socket_id;
>> +u8  nimbus_id;
>> +u8  sub_module_id;
>> +u8  core_id;
>> +u8  port_id;
>> +u8  err_severity;
>> +u16 err_type;
>> +u8  reserv[2];
>> +u32 err_misc[HISI_PCIE_ERR_MISC_REGS];
>> +};
>> +
>> +struct hisi_pcie_error_private {
>> +struct notifier_block   nb;
>> +struct device *dev;
>> +};
>> +
>> +enum hisi_pcie_submodule_id {
>> +HISI_PCIE_SUB_MODULE_ID_AP,
>> +HISI_PCIE_SUB_MODULE_ID_TL,
>> +HISI_PCIE_SUB_MODULE_ID_MAC,
>> +HISI_PCIE_SUB_MODULE_ID_DL,
>> +HISI_PCIE_SUB_MODULE_ID_SDI,
>> +};
>> +
>> +static const char * const hisi_pcie_sub_module[] = {
>> +[HISI_PCIE_SUB_MODULE_ID_AP]= "AP Layer",
>> +[HISI_PCIE_SUB_MODULE_ID_TL]= "TL Layer",
>> +[HISI_PCIE_SUB_MODULE_ID_MAC]   = "MAC Layer",
>> +[HISI_PCIE_SUB_MODULE_ID_DL]= "DL Layer",
>> +[HISI_PCIE_SUB_MODULE_ID_SDI]   = "SDI Layer",
>> +};
>> +
>> +enum hisi_pcie_err_severity {
>> +HISI_PCIE_ERR_SEV_RECOVERABLE,
>> +HISI_PCIE_ERR_SEV_FATAL,
>> +HISI_PCIE_ERR_SEV_CORRECTED,
>> +HISI_PCIE_ERR_SEV_NONE,
>> +};
>> +
>> +static const char * const hisi_pcie_error_sev[] = {
>> +[HISI_PCIE_ERR_SEV_RECOVERABLE] = "recoverable",
>> +[HISI_PCIE_ERR_SEV_FATAL]   = "fatal",
>> +[HISI_PCIE_ERR_SEV_CORRECTED]   = "corrected",
>> +[HISI_PCIE_ERR_SEV_NONE]= "none",
>> +};
>> +
>> +static const char *hisi_pcie_get_string(const char * const *array,
>> +size_t n, u32 id)
>> +{
>> +u32 index;
>> +
>> +for (index = 0; index < n; index++) {
>> +if (index == id && array[index])
>> +return array[index];
>> +}
>> +
>> +return "unknown";
>> +}
>> +
>> +static int hisi_pcie_port_reset(struct platform_device *pdev,
>> +u32 chip_id, u32 port_id)
>> +{
>> +struct device *dev = >dev;
>> +acpi_handle handle = ACPI_HANDLE(dev);
>> +union acpi_object arg[3];
>> +struct acpi_object_list arg_list;
>> +acpi_status s;
>> +unsigned long long data = 0;
>> +
>> +arg[0].type = ACPI_TYPE_INTEGER;
>> +arg[0].integer.value = chip_id;
>> +arg[1].type = ACPI_TYPE_INTEGER;
>> +arg[1].integer.value = HISI_PCIE_CORE_ID(port_id);
>> +arg[2].type = ACPI_TYPE_INTEGER;
>> +arg[2].integer.value = HISI_PCIE_CORE_PORT_ID(port_id);
>> +
>> +arg_list.count = 3;
>> +arg_list.pointer = arg;
>> +
>> +s = acpi_evaluate_integer(handle, "RST", _list, );
>> +if (ACPI_FAILURE(s)) {
>> +dev_err(dev, "No RST method\n");
>> +return -EIO;
>> +}
>> +
>> +if (data) {
>> 

Re: [PATCH 3/9 v2 net-next] net: wimax: fix duplicate words in comments

2020-07-15 Thread Jakub Kicinski
On Wed, 15 Jul 2020 20:35:34 -0700 Randy Dunlap wrote:
> On 7/15/20 8:34 PM, Jakub Kicinski wrote:
> > On Wed, 15 Jul 2020 09:42:40 -0700 Randy Dunlap wrote:  
> >>  /*
> >> - * CPP sintatic sugar to generate A_B like symbol names when one of
> >> - * the arguments is a a preprocessor #define.
> >> + * CPP syntatic sugar to generate A_B like symbol names when one of  
> > 
> > synta*c*tic
> > 
> > Let me fix that up before applying.  
> 
> eww. Thanks.

Applied, pushed. Thanks!


[PATCH 0/3] KVM: VMX: Clean up RTIT MAXPHYADDR usage

2020-07-15 Thread Sean Christopherson
Stop using cpuid_query_maxphyaddr() for a random RTIT MSR check and
unexport said function to discourage future use.

Sean Christopherson (3):
  KVM: VMX: Use precomputed MAXPHYADDR for RTIT base MSR check
  KVM: VMX: Replace MSR_IA32_RTIT_OUTPUT_BASE_MASK with helper function
  KVM: x86: Unexport cpuid_query_maxphyaddr()

 arch/x86/kvm/cpuid.c   |  1 -
 arch/x86/kvm/vmx/vmx.c | 11 +++
 2 files changed, 7 insertions(+), 5 deletions(-)

-- 
2.26.0



[PATCH 3/3] KVM: x86: Unexport cpuid_query_maxphyaddr()

2020-07-15 Thread Sean Christopherson
Stop exporting cpuid_query_maxphyaddr() now that it's not being abused
by VMX.

Signed-off-by: Sean Christopherson 
---
 arch/x86/kvm/cpuid.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
index 7d92854082a14..e4a8065fbddd7 100644
--- a/arch/x86/kvm/cpuid.c
+++ b/arch/x86/kvm/cpuid.c
@@ -188,7 +188,6 @@ int cpuid_query_maxphyaddr(struct kvm_vcpu *vcpu)
 not_found:
return 36;
 }
-EXPORT_SYMBOL_GPL(cpuid_query_maxphyaddr);
 
 /* when an old userspace process fills a new kernel module */
 int kvm_vcpu_ioctl_set_cpuid(struct kvm_vcpu *vcpu,
-- 
2.26.0



[PATCH 1/3] KVM: VMX: Use precomputed MAXPHYADDR for RTIT base MSR check

2020-07-15 Thread Sean Christopherson
Use cpuid_maxphyaddr() instead of cpuid_query_maxphyaddr() for the
RTIT base MSR check.  There is no reason to recompute MAXPHYADDR as the
precomputed version is synchronized with CPUID updates, and
MSR_IA32_RTIT_OUTPUT_BASE is not written between stuffing CPUID and
refreshing vcpu->arch.maxphyaddr.

Signed-off-by: Sean Christopherson 
---
 arch/x86/kvm/vmx/vmx.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index 1bb59ae5016dc..50b7e85d37352 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -146,7 +146,7 @@ module_param_named(preemption_timer, 
enable_preemption_timer, bool, S_IRUGO);
RTIT_STATUS_BYTECNT))
 
 #define MSR_IA32_RTIT_OUTPUT_BASE_MASK \
-   (~((1UL << cpuid_query_maxphyaddr(vcpu)) - 1) | 0x7f)
+   (~((1UL << cpuid_maxphyaddr(vcpu)) - 1) | 0x7f)
 
 /*
  * These 2 parameters are used to config the controls for Pause-Loop Exiting:
-- 
2.26.0



[PATCH 2/3] KVM: VMX: Replace MSR_IA32_RTIT_OUTPUT_BASE_MASK with helper function

2020-07-15 Thread Sean Christopherson
Replace the subtly not-a-constant MSR_IA32_RTIT_OUTPUT_BASE_MASK with a
proper helper function to check whether or not the specified base is
valid.  Blindly referencing the local 'vcpu' is especially nasty.

Signed-off-by: Sean Christopherson 
---
 arch/x86/kvm/vmx/vmx.c | 11 +++
 1 file changed, 7 insertions(+), 4 deletions(-)

diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index 50b7e85d37352..cf3c3562e843c 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -145,9 +145,6 @@ module_param_named(preemption_timer, 
enable_preemption_timer, bool, S_IRUGO);
RTIT_STATUS_ERROR | RTIT_STATUS_STOPPED | \
RTIT_STATUS_BYTECNT))
 
-#define MSR_IA32_RTIT_OUTPUT_BASE_MASK \
-   (~((1UL << cpuid_maxphyaddr(vcpu)) - 1) | 0x7f)
-
 /*
  * These 2 parameters are used to config the controls for Pause-Loop Exiting:
  * ple_gap:upper bound on the amount of time between two successive
@@ -1036,6 +1033,12 @@ static inline bool pt_can_write_msr(struct vcpu_vmx *vmx)
   !(vmx->pt_desc.guest.ctl & RTIT_CTL_TRACEEN);
 }
 
+static inline bool pt_output_base_valid(struct kvm_vcpu *vcpu, u64 base)
+{
+   /* The base must be 128-byte aligned and a legal physical address. */
+   return !(base & (~((1UL << cpuid_maxphyaddr(vcpu)) - 1) | 0x7f));
+}
+
 static inline void pt_load_msr(struct pt_ctx *ctx, u32 addr_range)
 {
u32 i;
@@ -2193,7 +2196,7 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct 
msr_data *msr_info)
!intel_pt_validate_cap(vmx->pt_desc.caps,
   PT_CAP_single_range_output))
return 1;
-   if (data & MSR_IA32_RTIT_OUTPUT_BASE_MASK)
+   if (!pt_output_base_valid(vcpu, data))
return 1;
vmx->pt_desc.guest.output_base = data;
break;
-- 
2.26.0



Re: [PATCH v2 2/2] soc: mediatek: add mtk-devapc driver

2020-07-15 Thread Neal Liu
Hi Chun-Kuang,

On Thu, 2020-07-16 at 07:46 +0800, Chun-Kuang Hu wrote:
> HI, Neal:
> 
> Neal Liu  於 2020年7月9日 週四 下午5:13寫道:
> >
> > MediaTek bus fabric provides TrustZone security support and data
> > protection to prevent slaves from being accessed by unexpected
> > masters.
> > The security violation is logged and sent to the processor for
> > further analysis or countermeasures.
> >
> > Any occurrence of security violation would raise an interrupt, and
> > it will be handled by mtk-devapc driver. The violation
> > information is printed in order to find the murderer.
> >
> > Signed-off-by: Neal Liu 
> > ---
> 
> [snip]
> 
> > +
> > +/*
> > + * mtk_devapc_dump_vio_dbg - shift & dump the violation debug information.
> > + */
> > +static bool mtk_devapc_dump_vio_dbg(struct mtk_devapc_context *devapc_ctx,
> > +   int slave_type, int *vio_idx)
> > +{
> > +   const struct mtk_device_info **device_info;
> > +   u32 shift_bit;
> > +   int i;
> > +
> > +   device_info = devapc_ctx->device_info;
> > +
> > +   for (i = 0; i < get_vio_slave_num(slave_type); i++) {
> > +   *vio_idx = device_info[slave_type][i].vio_index;
> > +
> > +   if (check_vio_mask(devapc_ctx, slave_type, *vio_idx))
> > +   continue;
> 
> I guess if one vio_idx is masked, its status would never be true. If
> my guess is right, I think you could skip check_vio_mask() and
> directly call check_vio_status().

No. Even if a vio_idx is masked, its vio_status will still be raised when
a violation is triggered.

> 
> > +
> > +   if (!check_vio_status(devapc_ctx, slave_type, *vio_idx))
> > +   continue;
> > +
> > +   shift_bit = get_shift_group(devapc_ctx, slave_type, 
> > *vio_idx);
> > +
> > +   if (!sync_vio_dbg(devapc_ctx, slave_type, shift_bit))
> > +   continue;
> > +
> > +   devapc_extract_vio_dbg(devapc_ctx, slave_type);
> > +
> > +   return true;
> 
> I think multiple vio_idx could be raised at the same time, so why process just one?

We process one vio_idx per interrupt.
If multiple vio_idx are raised, another interrupt will be triggered to
handle them.

> 
> Regards,
> Chun-Kuang.
> 
> > +   }
> > +
> > +   return false;
> > +}



[PATCH 3/9] KVM: VMX: Drop a duplicate declaration of construct_eptp()

2020-07-15 Thread Sean Christopherson
Remove an extra declaration of construct_eptp() from vmx.h.

Signed-off-by: Sean Christopherson 
---
 arch/x86/kvm/vmx/vmx.h | 2 --
 1 file changed, 2 deletions(-)

diff --git a/arch/x86/kvm/vmx/vmx.h b/arch/x86/kvm/vmx/vmx.h
index 0d06951e607ce..0e8d25b0cec35 100644
--- a/arch/x86/kvm/vmx/vmx.h
+++ b/arch/x86/kvm/vmx/vmx.h
@@ -537,8 +537,6 @@ static inline struct vmcs *alloc_vmcs(bool shadow)
  GFP_KERNEL_ACCOUNT);
 }
 
-u64 construct_eptp(struct kvm_vcpu *vcpu, unsigned long root_hpa);
-
 static inline void decache_tsc_multiplier(struct vcpu_vmx *vmx)
 {
vmx->current_tsc_ratio = vmx->vcpu.arch.tsc_scaling_ratio;
-- 
2.26.0



[PATCH 8/9] KVM: x86/mmu: Rename max_page_level to max_huge_page_level

2020-07-15 Thread Sean Christopherson
Rename max_page_level to explicitly call out that it tracks the max huge
page level so as to avoid confusion when a future patch moves the max
TDP level, i.e. max root level, into the MMU and kvm_configure_mmu().

Signed-off-by: Sean Christopherson 
---
 arch/x86/kvm/mmu/mmu.c | 14 +++---
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
index 559b4b92b5e27..c867b35759ab5 100644
--- a/arch/x86/kvm/mmu/mmu.c
+++ b/arch/x86/kvm/mmu/mmu.c
@@ -92,7 +92,7 @@ module_param_named(flush_on_reuse, 
force_flush_and_sync_on_reuse, bool, 0644);
  */
 bool tdp_enabled = false;
 
-static int max_page_level __read_mostly;
+static int max_huge_page_level __read_mostly;
 
 enum {
AUDIT_PRE_PAGE_FAULT,
@@ -3256,7 +3256,7 @@ static int kvm_mmu_hugepage_adjust(struct kvm_vcpu *vcpu, 
gfn_t gfn,
if (!slot)
return PG_LEVEL_4K;
 
-   max_level = min(max_level, max_page_level);
+   max_level = min(max_level, max_huge_page_level);
for ( ; max_level > PG_LEVEL_4K; max_level--) {
linfo = lpage_info_slot(gfn, slot, max_level);
if (!linfo->disallow_lpage)
@@ -5580,23 +5580,23 @@ void kvm_mmu_invpcid_gva(struct kvm_vcpu *vcpu, gva_t 
gva, unsigned long pcid)
 }
 EXPORT_SYMBOL_GPL(kvm_mmu_invpcid_gva);
 
-void kvm_configure_mmu(bool enable_tdp, int tdp_page_level)
+void kvm_configure_mmu(bool enable_tdp, int tdp_huge_page_level)
 {
tdp_enabled = enable_tdp;
 
/*
-* max_page_level reflects the capabilities of KVM's MMU irrespective
+* max_huge_page_level reflects KVM's MMU capabilities irrespective
 * of kernel support, e.g. KVM may be capable of using 1GB pages when
 * the kernel is not.  But, KVM never creates a page size greater than
 * what is used by the kernel for any given HVA, i.e. the kernel's
 * capabilities are ultimately consulted by kvm_mmu_hugepage_adjust().
 */
if (tdp_enabled)
-   max_page_level = tdp_page_level;
+   max_huge_page_level = tdp_huge_page_level;
else if (boot_cpu_has(X86_FEATURE_GBPAGES))
-   max_page_level = PG_LEVEL_1G;
+   max_huge_page_level = PG_LEVEL_1G;
else
-   max_page_level = PG_LEVEL_2M;
+   max_huge_page_level = PG_LEVEL_2M;
 }
 EXPORT_SYMBOL_GPL(kvm_configure_mmu);
 
-- 
2.26.0



[PATCH 2/3 v3] usb: typec: tcpci: Support BIST test data mode for compliance.

2020-07-15 Thread Badhri Jagan Sridharan
Quoting from TCPCI spec:
"Setting this bit to 1 is intended to be used only when a USB compliance
tester is using USB BIST Test Data to test the PHY layer of the TCPC. The
TCPM should clear this bit when a disconnect is detected.
0: Normal Operation. Incoming messages enabled by RECEIVE_DETECT
passed to TCPM via Alert.
1: BIST Test Mode. Incoming messages enabled by RECEIVE_DETECT
result in GoodCRC response but may not be passed to the TCPM via
Alert."

Signed-off-by: Badhri Jagan Sridharan 
Reviewed-by: Guenter Roeck 
---
Changes since V1:(Guenter's suggestions)
- Split the change into two: TCPM and TCPCI
- Move BIST log to TCPM log
- Alignment and column count changes

Changes since V2:(Guenter's suggestions)
- Re-ordered patchset
- Included Reviewed-by tag
---
 drivers/usb/typec/tcpm/tcpci.c | 9 +
 drivers/usb/typec/tcpm/tcpci.h | 1 +
 2 files changed, 10 insertions(+)

diff --git a/drivers/usb/typec/tcpm/tcpci.c b/drivers/usb/typec/tcpm/tcpci.c
index 753645bb25273a..f57d91fd0e0924 100644
--- a/drivers/usb/typec/tcpm/tcpci.c
+++ b/drivers/usb/typec/tcpm/tcpci.c
@@ -227,6 +227,14 @@ static int tcpci_set_vconn(struct tcpc_dev *tcpc, bool 
enable)
enable ? TCPC_POWER_CTRL_VCONN_ENABLE : 0);
 }
 
+static int tcpci_set_bist_data(struct tcpc_dev *tcpc, bool enable)
+{
+   struct tcpci *tcpci = tcpc_to_tcpci(tcpc);
+
+   return regmap_update_bits(tcpci->regmap, TCPC_TCPC_CTRL, 
TCPC_TCPC_CTRL_BIST_TM,
+enable ? TCPC_TCPC_CTRL_BIST_TM : 0);
+}
+
 static int tcpci_set_roles(struct tcpc_dev *tcpc, bool attached,
   enum typec_role role, enum typec_data_role data)
 {
@@ -530,6 +538,7 @@ struct tcpci *tcpci_register_port(struct device *dev, 
struct tcpci_data *data)
tcpci->tcpc.set_pd_rx = tcpci_set_pd_rx;
tcpci->tcpc.set_roles = tcpci_set_roles;
tcpci->tcpc.pd_transmit = tcpci_pd_transmit;
+   tcpci->tcpc.set_bist_data = tcpci_set_bist_data;
 
err = tcpci_parse_config(tcpci);
if (err < 0)
diff --git a/drivers/usb/typec/tcpm/tcpci.h b/drivers/usb/typec/tcpm/tcpci.h
index 303ebde265465c..11c36d086c8608 100644
--- a/drivers/usb/typec/tcpm/tcpci.h
+++ b/drivers/usb/typec/tcpm/tcpci.h
@@ -36,6 +36,7 @@
 
 #define TCPC_TCPC_CTRL 0x19
 #define TCPC_TCPC_CTRL_ORIENTATION BIT(0)
+#define TCPC_TCPC_CTRL_BIST_TM BIT(1)
 
 #define TCPC_ROLE_CTRL 0x1a
 #define TCPC_ROLE_CTRL_DRP BIT(6)
-- 
2.27.0.389.gc38d7665816-goog



[PATCH 3/3 v3] usb: typec: tcpm: Stay in BIST mode till hardreset or unattached

2020-07-15 Thread Badhri Jagan Sridharan
Port starts to toggle when transitioning to unattached state.
This is incorrect while in BIST mode.

6.4.3.1 BIST Carrier Mode
Upon receipt of a BIST Message, with a BIST Carrier Mode BIST Data Object,
the UUT Shall send out a continuous string of BMC encoded alternating "1"s
and "0"s. The UUT Shall exit the Continuous BIST Mode within
tBISTContMode of this Continuous BIST Mode being enabled(see
Section 6.6.7.2).

6.4.3.2 BIST Test Data
Upon receipt of a BIST Message, with a BIST Test Data BIST Data Object,
the UUT Shall return a GoodCRC Message and Shall enter a test mode in which
it sends no further Messages except for GoodCRC Messages in response to
received Messages. See Section 5.9.2 for the definition of the Test Data
Frame. The test Shall be ended by sending Hard Reset Signaling to reset the
UUT.

Signed-off-by: Badhri Jagan Sridharan 
---
Version history:
Changes since V1:
- None

Changes since V2:(Guenter's suggestions)
- Fixed formatting error
- Reduced timeout to 50ms

---
 drivers/usb/typec/tcpm/tcpm.c | 7 +--
 include/linux/usb/pd.h| 1 +
 2 files changed, 6 insertions(+), 2 deletions(-)

diff --git a/drivers/usb/typec/tcpm/tcpm.c b/drivers/usb/typec/tcpm/tcpm.c
index 379fcab9dbd973..38b958a9650104 100644
--- a/drivers/usb/typec/tcpm/tcpm.c
+++ b/drivers/usb/typec/tcpm/tcpm.c
@@ -3559,6 +3559,8 @@ static void run_state_machine(struct tcpm_port *port)
switch (BDO_MODE_MASK(port->bist_request)) {
case BDO_MODE_CARRIER2:
tcpm_pd_transmit(port, TCPC_TX_BIST_MODE_2, NULL);
+   tcpm_set_state(port, unattached_state(port),
+  PD_T_BIST_CONT_MODE);
break;
case BDO_MODE_TESTDATA:
if (port->tcpc->set_bist_data) {
@@ -3569,8 +3571,6 @@ static void run_state_machine(struct tcpm_port *port)
default:
break;
}
-   /* Always switch to unattached state */
-   tcpm_set_state(port, unattached_state(port), 0);
break;
case GET_STATUS_SEND:
tcpm_pd_send_control(port, PD_CTRL_GET_STATUS);
@@ -3960,6 +3960,9 @@ static void _tcpm_pd_vbus_off(struct tcpm_port *port)
 static void _tcpm_pd_hard_reset(struct tcpm_port *port)
 {
tcpm_log_force(port, "Received hard reset");
+   if (port->bist_request == BDO_MODE_TESTDATA && 
port->tcpc->set_bist_data)
+   port->tcpc->set_bist_data(port->tcpc, false);
+
/*
 * If we keep receiving hard reset requests, executing the hard reset
 * must have failed. Revert to error recovery if that happens.
diff --git a/include/linux/usb/pd.h b/include/linux/usb/pd.h
index a665d7f211424d..b6c233e79bd457 100644
--- a/include/linux/usb/pd.h
+++ b/include/linux/usb/pd.h
@@ -483,4 +483,5 @@ static inline unsigned int rdo_max_power(u32 rdo)
 #define PD_N_CAPS_COUNT(PD_T_NO_RESPONSE / 
PD_T_SEND_SOURCE_CAP)
 #define PD_N_HARD_RESET_COUNT  2
 
+#define PD_T_BIST_CONT_MODE50 /* 30 - 60 ms */
 #endif /* __LINUX_USB_PD_H */
-- 
2.27.0.389.gc38d7665816-goog



[PATCH 6/9] KVM: VXM: Remove temporary WARN on expected vs. actual EPTP level mismatch

2020-07-15 Thread Sean Christopherson
Remove the WARN in vmx_load_mmu_pgd() that was temporarily added to aid
bisection/debug in the event the current MMU's shadow root level didn't
match VMX's computed EPTP level.

Signed-off-by: Sean Christopherson 
---
 arch/x86/kvm/vmx/vmx.c | 10 --
 1 file changed, 10 deletions(-)

diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index 244053cff0a3a..da75878171cea 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -3072,14 +3072,6 @@ static int vmx_get_tdp_level(struct kvm_vcpu *vcpu)
return 4;
 }
 
-static int get_ept_level(struct kvm_vcpu *vcpu)
-{
-   if (is_guest_mode(vcpu) && nested_cpu_has_ept(get_vmcs12(vcpu)))
-   return vmx_eptp_page_walk_level(nested_ept_get_eptp(vcpu));
-
-   return vmx_get_tdp_level(vcpu);
-}
-
 u64 construct_eptp(struct kvm_vcpu *vcpu, unsigned long root_hpa,
   int root_level)
 {
@@ -3104,8 +3096,6 @@ static void vmx_load_mmu_pgd(struct kvm_vcpu *vcpu, 
unsigned long pgd,
u64 eptp;
 
if (enable_ept) {
-   WARN_ON(pgd_level != get_ept_level(vcpu));
-
eptp = construct_eptp(vcpu, pgd, pgd_level);
vmcs_write64(EPT_POINTER, eptp);
 
-- 
2.26.0



[PATCH 4/9] KVM: VMX: Make vmx_load_mmu_pgd() static

2020-07-15 Thread Sean Christopherson
Make vmx_load_mmu_pgd() static as it is no longer invoked directly by
nested VMX (or any code for that matter).

No functional change intended.

Signed-off-by: Sean Christopherson 
---
 arch/x86/kvm/vmx/vmx.c | 2 +-
 arch/x86/kvm/vmx/vmx.h | 1 -
 2 files changed, 1 insertion(+), 2 deletions(-)

diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index 1bb59ae5016dc..791baa73e5786 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -3092,7 +3092,7 @@ u64 construct_eptp(struct kvm_vcpu *vcpu, unsigned long 
root_hpa)
return eptp;
 }
 
-void vmx_load_mmu_pgd(struct kvm_vcpu *vcpu, unsigned long pgd)
+static void vmx_load_mmu_pgd(struct kvm_vcpu *vcpu, unsigned long pgd)
 {
struct kvm *kvm = vcpu->kvm;
bool update_guest_cr3 = true;
diff --git a/arch/x86/kvm/vmx/vmx.h b/arch/x86/kvm/vmx/vmx.h
index 0e8d25b0cec35..3c55433ac1b21 100644
--- a/arch/x86/kvm/vmx/vmx.h
+++ b/arch/x86/kvm/vmx/vmx.h
@@ -338,7 +338,6 @@ void vmx_set_efer(struct kvm_vcpu *vcpu, u64 efer);
 void vmx_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0);
 int vmx_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4);
 void set_cr4_guest_host_mask(struct vcpu_vmx *vmx);
-void vmx_load_mmu_pgd(struct kvm_vcpu *vcpu, unsigned long cr3);
 void ept_save_pdptrs(struct kvm_vcpu *vcpu);
 void vmx_get_segment(struct kvm_vcpu *vcpu, struct kvm_segment *var, int seg);
 void vmx_set_segment(struct kvm_vcpu *vcpu, struct kvm_segment *var, int seg);
-- 
2.26.0



[PATCH 0/9] KVM: x86: TDP level cleanups and shadow NPT fix

2020-07-15 Thread Sean Christopherson
The primary purpose of this series is to implement a suggestion from Paolo
to have the MMU make the decision between 4 and 5 level EPT/TDP (when
5-level page tables are supported).  Having the MMU "own" the decision of
whether or not to use 5-level paging leads to a variety of nice cleanups,
and ultimately gets rid of another kvm_x86_ops.

Patch 1 is a fix for SVM's shadow NPT that is compile-tested only.  I
don't know enough about the shadow NPT details to know if it's a "real"
bug or just a superficial oddity that can't actually cause problems.

"Remove temporary WARN on expected vs. actual EPTP level mismatch" could
easily be squashed with "Pull the PGD's level from the MMU instead of
recalculating it", I threw it in as a separate patch to provide a
bisection helper in case things go sideways.

Sean Christopherson (9):
  KVM: nSVM: Correctly set the shadow NPT root level in its MMU role
  KVM: x86/mmu: Add separate helper for shadow NPT root page role calc
  KVM: VMX: Drop a duplicate declaration of construct_eptp()
  KVM: VMX: Make vmx_load_mmu_pgd() static
  KVM: x86: Pull the PGD's level from the MMU instead of recalculating
it
  KVM: VXM: Remove temporary WARN on expected vs. actual EPTP level
mismatch
  KVM: x86: Dynamically calculate TDP level from max level and
MAXPHYADDR
  KVM: x86/mmu: Rename max_page_level to max_huge_page_level
  KVM: x86: Specify max TDP level via kvm_configure_mmu()

 arch/x86/include/asm/kvm_host.h |  9 ++---
 arch/x86/kvm/cpuid.c|  2 --
 arch/x86/kvm/mmu.h  | 10 --
 arch/x86/kvm/mmu/mmu.c  | 63 +
 arch/x86/kvm/svm/nested.c   |  1 -
 arch/x86/kvm/svm/svm.c  |  8 ++---
 arch/x86/kvm/vmx/nested.c   |  2 +-
 arch/x86/kvm/vmx/vmx.c  | 31 +++-
 arch/x86/kvm/vmx/vmx.h  |  6 ++--
 arch/x86/kvm/x86.c  |  1 -
 10 files changed, 81 insertions(+), 52 deletions(-)

-- 
2.26.0



[PATCH 7/9] KVM: x86: Dynamically calculate TDP level from max level and MAXPHYADDR

2020-07-15 Thread Sean Christopherson
Calculate the desired TDP level on the fly using the max TDP level and
MAXPHYADDR instead of doing the same when CPUID is updated.  This avoids
the hidden dependency on cpuid_maxphyaddr() in vmx_get_tdp_level() and
also standardizes the "use 5-level paging iff MAXPHYADDR > 48" behavior
across x86.

Suggested-by: Paolo Bonzini 
Signed-off-by: Sean Christopherson 
---
 arch/x86/include/asm/kvm_host.h |  4 ++--
 arch/x86/kvm/cpuid.c|  2 --
 arch/x86/kvm/mmu/mmu.c  | 17 +
 arch/x86/kvm/svm/svm.c  |  4 ++--
 arch/x86/kvm/vmx/vmx.c  |  6 +++---
 arch/x86/kvm/x86.c  |  2 +-
 6 files changed, 21 insertions(+), 14 deletions(-)

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index ce60f4c38843f..ffd45b68e1d46 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -639,7 +639,7 @@ struct kvm_vcpu_arch {
struct kvm_cpuid_entry2 cpuid_entries[KVM_MAX_CPUID_ENTRIES];
 
int maxphyaddr;
-   int tdp_level;
+   int max_tdp_level;
 
/* emulate context */
 
@@ -1133,7 +1133,7 @@ struct kvm_x86_ops {
int (*sync_pir_to_irr)(struct kvm_vcpu *vcpu);
int (*set_tss_addr)(struct kvm *kvm, unsigned int addr);
int (*set_identity_map_addr)(struct kvm *kvm, u64 ident_addr);
-   int (*get_tdp_level)(struct kvm_vcpu *vcpu);
+   int (*get_max_tdp_level)(void);
u64 (*get_mt_mask)(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio);
 
void (*load_mmu_pgd)(struct kvm_vcpu *vcpu, unsigned long pgd,
diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
index 7d92854082a14..fa873e3e6e90e 100644
--- a/arch/x86/kvm/cpuid.c
+++ b/arch/x86/kvm/cpuid.c
@@ -140,9 +140,7 @@ static void kvm_vcpu_after_set_cpuid(struct kvm_vcpu *vcpu)
vcpu->arch.guest_supported_xcr0 =
(best->eax | ((u64)best->edx << 32)) & supported_xcr0;
 
-   /* Note, maxphyaddr must be updated before tdp_level. */
vcpu->arch.maxphyaddr = cpuid_query_maxphyaddr(vcpu);
-   vcpu->arch.tdp_level = kvm_x86_ops.get_tdp_level(vcpu);
kvm_mmu_reset_context(vcpu);
 
kvm_pmu_refresh(vcpu);
diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
index 0fb033ce6cc57..559b4b92b5e27 100644
--- a/arch/x86/kvm/mmu/mmu.c
+++ b/arch/x86/kvm/mmu/mmu.c
@@ -4846,13 +4846,22 @@ static union kvm_mmu_role 
kvm_calc_mmu_role_common(struct kvm_vcpu *vcpu,
return role;
 }
 
+static inline int kvm_mmu_get_tdp_level(struct kvm_vcpu *vcpu)
+{
+   /* Use 5-level TDP if and only if it's useful/necessary. */
+   if (vcpu->arch.max_tdp_level == 5 && cpuid_maxphyaddr(vcpu) <= 48)
+   return 4;
+
+   return vcpu->arch.max_tdp_level;
+}
+
 static union kvm_mmu_role
 kvm_calc_tdp_mmu_root_page_role(struct kvm_vcpu *vcpu, bool base_only)
 {
union kvm_mmu_role role = kvm_calc_mmu_role_common(vcpu, base_only);
 
role.base.ad_disabled = (shadow_accessed_mask == 0);
-   role.base.level = vcpu->arch.tdp_level;
+   role.base.level = kvm_mmu_get_tdp_level(vcpu);
role.base.direct = true;
role.base.gpte_is_8_bytes = true;
 
@@ -4873,7 +4882,7 @@ static void init_kvm_tdp_mmu(struct kvm_vcpu *vcpu)
context->sync_page = nonpaging_sync_page;
context->invlpg = NULL;
context->update_pte = nonpaging_update_pte;
-   context->shadow_root_level = vcpu->arch.tdp_level;
+   context->shadow_root_level = kvm_mmu_get_tdp_level(vcpu);
context->direct_map = true;
context->get_guest_pgd = get_cr3;
context->get_pdptr = kvm_pdptr_read;
@@ -4973,7 +4982,7 @@ kvm_calc_shadow_npt_root_page_role(struct kvm_vcpu *vcpu)
kvm_calc_shadow_root_page_role_common(vcpu, false);
 
role.base.direct = false;
-   role.base.level = vcpu->arch.tdp_level;
+   role.base.level = kvm_mmu_get_tdp_level(vcpu);
 
return role;
 }
@@ -5683,7 +5692,7 @@ static int alloc_mmu_pages(struct kvm_vcpu *vcpu, struct 
kvm_mmu *mmu)
 * SVM's 32-bit NPT support, TDP paging doesn't use PAE paging and can
 * skip allocating the PDP table.
 */
-   if (tdp_enabled && vcpu->arch.tdp_level > PT32E_ROOT_LEVEL)
+   if (tdp_enabled && kvm_mmu_get_tdp_level(vcpu) > PT32E_ROOT_LEVEL)
return 0;
 
page = alloc_page(GFP_KERNEL_ACCOUNT | __GFP_DMA32);
diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
index c70d7dd333061..c94faca46e760 100644
--- a/arch/x86/kvm/svm/svm.c
+++ b/arch/x86/kvm/svm/svm.c
@@ -254,7 +254,7 @@ static inline void invlpga(unsigned long addr, u32 asid)
asm volatile (__ex("invlpga %1, %0") : : "c"(asid), "a"(addr));
 }
 
-static int get_npt_level(struct kvm_vcpu *vcpu)
+static int get_max_npt_level(void)
 {
 #ifdef CONFIG_X86_64
return PT64_ROOT_4LEVEL;
@@ -4109,7 +4109,7 @@ static struct kvm_x86_ops svm_x86_ops __initdata = {
 
.set_tss_addr = 

[PATCH 5/9] KVM: x86: Pull the PGD's level from the MMU instead of recalculating it

2020-07-15 Thread Sean Christopherson
Use the shadow_root_level from the current MMU as the root level for the
PGD, i.e. for VMX's EPTP.  This eliminates the weird dependency between
VMX and the MMU where both must independently calculate the same root
level for things to work correctly.  Temporarily keep VMX's calculation
of the level and use it to WARN if the incoming level diverges.

Opportunistically refactor kvm_mmu_load_pgd() to avoid indentation hell,
and rename a 'cr3' param in the load_mmu_pgd prototype that managed to
survive the cr3 purge.

No functional change intended.

Signed-off-by: Sean Christopherson 
---
 arch/x86/include/asm/kvm_host.h |  3 ++-
 arch/x86/kvm/mmu.h  | 10 +++---
 arch/x86/kvm/svm/svm.c  |  3 ++-
 arch/x86/kvm/vmx/nested.c   |  2 +-
 arch/x86/kvm/vmx/vmx.c  | 18 --
 arch/x86/kvm/vmx/vmx.h  |  3 ++-
 6 files changed, 26 insertions(+), 13 deletions(-)

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 1bab87a444d78..ce60f4c38843f 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -1136,7 +1136,8 @@ struct kvm_x86_ops {
int (*get_tdp_level)(struct kvm_vcpu *vcpu);
u64 (*get_mt_mask)(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio);
 
-   void (*load_mmu_pgd)(struct kvm_vcpu *vcpu, unsigned long cr3);
+   void (*load_mmu_pgd)(struct kvm_vcpu *vcpu, unsigned long pgd,
+int pgd_level);
 
bool (*has_wbinvd_exit)(void);
 
diff --git a/arch/x86/kvm/mmu.h b/arch/x86/kvm/mmu.h
index 9f6554613babc..5efc6081ca138 100644
--- a/arch/x86/kvm/mmu.h
+++ b/arch/x86/kvm/mmu.h
@@ -90,9 +90,13 @@ static inline unsigned long kvm_get_active_pcid(struct 
kvm_vcpu *vcpu)
 
 static inline void kvm_mmu_load_pgd(struct kvm_vcpu *vcpu)
 {
-   if (VALID_PAGE(vcpu->arch.mmu->root_hpa))
-   kvm_x86_ops.load_mmu_pgd(vcpu, vcpu->arch.mmu->root_hpa |
-  kvm_get_active_pcid(vcpu));
+   u64 root_hpa = vcpu->arch.mmu->root_hpa;
+
+   if (!VALID_PAGE(root_hpa))
+   return;
+
+   kvm_x86_ops.load_mmu_pgd(vcpu, root_hpa | kvm_get_active_pcid(vcpu),
+vcpu->arch.mmu->shadow_root_level);
 }
 
 int kvm_tdp_page_fault(struct kvm_vcpu *vcpu, gpa_t gpa, u32 error_code,
diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
index 783330d0e7b88..c70d7dd333061 100644
--- a/arch/x86/kvm/svm/svm.c
+++ b/arch/x86/kvm/svm/svm.c
@@ -3541,7 +3541,8 @@ static __no_kcsan fastpath_t svm_vcpu_run(struct kvm_vcpu 
*vcpu)
return exit_fastpath;
 }
 
-static void svm_load_mmu_pgd(struct kvm_vcpu *vcpu, unsigned long root)
+static void svm_load_mmu_pgd(struct kvm_vcpu *vcpu, unsigned long root,
+int root_level)
 {
struct vcpu_svm *svm = to_svm(vcpu);
unsigned long cr3;
diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c
index 4d561edf6f9ca..50b56622e16a6 100644
--- a/arch/x86/kvm/vmx/nested.c
+++ b/arch/x86/kvm/vmx/nested.c
@@ -2162,7 +2162,7 @@ static void prepare_vmcs02_constant_state(struct vcpu_vmx 
*vmx)
 * consistency checks.
 */
if (enable_ept && nested_early_check)
-   vmcs_write64(EPT_POINTER, construct_eptp(>vcpu, 0));
+   vmcs_write64(EPT_POINTER, construct_eptp(>vcpu, 0, 4));
 
/* All VMFUNCs are currently emulated through L0 vmexits.  */
if (cpu_has_vmx_vmfunc())
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index 791baa73e5786..244053cff0a3a 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -2933,14 +2933,16 @@ static void vmx_flush_tlb_all(struct kvm_vcpu *vcpu)
 
 static void vmx_flush_tlb_current(struct kvm_vcpu *vcpu)
 {
-   u64 root_hpa = vcpu->arch.mmu->root_hpa;
+   struct kvm_mmu *mmu = vcpu->arch.mmu;
+   u64 root_hpa = mmu->root_hpa;
 
/* No flush required if the current context is invalid. */
if (!VALID_PAGE(root_hpa))
return;
 
if (enable_ept)
-   ept_sync_context(construct_eptp(vcpu, root_hpa));
+   ept_sync_context(construct_eptp(vcpu, root_hpa,
+   mmu->shadow_root_level));
else if (!is_guest_mode(vcpu))
vpid_sync_context(to_vmx(vcpu)->vpid);
else
@@ -3078,11 +3080,12 @@ static int get_ept_level(struct kvm_vcpu *vcpu)
return vmx_get_tdp_level(vcpu);
 }
 
-u64 construct_eptp(struct kvm_vcpu *vcpu, unsigned long root_hpa)
+u64 construct_eptp(struct kvm_vcpu *vcpu, unsigned long root_hpa,
+  int root_level)
 {
u64 eptp = VMX_EPTP_MT_WB;
 
-   eptp |= (get_ept_level(vcpu) == 5) ? VMX_EPTP_PWL_5 : VMX_EPTP_PWL_4;
+   eptp |= (root_level == 5) ? VMX_EPTP_PWL_5 : VMX_EPTP_PWL_4;
 
if (enable_ept_ad_bits &&
(!is_guest_mode(vcpu) || nested_ept_ad_enabled(vcpu)))
@@ -3092,7 +3095,8 

[PATCH 2/9] KVM: x86/mmu: Add separate helper for shadow NPT root page role calc

2020-07-15 Thread Sean Christopherson
Refactor the shadow NPT role calculation into a separate helper to
better differentiate it from the non-nested shadow MMU, e.g. the NPT
variant is never direct and derives its root level from the TDP level.

Signed-off-by: Sean Christopherson 
---
 arch/x86/kvm/mmu/mmu.c | 30 +-
 1 file changed, 25 insertions(+), 5 deletions(-)

diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
index 678b6209dad50..0fb033ce6cc57 100644
--- a/arch/x86/kvm/mmu/mmu.c
+++ b/arch/x86/kvm/mmu/mmu.c
@@ -4908,7 +4908,7 @@ static void init_kvm_tdp_mmu(struct kvm_vcpu *vcpu)
 }
 
 static union kvm_mmu_role
-kvm_calc_shadow_mmu_root_page_role(struct kvm_vcpu *vcpu, bool base_only)
+kvm_calc_shadow_root_page_role_common(struct kvm_vcpu *vcpu, bool base_only)
 {
union kvm_mmu_role role = kvm_calc_mmu_role_common(vcpu, base_only);
 
@@ -4916,9 +4916,19 @@ kvm_calc_shadow_mmu_root_page_role(struct kvm_vcpu 
*vcpu, bool base_only)
!is_write_protection(vcpu);
role.base.smap_andnot_wp = role.ext.cr4_smap &&
!is_write_protection(vcpu);
-   role.base.direct = !is_paging(vcpu);
role.base.gpte_is_8_bytes = !!is_pae(vcpu);
 
+   return role;
+}
+
+static union kvm_mmu_role
+kvm_calc_shadow_mmu_root_page_role(struct kvm_vcpu *vcpu, bool base_only)
+{
+   union kvm_mmu_role role =
+   kvm_calc_shadow_root_page_role_common(vcpu, base_only);
+
+   role.base.direct = !is_paging(vcpu);
+
if (!is_long_mode(vcpu))
role.base.level = PT32E_ROOT_LEVEL;
else if (is_la57_mode(vcpu))
@@ -4956,14 +4966,24 @@ static void kvm_init_shadow_mmu(struct kvm_vcpu *vcpu, 
u32 cr0, u32 cr4, u32 efe
shadow_mmu_init_context(vcpu, context, cr0, cr4, efer, 
new_role);
 }
 
+static union kvm_mmu_role
+kvm_calc_shadow_npt_root_page_role(struct kvm_vcpu *vcpu)
+{
+   union kvm_mmu_role role =
+   kvm_calc_shadow_root_page_role_common(vcpu, false);
+
+   role.base.direct = false;
+   role.base.level = vcpu->arch.tdp_level;
+
+   return role;
+}
+
 void kvm_init_shadow_npt_mmu(struct kvm_vcpu *vcpu, u32 cr0, u32 cr4, u32 efer,
 gpa_t nested_cr3)
 {
struct kvm_mmu *context = >arch.guest_mmu;
-   union kvm_mmu_role new_role =
-   kvm_calc_shadow_mmu_root_page_role(vcpu, false);
+   union kvm_mmu_role new_role = kvm_calc_shadow_npt_root_page_role(vcpu);
 
-   new_role.base.level = vcpu->arch.tdp_level;
context->shadow_root_level = new_role.base.level;
 
__kvm_mmu_new_pgd(vcpu, nested_cr3, new_role.base, false, false);
-- 
2.26.0



[PATCH 1/3 v3] usb: typec: tcpm: Support bist test data mode for compliance

2020-07-15 Thread Badhri Jagan Sridharan
TCPM supports BIST carrier mode. PD compliance tests require
BIST Test Data to be supported as well.

Introduce a set_bist_data callback to signal the tcpc driver to
configure the port controller hardware to enable/disable
BIST Test Data mode.

Signed-off-by: Badhri Jagan Sridharan 
Reviewed-by: Guenter Roeck 
---
Version history:
Changes since V1:(Guenter's suggestions)
- Split the change into two: TCPM and TCPCI
- Move BIST log to TCPM log

Changes since V2:(Guenter's suggestions)
- Re-ordered patchset
- Included Reviewed-by tag
---
 drivers/usb/typec/tcpm/tcpm.c | 11 +++
 include/linux/usb/tcpm.h  |  2 ++
 2 files changed, 13 insertions(+)

diff --git a/drivers/usb/typec/tcpm/tcpm.c b/drivers/usb/typec/tcpm/tcpm.c
index 82b19ebd7838e0..379fcab9dbd973 100644
--- a/drivers/usb/typec/tcpm/tcpm.c
+++ b/drivers/usb/typec/tcpm/tcpm.c
@@ -2746,6 +2746,11 @@ static void tcpm_detach(struct tcpm_port *port)
if (!port->attached)
return;
 
+   if (port->tcpc->set_bist_data) {
+   tcpm_log(port, "disable BIST MODE TESTDATA");
+   port->tcpc->set_bist_data(port->tcpc, false);
+   }
+
if (tcpm_port_is_disconnected(port))
port->hard_reset_count = 0;
 
@@ -3555,6 +3560,12 @@ static void run_state_machine(struct tcpm_port *port)
case BDO_MODE_CARRIER2:
tcpm_pd_transmit(port, TCPC_TX_BIST_MODE_2, NULL);
break;
+   case BDO_MODE_TESTDATA:
+   if (port->tcpc->set_bist_data) {
+   tcpm_log(port, "Enable BIST MODE TESTDATA");
+   port->tcpc->set_bist_data(port->tcpc, true);
+   }
+   break;
default:
break;
}
diff --git a/include/linux/usb/tcpm.h b/include/linux/usb/tcpm.h
index e7979c01c3517c..89f58760cf4800 100644
--- a/include/linux/usb/tcpm.h
+++ b/include/linux/usb/tcpm.h
@@ -79,6 +79,7 @@ enum tcpm_transmit_type {
  * @try_role:  Optional; called to set a preferred role
  * @pd_transmit:Called to transmit PD message
  * @mux:   Pointer to multiplexer data
+ * @set_bist_data: Turn on/off bist data mode for compliance testing
  */
 struct tcpc_dev {
struct fwnode_handle *fwnode;
@@ -103,6 +104,7 @@ struct tcpc_dev {
int (*try_role)(struct tcpc_dev *dev, int role);
int (*pd_transmit)(struct tcpc_dev *dev, enum tcpm_transmit_type type,
   const struct pd_message *msg);
+   int (*set_bist_data)(struct tcpc_dev *dev, bool on);
 };
 
 struct tcpm_port;
-- 
2.27.0.389.gc38d7665816-goog



[PATCH 1/9] KVM: nSVM: Correctly set the shadow NPT root level in its MMU role

2020-07-15 Thread Sean Christopherson
Move the initialization of shadow NPT MMU's shadow_root_level into
kvm_init_shadow_npt_mmu() and explicitly set the level in the shadow NPT
MMU's role to be the TDP level.  This ensures the role and MMU levels
are synchronized and also initialized before __kvm_mmu_new_pgd(), which
consumes the level when attempting a fast PGD switch.

Cc: Vitaly Kuznetsov 
Fixes: 9fa72119b24db ("kvm: x86: Introduce kvm_mmu_calc_root_page_role()")
Fixes: a506fdd223426 ("KVM: nSVM: implement nested_svm_load_cr3() and use it 
for host->guest switch")
Signed-off-by: Sean Christopherson 
---
 arch/x86/kvm/mmu/mmu.c| 3 +++
 arch/x86/kvm/svm/nested.c | 1 -
 2 files changed, 3 insertions(+), 1 deletion(-)

diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
index 77810ce66bdb4..678b6209dad50 100644
--- a/arch/x86/kvm/mmu/mmu.c
+++ b/arch/x86/kvm/mmu/mmu.c
@@ -4963,6 +4963,9 @@ void kvm_init_shadow_npt_mmu(struct kvm_vcpu *vcpu, u32 
cr0, u32 cr4, u32 efer,
union kvm_mmu_role new_role =
kvm_calc_shadow_mmu_root_page_role(vcpu, false);
 
+   new_role.base.level = vcpu->arch.tdp_level;
+   context->shadow_root_level = new_role.base.level;
+
__kvm_mmu_new_pgd(vcpu, nested_cr3, new_role.base, false, false);
 
if (new_role.as_u64 != context->mmu_role.as_u64)
diff --git a/arch/x86/kvm/svm/nested.c b/arch/x86/kvm/svm/nested.c
index 61378a3c2ce44..fb68467e60496 100644
--- a/arch/x86/kvm/svm/nested.c
+++ b/arch/x86/kvm/svm/nested.c
@@ -85,7 +85,6 @@ static void nested_svm_init_mmu_context(struct kvm_vcpu *vcpu)
vcpu->arch.mmu->get_guest_pgd = nested_svm_get_tdp_cr3;
vcpu->arch.mmu->get_pdptr = nested_svm_get_tdp_pdptr;
vcpu->arch.mmu->inject_page_fault = nested_svm_inject_npf_exit;
-   vcpu->arch.mmu->shadow_root_level = vcpu->arch.tdp_level;
reset_shadow_zero_bits_mask(vcpu, vcpu->arch.mmu);
vcpu->arch.walk_mmu  = >arch.nested_mmu;
 }
-- 
2.26.0



[PATCH 9/9] KVM: x86: Specify max TDP level via kvm_configure_mmu()

2020-07-15 Thread Sean Christopherson
Capture the max TDP level during kvm_configure_mmu() instead of using a
kvm_x86_ops hook to do it at every vCPU creation.

Signed-off-by: Sean Christopherson 
---
 arch/x86/include/asm/kvm_host.h | 4 ++--
 arch/x86/kvm/mmu/mmu.c  | 9 ++---
 arch/x86/kvm/svm/svm.c  | 3 +--
 arch/x86/kvm/vmx/vmx.c  | 3 +--
 arch/x86/kvm/x86.c  | 1 -
 5 files changed, 10 insertions(+), 10 deletions(-)

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index ffd45b68e1d46..5ab3af7275d81 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -1133,7 +1133,6 @@ struct kvm_x86_ops {
int (*sync_pir_to_irr)(struct kvm_vcpu *vcpu);
int (*set_tss_addr)(struct kvm *kvm, unsigned int addr);
int (*set_identity_map_addr)(struct kvm *kvm, u64 ident_addr);
-   int (*get_max_tdp_level)(void);
u64 (*get_mt_mask)(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio);
 
void (*load_mmu_pgd)(struct kvm_vcpu *vcpu, unsigned long pgd,
@@ -1509,7 +1508,8 @@ void kvm_mmu_invpcid_gva(struct kvm_vcpu *vcpu, gva_t 
gva, unsigned long pcid);
 void kvm_mmu_new_pgd(struct kvm_vcpu *vcpu, gpa_t new_pgd, bool skip_tlb_flush,
 bool skip_mmu_sync);
 
-void kvm_configure_mmu(bool enable_tdp, int tdp_page_level);
+void kvm_configure_mmu(bool enable_tdp, int tdp_max_root_level,
+  int tdp_huge_page_level);
 
 static inline u16 kvm_read_ldt(void)
 {
diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
index c867b35759ab5..862bf418214e2 100644
--- a/arch/x86/kvm/mmu/mmu.c
+++ b/arch/x86/kvm/mmu/mmu.c
@@ -93,6 +93,7 @@ module_param_named(flush_on_reuse, 
force_flush_and_sync_on_reuse, bool, 0644);
 bool tdp_enabled = false;
 
 static int max_huge_page_level __read_mostly;
+static int max_tdp_level __read_mostly;
 
 enum {
AUDIT_PRE_PAGE_FAULT,
@@ -4849,10 +4850,10 @@ static union kvm_mmu_role 
kvm_calc_mmu_role_common(struct kvm_vcpu *vcpu,
 static inline int kvm_mmu_get_tdp_level(struct kvm_vcpu *vcpu)
 {
/* Use 5-level TDP if and only if it's useful/necessary. */
-   if (vcpu->arch.max_tdp_level == 5 && cpuid_maxphyaddr(vcpu) <= 48)
+   if (max_tdp_level == 5 && cpuid_maxphyaddr(vcpu) <= 48)
return 4;
 
-   return vcpu->arch.max_tdp_level;
+   return max_tdp_level;
 }
 
 static union kvm_mmu_role
@@ -5580,9 +5581,11 @@ void kvm_mmu_invpcid_gva(struct kvm_vcpu *vcpu, gva_t 
gva, unsigned long pcid)
 }
 EXPORT_SYMBOL_GPL(kvm_mmu_invpcid_gva);
 
-void kvm_configure_mmu(bool enable_tdp, int tdp_huge_page_level)
+void kvm_configure_mmu(bool enable_tdp, int tdp_max_root_level,
+  int tdp_huge_page_level)
 {
tdp_enabled = enable_tdp;
+   max_tdp_level = tdp_max_root_level;
 
/*
 * max_huge_page_level reflects KVM's MMU capabilities irrespective
diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
index c94faca46e760..5f47b44c5c324 100644
--- a/arch/x86/kvm/svm/svm.c
+++ b/arch/x86/kvm/svm/svm.c
@@ -885,7 +885,7 @@ static __init int svm_hardware_setup(void)
if (npt_enabled && !npt)
npt_enabled = false;
 
-   kvm_configure_mmu(npt_enabled, PG_LEVEL_1G);
+   kvm_configure_mmu(npt_enabled, get_max_npt_level(), PG_LEVEL_1G);
pr_info("kvm: Nested Paging %sabled\n", npt_enabled ? "en" : "dis");
 
if (nrips) {
@@ -4109,7 +4109,6 @@ static struct kvm_x86_ops svm_x86_ops __initdata = {
 
.set_tss_addr = svm_set_tss_addr,
.set_identity_map_addr = svm_set_identity_map_addr,
-   .get_max_tdp_level = get_max_npt_level,
.get_mt_mask = svm_get_mt_mask,
 
.get_exit_info = svm_get_exit_info,
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index c0b1c7bd1248a..a70d8f6d8aba7 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -7959,7 +7959,6 @@ static struct kvm_x86_ops vmx_x86_ops __initdata = {
 
.set_tss_addr = vmx_set_tss_addr,
.set_identity_map_addr = vmx_set_identity_map_addr,
-   .get_max_tdp_level = vmx_get_max_tdp_level,
.get_mt_mask = vmx_get_mt_mask,
 
.get_exit_info = vmx_get_exit_info,
@@ -8110,7 +8109,7 @@ static __init int hardware_setup(void)
ept_lpage_level = PG_LEVEL_2M;
else
ept_lpage_level = PG_LEVEL_4K;
-   kvm_configure_mmu(enable_ept, ept_lpage_level);
+   kvm_configure_mmu(enable_ept, vmx_get_max_tdp_level(), ept_lpage_level);
 
/*
 * Only enable PML when hardware supports PML feature, and both EPT
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 6b8347d703430..831179adedaa9 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -9520,7 +9520,6 @@ int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu)
fx_init(vcpu);
 
vcpu->arch.maxphyaddr = cpuid_query_maxphyaddr(vcpu);
-   vcpu->arch.max_tdp_level = kvm_x86_ops.get_max_tdp_level();
 

Re: [PATCH net-next] net: phy: sfp: Cotsworks SFF module EEPROM fixup

2020-07-15 Thread Florian Fainelli



On 7/15/2020 8:32 PM, Chris Healy wrote:
> 
> 
> On Wed, Jul 15, 2020 at 8:10 PM Florian Fainelli wrote:
> 
> 
> 
> On 7/14/2020 10:59 AM, Chris Healy wrote:
> > Some Cotsworks SFF have invalid data in the first few bytes of the
> > module EEPROM.  This results in these modules not being detected as
> > valid modules.
> >
> > Address this by poking the correct EEPROM values into the module
> > EEPROM when the model/PN match and the existing module EEPROM contents
> > are not correct.
> >
> > Signed-off-by: Chris Healy 
> > ---
> >  drivers/net/phy/sfp.c | 44
> +++
> >  1 file changed, 44 insertions(+)
> >
> > diff --git a/drivers/net/phy/sfp.c b/drivers/net/phy/sfp.c
> > index 73c2969f11a4..2737d9b6b0ae 100644
> > --- a/drivers/net/phy/sfp.c
> > +++ b/drivers/net/phy/sfp.c
> > @@ -1632,10 +1632,43 @@ static int sfp_sm_mod_hpower(struct sfp
> *sfp, bool enable)
> >       return 0;
> >  }
> > 
> > +static int sfp_cotsworks_fixup_check(struct sfp *sfp, struct
> sfp_eeprom_id *id)
> > +{
> > +     u8 check;
> > +     int err;
> > +
> > +     if (id->base.phys_id != SFF8024_ID_SFF_8472 ||
> > +         id->base.phys_ext_id != SFP_PHYS_EXT_ID_SFP ||
> > +         id->base.connector != SFF8024_CONNECTOR_LC) {
> > +             dev_warn(sfp->dev, "Rewriting fiber module EEPROM
> with corrected values\n");
> > +             id->base.phys_id = SFF8024_ID_SFF_8472;
> > +             id->base.phys_ext_id = SFP_PHYS_EXT_ID_SFP;
> > +             id->base.connector = SFF8024_CONNECTOR_LC;
> > +             err = sfp_write(sfp, false, SFP_PHYS_ID, >base, 3);
> > +             if (err != 3) {
> > +                     dev_err(sfp->dev, "Failed to rewrite module
> EEPROM: %d\n", err);
> > +                     return err;
> > +             }
> > +
> > +             /* Cotsworks modules have been found to require a
> delay between write operations. */
> > +             mdelay(50);
> > +
> > +             /* Update base structure checksum */
> > +             check = sfp_check(>base, sizeof(id->base) - 1);
> > +             err = sfp_write(sfp, false, SFP_CC_BASE, , 1);
> > +             if (err != 1) {
> > +                     dev_err(sfp->dev, "Failed to update base
> structure checksum in fiber module EEPROM: %d\n", err);
> > +                     return err;
> > +             }
> > +     }
> > +     return 0;
> > +}
> > +
> >  static int sfp_sm_mod_probe(struct sfp *sfp, bool report)
> >  {
> >       /* SFP module inserted - read I2C data */
> >       struct sfp_eeprom_id id;
> > +     bool cotsworks_sfbg;
> >       bool cotsworks;
> >       u8 check;
> >       int ret;
> > @@ -1657,6 +1690,17 @@ static int sfp_sm_mod_probe(struct sfp
> *sfp, bool report)
> >        * serial number and date code.
> >        */
> >       cotsworks = !memcmp(id.base.vendor_name, "COTSWORKS       ",
> 16);
> > +     cotsworks_sfbg = !memcmp(id.base.vendor_pn, "SFBG", 4);
> > +
> > +     /* Cotsworks SFF module EEPROM do not always have valid phys_id,
> > +      * phys_ext_id, and connector bytes.  Rewrite SFF EEPROM
> bytes if
> > +      * Cotsworks PN matches and bytes are not correct.
> > +      */
> > +     if (cotsworks && cotsworks_sfbg) {
> > +             ret = sfp_cotsworks_fixup_check(sfp, );
> > +             if (ret < 0)
> > +                     return ret;
> > +     }
> 
> So with the fixup you introduce, should we ever go into a situation
> where:
> 
> EPROM extended structure checksum failure
> 
> is printed?
> 
> 
> From what I've been told, Cotsworks had an ordering problem where both
> the base and extended checksums were programmed before other fields
> during manufacturing, resulting in both the base and extended checksums
> being incorrect.  (I've also heard that Cotsworks resolved this issue
> late last year for all new units, but units manufactured before then
> will have incorrect checksums.)
> 
> Given that I was touching the base structure in this patch, I felt that
> updating the base checksum was warranted.  I did not consider updating
> the extended structure checksum as I wasn't changing anything else with
> the extended structure.  As such, we would still have an invalid
> extended structure checksum and get the associated error message.

That makes sense and thanks for providing the context here!
-- 
Florian
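
For reference, the base structure checksum being recomputed in the patch
above is, per SFF-8472, the low 8 bits of the byte sum over bytes 0..62 of
the A0h ID block. A minimal stand-alone sketch of that calculation
(illustration only; this mirrors what the driver's sfp_check() helper is
used for):

#include <stddef.h>
#include <stdint.h>

/* SFF-8472 CC_BASE (byte 63): low 8 bits of the sum of bytes 0..62. */
static uint8_t sff8472_cc_base(const uint8_t *a0h_page)
{
	uint8_t check = 0;
	size_t i;

	for (i = 0; i < 63; i++)
		check += a0h_page[i];

	return check;
}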


Re: [TEGRA194_CPUFREQ PATCH v6 3/3] cpufreq: Add Tegra194 cpufreq driver

2020-07-15 Thread Viresh Kumar
On 15-07-20, 20:57, Sumit Gupta wrote:
> Sorry, I missed removing this. I will wait for any other comments before
> re-spinning.

I don't have any further comments, maybe just send a new version of
this patch alone and name it v6.1.

-- 
viresh


Re: [PATCH 3/9 v2 net-next] net: wimax: fix duplicate words in comments

2020-07-15 Thread Randy Dunlap
On 7/15/20 8:34 PM, Jakub Kicinski wrote:
> On Wed, 15 Jul 2020 09:42:40 -0700 Randy Dunlap wrote:
>>  /*
>> - * CPP sintatic sugar to generate A_B like symbol names when one of
>> - * the arguments is a a preprocessor #define.
>> + * CPP syntatic sugar to generate A_B like symbol names when one of
> 
> synta*c*tic
> 
> Let me fix that up before applying.

eww. Thanks.

>> + * the arguments is a preprocessor #define.
>>   */
> 


-- 
~Randy



Re: [PATCH 3/9 v2 net-next] net: wimax: fix duplicate words in comments

2020-07-15 Thread Jakub Kicinski
On Wed, 15 Jul 2020 09:42:40 -0700 Randy Dunlap wrote:
>  /*
> - * CPP sintatic sugar to generate A_B like symbol names when one of
> - * the arguments is a a preprocessor #define.
> + * CPP syntatic sugar to generate A_B like symbol names when one of

synta*c*tic

Let me fix that up before applying.

> + * the arguments is a preprocessor #define.
>   */



Re: [PATCH 1/1] staging: android: ashmem: Fix lockdep warning for write operation

2020-07-15 Thread Eric Biggers
On Wed, Jul 15, 2020 at 07:45:27PM -0700, Suren Baghdasaryan wrote:
> syzbot report [1] describes a deadlock that occurs when a write operation
> against an ashmem fd, executed while ashmem is shrinking its cache, results
> in the following lock sequence:
> 
> Possible unsafe locking scenario:
> 
> CPU0CPU1
> 
>lock(fs_reclaim);
> lock(>s_type->i_mutex_key#13);
> lock(fs_reclaim);
>lock(>s_type->i_mutex_key#13);
> 
> kswapd takes fs_reclaim and then inode_lock while generic_perform_write
> takes inode_lock and then fs_reclaim. However ashmem does not support
> writing into backing shmem with a write syscall. The only way to change
> its content is to mmap it and operate on mapped memory. Therefore the race
> that lockdep is warning about is not valid. Resolve this by introducing a
> separate lockdep class for the backing shmem inodes.
> 
> [1]: https://lkml.kernel.org/lkml/0b5f9d059aa20...@google.com/
> 
> Signed-off-by: Suren Baghdasaryan 

Please add proper tags:

Reported-by: syzbot+7a0d9d0b26efefe61...@syzkaller.appspotmail.com
Fixes: ...
Cc: sta...@vger.kernel.org


The Reported-by tag to use was given in the original syzbot report.

- Eric
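
As background for the fix described in the commit message above: attaching a
dedicated lockdep class to the backing shmem inode is typically done with
lockdep_set_class(). A minimal kernel-context sketch (the function and key
names here are hypothetical; the actual patch may differ):

#include <linux/fs.h>
#include <linux/lockdep.h>

/* One class shared by all ashmem backing inodes, distinct from the
 * generic shmem inode class taken in the reclaim path.
 */
static struct lock_class_key backing_shmem_inode_class;

static void ashmem_set_backing_inode_lock_class(struct file *vmfile)
{
	struct inode *inode = file_inode(vmfile);

	lockdep_set_class(&inode->i_rwsem, &backing_shmem_inode_class);
}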


Re: [PATCH 2/2] debugfs: Add access restriction option

2020-07-15 Thread Randy Dunlap
Hi,

On 7/15/20 8:25 AM, Peter Enderborg wrote:
> diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
> index 9ad9210d70a1..aec81f38bfce 100644
> --- a/lib/Kconfig.debug
> +++ b/lib/Kconfig.debug
> @@ -476,6 +476,38 @@ config DEBUG_FS
>  
> If unsure, say N.
>  
> +choice
> + prompt "Debugfs default access"
> + depends on DEBUG_FS
> + default DEBUG_FS_ALLOW_ALL
> + help
> +   This select the default access restricions for debugfs.

   selects   restrictions
 
> +   It can be overridden with kernel command line option
> +   debugfs=[on,no-mount,off] The restrictions apply for API access

  ,off]. The

> +   and filesystem registration. .
> +
> +config DEBUG_FS_ALLOW_ALL
> +   bool "Access normal"
> +   help
> +   No restrictions applies. Both API and filesystem registration

  apply.

> +   is on. This is the normal default operation.
> +
> +config DEBUG_FS_DISALLOW_MOUNT
> +   bool "Do not register debugfs as filesystem"
> +   help
> +  The API is open but filesystem not loaded. Client can still do
> +  their work and readed with debug tools that does not need

                and read                    that do not need

> +  debugfs filesystem.
> +
> +config DEBUG_FS_ALLOW_NONE
> +   bool "No access"
> +   help
> +   Access is off. Clients get EPERM when trying to create nodes in

 -EPERM

> +   debugfs tree and debugfs is not registred as an filesystem.

  registered as a filesystem.


> +   Client can then back-off or continue without debugfs access.
> +
> +endchoice


Also, in many places in this Kconfig file, the indentation needs to be
fixed.  Some lines use spaces instead of one tab for indentation.
Help text (under "help") should be indented with one tab + 2 spaces.


-- 
~Randy



Re: [PATCH] opp: Increase parsed_static_opps on _of_add_opp_table_v1

2020-07-15 Thread Viresh Kumar
On 15-07-20, 23:54, Walter Lozano wrote:
> Currently, when using _of_add_opp_table_v2 parsed_static_opps is
> increased and this value is used on _opp_remove_all_static to
> check if there are static opps entries that need to be freed.
> Unfortunately this does not happen when using _of_add_opp_table_v1,
> which leads to warnings.
> 
> This patch increases parsed_static_opps on _of_add_opp_table_v1 in a
> similar way as in _of_add_opp_table_v2.
> 
> Signed-off-by: Walter Lozano 
> ---
> 
>  drivers/opp/of.c | 2 ++
>  1 file changed, 2 insertions(+)
> 
> diff --git a/drivers/opp/of.c b/drivers/opp/of.c
> index 9a5873591a40..b2bc82bf8b42 100644
> --- a/drivers/opp/of.c
> +++ b/drivers/opp/of.c
> @@ -917,6 +917,8 @@ static int _of_add_opp_table_v1(struct device *dev, 
> struct opp_table *opp_table)
>   nr -= 2;
>   }
>  
> + opp_table->parsed_static_opps++;
> +
>   return ret;
>  }

Merged with this and added relevant Fixes and stable tags.

diff --git a/drivers/opp/of.c b/drivers/opp/of.c
index b2bc82bf8b42..314f306140a1 100644
--- a/drivers/opp/of.c
+++ b/drivers/opp/of.c
@@ -902,6 +902,10 @@ static int _of_add_opp_table_v1(struct device *dev, struct 
opp_table *opp_table)
return -EINVAL;
}
 
+   mutex_lock(&opp_table->lock);
+   opp_table->parsed_static_opps = 1;
+   mutex_unlock(&opp_table->lock);
+
val = prop->value;
while (nr) {
unsigned long freq = be32_to_cpup(val++) * 1000;
@@ -917,8 +921,6 @@ static int _of_add_opp_table_v1(struct device *dev, struct 
opp_table *opp_table)
nr -= 2;
}
 
-   opp_table->parsed_static_opps++;
-
return ret;
 }
 
-- 
viresh


Re: [PATCH] SUNDANCE NETWORK DRIVER: Replace HTTP links with HTTPS ones

2020-07-15 Thread Joe Perches
On Wed, 2020-07-15 at 17:44 -0700, Jakub Kicinski wrote:
> On Thu,  9 Jul 2020 22:49:25 +0200 Alexander A. Klimov wrote:
> > Rationale:
> > Reduces attack surface on kernel devs opening the links for MITM
> > as HTTPS traffic is much harder to manipulate.
> > 
> > Deterministic algorithm:
> > For each file:
> >   If not .svg:
> > For each line:
> >   If doesn't contain `\bxmlns\b`:
> > For each link, `\bhttp://[^# \t\r\n]*(?:\w|/)`:
> >   If neither `\bgnu\.org/license`, nor `\bmozilla\.org/MPL\b`:
> > If both the HTTP and HTTPS versions
> > return 200 OK and serve the same content:
> >   Replace HTTP with HTTPS.
> > 
> > Signed-off-by: Alexander A. Klimov 
> 
> Applied to net-next, but please find a better algorithm for generating
> the subject prefixes. 

Suggestions welcomed for automating patch subject prefixes
for generic treewide conversions by subsystem.

git history doesn't work particularly well for that.





Re: [PATCH v2 0/2] psi: enhance psi with the help of ebpf

2020-07-15 Thread Yafang Shao
On Thu, Jul 16, 2020 at 12:36 AM Shakeel Butt  wrote:
>
> Hi Yafang,
>
> On Tue, Mar 31, 2020 at 3:05 AM Yafang Shao  wrote:
> >
> > PSI gives us a powerful way to analyze memory pressure issues, but we can
> > make it more powerful with the help of tracepoints, kprobes, ebpf, etc.
> > Especially with ebpf we can flexibly get more details of the memory
> > pressure.
> >
> > In order to achieve this goal, a new parameter is added into
> > psi_memstall_{enter, leave}, which indicates the specific type of a
> > memstall. There're totally ten memstalls by now,
> > MEMSTALL_KSWAPD
> > MEMSTALL_RECLAIM_DIRECT
> > MEMSTALL_RECLAIM_MEMCG
> > MEMSTALL_RECLAIM_HIGH
> > MEMSTALL_KCOMPACTD
> > MEMSTALL_COMPACT
> > MEMSTALL_WORKINGSET_REFAULT
> > MEMSTALL_WORKINGSET_THRASH
> > MEMSTALL_MEMDELAY
> > MEMSTALL_SWAPIO
> > With the help of kprobes or tracepoints to trace this newly added argument we
> > can know which type of memstall it is and then do the corresponding
> > improvement. It can also help us to analyze the latency spikes caused by
> > memory pressure.
> >
> > But note that we can't use it to build memory pressure for a specific type
> > of memstall, e.g. memcg pressure, compaction pressure and etc, because it
> > doesn't implement various types of task->in_memstall, e.g.
> > task->in_memcgstall, task->in_compactionstall and etc.
> >
> > Although there're already some tracepoints can help us to achieve this
> > goal, e.g.
> > vmscan:mm_vmscan_kswapd_{wake, sleep}
> > vmscan:mm_vmscan_direct_reclaim_{begin, end}
> > vmscan:mm_vmscan_memcg_reclaim_{begin, end}
> > /* no tracepoint for memcg high reclaim*/
> > compaction:mm_compaction_kcompactd_{wake, sleep}
> > compaction:mm_compaction_{begin, end}
> > /* no tracepoint for workingset refault */
> > /* no tracepoint for workingset thrashing */
> > /* no tracepoint for use memdelay */
> > /* no tracepoint for swapio */
> > but psi_memstall_{enter, leave} gives us a unified entrance for all
> > types of memstall and we don't need to add many begin and end tracepoints
> > that haven't been implemented yet.
> >
> > Patch #2 gives us an example of how to use it with ebpf. With the help of
> > ebpf we can trace a specific task, application, container and etc. It also
> > can help us to analyze the spread of latencies and whether they were
> > clustered at a point of time or spread out over long periods of time.
> >
> > To summarize, with the pressure data in /proc/pressure/memory we know that
> > the system is under memory pressure, and then with the newly added tracing
> > facility in this patchset we can get the reason of this memory pressure,
> > and then thinks about how to make the change.
> > The workflow can be illustrated as below.
> >
> >                       REASON          ACTION
> >                     | compaction   | improve compaction |
> >                     | vmscan       | improve vmscan     |
> >    Memory pressure -| workingset   | improve workingset |
> >                     | etc          | ...                |
> >
>
> I have not looked at the patch series in detail but I wanted to get
> your thoughts if it is possible to achieve what I am trying to do with
> this patch series.
>
> At the moment I am only interested in global reclaim and I wanted to
> enable alerts like "alert if there is process stuck in global reclaim
> for x seconds in last y seconds window" or "alert if all the processes
> are stuck in global reclaim for some z seconds".
>
> I see that using this series I can identify global reclaim but I am
> wondering if alert or notifications are possible. Android is using psi
> monitors for such alerts but it does not use cgroups, so, most of the
> memstalls are related to global reclaim stall. For cgroup environment,
> do we need for add support to psi monitor similar to this patch
> series?
>

Hi Shakeel,

We use the PSI tracepoints in our kernel to analyze the individual
latency caused by memory pressure, but the PSI tracepoints are
implemented in a different form, as below:
trace_psi_memstall_enter(_RET_IP_);
trace_psi_memstall_leave(_RET_IP_);
And then using the _RET_IP_ to identify the specific PSI type.

If the _RET_IP_ is in try_to_free_mem_cgroup_pages(), then it means
the pressure is caused by the memory cgroup, IOW, the limit of the memcg
has been reached and it has to do memcg reclaim. Otherwise we can
consider it as global memory pressure.
try_to_free_mem_cgroup_pages
psi_memstall_enter
        if (static_branch_likely(&psi_disabled))
return;
*flags = current->in_memstall;
 if (*flags)
 return;
 trace_psi_memstall_enter(_RET_IP_);  < memcg pressure
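
[Editorial note: for readers unfamiliar with the mechanism described above,
a tracepoint that records the caller address could be declared along the
lines of the sketch below. This is only a sketch of the shape, not the
actual implementation in Yafang's tree; the event name and fields are
assumptions.]

    /* Sketch only; would normally live in an include/trace/events/ header. */
    #undef TRACE_SYSTEM
    #define TRACE_SYSTEM psi

    #if !defined(_TRACE_PSI_H) || defined(TRACE_HEADER_MULTI_READ)
    #define _TRACE_PSI_H

    #include <linux/tracepoint.h>

    TRACE_EVENT(psi_memstall_enter,
            TP_PROTO(unsigned long ip),
            TP_ARGS(ip),
            TP_STRUCT__entry(
                    __field(unsigned long, ip)
            ),
            TP_fast_assign(
                    __entry->ip = ip;
            ),
            /* %pS resolves the caller symbol, e.g. try_to_free_mem_cgroup_pages+0x... */
            TP_printk("caller=%pS", (void *)__entry->ip)
    );

    #endif /* _TRACE_PSI_H */

    #include <trace/define_trace.h>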


-- 
Thanks
Yafang
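
[Editorial note: on Shakeel's point about alerts, the psi monitor interface
documented in Documentation/accounting/psi.rst already lets a userspace
watcher be woken when a stall threshold is crossed. A minimal example of
arming such a trigger is sketched below; the threshold and window values
are illustrative only.]

    #include <fcntl.h>
    #include <poll.h>
    #include <stdio.h>
    #include <string.h>
    #include <unistd.h>

    int main(void)
    {
            /* wake up if "some" memory stall exceeds 150ms within any 1s window */
            const char trig[] = "some 150000 1000000";
            struct pollfd pfd;

            pfd.fd = open("/proc/pressure/memory", O_RDWR | O_NONBLOCK);
            if (pfd.fd < 0 || write(pfd.fd, trig, strlen(trig) + 1) < 0) {
                    perror("psi trigger setup");
                    return 1;
            }
            pfd.events = POLLPRI;

            while (poll(&pfd, 1, -1) > 0) {
                    if (pfd.revents & POLLERR)
                            break;          /* monitored fd went away */
                    if (pfd.revents & POLLPRI)
                            printf("memory pressure event\n");
            }
            return 0;
    }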


[PATCH v4 7/8] irqchip/loongson-liointc: Fix potential dead lock

2020-07-15 Thread Tiezhu Yang
In the function liointc_set_type(), we need to call the function
irq_gc_unlock_irqrestore() before returning.

Fixes: dbb152267908 ("irqchip: Add driver for Loongson I/O Local Interrupt 
Controller")
Reported-by: Jianmin Lv 
Signed-off-by: Tiezhu Yang 
---
 drivers/irqchip/irq-loongson-liointc.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/irqchip/irq-loongson-liointc.c 
b/drivers/irqchip/irq-loongson-liointc.c
index 63b6147..6ef86a3 100644
--- a/drivers/irqchip/irq-loongson-liointc.c
+++ b/drivers/irqchip/irq-loongson-liointc.c
@@ -114,6 +114,7 @@ static int liointc_set_type(struct irq_data *data, unsigned 
int type)
liointc_set_bit(gc, LIOINTC_REG_INTC_POL, mask, false);
break;
default:
+   irq_gc_unlock_irqrestore(gc, flags);
return -EINVAL;
}
irq_gc_unlock_irqrestore(gc, flags);
-- 
2.1.0
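
[Editorial note: an alternative shape that avoids duplicating the unlock
call, shown below purely as a sketch of a common pattern and not as part
of the posted patch, funnels all exits through a single unlock.]

    /* Sketch of an alternative structure; the posted one-line fix is fine too. */
    int ret = 0;

    irq_gc_lock_irqsave(gc, flags);
    switch (type) {
    case IRQ_TYPE_LEVEL_HIGH:
            /* ... program the EDGE/POL bits as the driver already does ... */
            break;
    /* ... other trigger types ... */
    default:
            ret = -EINVAL;
            break;
    }
    irq_gc_unlock_irqrestore(gc, flags);

    if (ret)
            return ret;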



[PATCH v4 5/8] irqchip/loongson-pch-pic: Check return value of irq_domain_translate_twocell()

2020-07-15 Thread Tiezhu Yang
Check the return value of irq_domain_translate_twocell() because it
may return -EINVAL on failure, and pass it the incoming argument as
the variable fwspec. Then use a new variable parent_fwspec, which is
the one suitable for irq_domain_alloc_irqs_parent().

Fixes: ef8c01eb64ca ("irqchip: Add Loongson PCH PIC controller")
Signed-off-by: Tiezhu Yang 
---
 drivers/irqchip/irq-loongson-pch-pic.c | 15 +--
 1 file changed, 9 insertions(+), 6 deletions(-)

diff --git a/drivers/irqchip/irq-loongson-pch-pic.c 
b/drivers/irqchip/irq-loongson-pch-pic.c
index 2a05b93..016f32c 100644
--- a/drivers/irqchip/irq-loongson-pch-pic.c
+++ b/drivers/irqchip/irq-loongson-pch-pic.c
@@ -135,16 +135,19 @@ static int pch_pic_alloc(struct irq_domain *domain, 
unsigned int virq,
int err;
unsigned int type;
unsigned long hwirq;
-   struct irq_fwspec fwspec;
+   struct irq_fwspec *fwspec = arg;
+   struct irq_fwspec parent_fwspec;
struct pch_pic *priv = domain->host_data;
 
-   irq_domain_translate_twocell(domain, arg, &hwirq, &type);
+   err = irq_domain_translate_twocell(domain, fwspec, &hwirq, &type);
+   if (err)
+   return err;
 
-   fwspec.fwnode = domain->parent->fwnode;
-   fwspec.param_count = 1;
-   fwspec.param[0] = hwirq + priv->ht_vec_base;
+   parent_fwspec.fwnode = domain->parent->fwnode;
+   parent_fwspec.param_count = 1;
+   parent_fwspec.param[0] = hwirq + priv->ht_vec_base;
 
-   err = irq_domain_alloc_irqs_parent(domain, virq, 1, &fwspec);
+   err = irq_domain_alloc_irqs_parent(domain, virq, 1, &parent_fwspec);
if (err)
return err;
 
-- 
2.1.0



[RESEND v13 03/11] KVM: VMX: Set guest CET MSRs per KVM and host configuration

2020-07-15 Thread Yang Weijiang
CET MSRs are passed through to the guest directly to enhance performance.
CET runtime control settings are stored in MSR_IA32_{U,S}_CET, Shadow Stack
Pointers (SSPs) are stored in MSR_IA32_PL{0,1,2,3}_SSP, and the SSP table base
address is stored in MSR_IA32_INT_SSP_TAB. These MSRs are defined in the
kernel and re-used here.

MSR_IA32_U_CET and MSR_IA32_PL3_SSP are used for user-mode protection. Their
contents are switched between threads during scheduling, so it makes sense to
pass them through so that the guest kernel can use xsaves/xrstors to operate
on them efficiently. The other MSRs are used for non-user-mode protection. See
the SDM for detailed info.

The difference between the CET VMCS fields and the CET MSRs is that the former
are used during VMEnter/VMExit, whereas the latter are used for CET state
storage across task/thread scheduling.

Co-developed-by: Zhang Yi Z 
Signed-off-by: Zhang Yi Z 
Signed-off-by: Yang Weijiang 
---
 arch/x86/kvm/vmx/vmx.c | 46 ++
 arch/x86/kvm/x86.c |  3 +++
 2 files changed, 49 insertions(+)

diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index 13745f2a5ecd..a9f135c52cbc 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -3126,6 +3126,13 @@ void vmx_load_mmu_pgd(struct kvm_vcpu *vcpu, unsigned 
long pgd)
vmcs_writel(GUEST_CR3, guest_cr3);
 }
 
+static bool is_cet_state_supported(struct kvm_vcpu *vcpu, u32 xss_states)
+{
+   return ((supported_xss & xss_states) &&
+   (guest_cpuid_has(vcpu, X86_FEATURE_SHSTK) ||
+   guest_cpuid_has(vcpu, X86_FEATURE_IBT)));
+}
+
 int vmx_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
 {
struct vcpu_vmx *vmx = to_vmx(vcpu);
@@ -7230,6 +7237,42 @@ static void update_intel_pt_cfg(struct kvm_vcpu *vcpu)
vmx->pt_desc.ctl_bitmask &= ~(0xfULL << (32 + i * 4));
 }
 
+static void vmx_update_intercept_for_cet_msr(struct kvm_vcpu *vcpu)
+{
+   struct vcpu_vmx *vmx = to_vmx(vcpu);
+   unsigned long *msr_bitmap = vmx->vmcs01.msr_bitmap;
+   bool incpt;
+
+   incpt = !is_cet_state_supported(vcpu, XFEATURE_MASK_CET_USER);
+   /*
+* U_CET is required for USER CET, and U_CET, PL3_SSP are bound as
+* one component and controlled by IA32_XSS[bit 11].
+*/
+   vmx_set_intercept_for_msr(msr_bitmap, MSR_IA32_U_CET, MSR_TYPE_RW,
+ incpt);
+   vmx_set_intercept_for_msr(msr_bitmap, MSR_IA32_PL3_SSP, MSR_TYPE_RW,
+ incpt);
+
+   incpt = !is_cet_state_supported(vcpu, XFEATURE_MASK_CET_KERNEL);
+   /*
+* S_CET is required for KERNEL CET, and PL0_SSP ... PL2_SSP are
+* bound as one component and controlled by IA32_XSS[bit 12].
+*/
+   vmx_set_intercept_for_msr(msr_bitmap, MSR_IA32_S_CET, MSR_TYPE_RW,
+ incpt);
+   vmx_set_intercept_for_msr(msr_bitmap, MSR_IA32_PL0_SSP, MSR_TYPE_RW,
+ incpt);
+   vmx_set_intercept_for_msr(msr_bitmap, MSR_IA32_PL1_SSP, MSR_TYPE_RW,
+ incpt);
+   vmx_set_intercept_for_msr(msr_bitmap, MSR_IA32_PL2_SSP, MSR_TYPE_RW,
+ incpt);
+
+   incpt |= !guest_cpuid_has(vcpu, X86_FEATURE_SHSTK);
+   /* SSP_TAB is only available for KERNEL SHSTK.*/
+   vmx_set_intercept_for_msr(msr_bitmap, MSR_IA32_INT_SSP_TAB, MSR_TYPE_RW,
+ incpt);
+}
+
 static void vmx_cpuid_update(struct kvm_vcpu *vcpu)
 {
struct vcpu_vmx *vmx = to_vmx(vcpu);
@@ -7268,6 +7311,9 @@ static void vmx_cpuid_update(struct kvm_vcpu *vcpu)
vmx_set_guest_msr(vmx, msr, enabled ? 0 : 
TSX_CTRL_RTM_DISABLE);
}
}
+
+   if (supported_xss & (XFEATURE_MASK_CET_KERNEL | XFEATURE_MASK_CET_USER))
+   vmx_update_intercept_for_cet_msr(vcpu);
 }
 
 static __init void vmx_set_cpu_caps(void)
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 88c593f83b28..ea8a9dc9fbad 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -184,6 +184,9 @@ static struct kvm_shared_msrs __percpu *shared_msrs;
| XFEATURE_MASK_BNDCSR | XFEATURE_MASK_AVX512 \
| XFEATURE_MASK_PKRU)
 
+#define KVM_SUPPORTED_XSS   (XFEATURE_MASK_CET_USER | \
+XFEATURE_MASK_CET_KERNEL)
+
 u64 __read_mostly host_efer;
 EXPORT_SYMBOL_GPL(host_efer);
 
-- 
2.17.2


