Re: [PATCH v2 05/19] mm/hugetlb: Introduce pgtable allocation/freeing helpers

2020-10-28 Thread Mike Kravetz
On 10/26/20 7:51 AM, Muchun Song wrote:
> On some architectures, the vmemmap areas use huge page mapping.
> If we want to free the unused vmemmap pages, we have to split
> the huge pmd firstly. So we should pre-allocate pgtable to split
> huge pmd.
> 
> Signed-off-by: Muchun Song 
> ---
>  arch/x86/include/asm/hugetlb.h |   5 ++
>  include/linux/hugetlb.h|  17 +
>  mm/hugetlb.c   | 117 +
>  3 files changed, 139 insertions(+)
> 
> diff --git a/arch/x86/include/asm/hugetlb.h b/arch/x86/include/asm/hugetlb.h
> index 1721b1aadeb1..f5e882f999cd 100644
> --- a/arch/x86/include/asm/hugetlb.h
> +++ b/arch/x86/include/asm/hugetlb.h
> @@ -5,6 +5,11 @@
>  #include 
>  #include 
>  
> +#ifdef CONFIG_HUGETLB_PAGE_FREE_VMEMMAP
> +#define VMEMMAP_HPAGE_SHIFT  PMD_SHIFT
> +#define arch_vmemmap_support_huge_mapping()  boot_cpu_has(X86_FEATURE_PSE)
> +#endif
> +
>  #define hugepages_supported() boot_cpu_has(X86_FEATURE_PSE)
>  
>  #endif /* _ASM_X86_HUGETLB_H */
> diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h
> index eed3dd3bd626..ace304a6196c 100644
> --- a/include/linux/hugetlb.h
> +++ b/include/linux/hugetlb.h
> @@ -593,6 +593,23 @@ static inline unsigned int blocks_per_huge_page(struct 
> hstate *h)
>  
>  #include 
>  
> +#ifdef CONFIG_HUGETLB_PAGE_FREE_VMEMMAP
> +#ifndef arch_vmemmap_support_huge_mapping
> +static inline bool arch_vmemmap_support_huge_mapping(void)
> +{
> + return false;
> +}
> +#endif
> +
> +#ifndef VMEMMAP_HPAGE_SHIFT
> +#define VMEMMAP_HPAGE_SHIFT  PMD_SHIFT
> +#endif
> +#define VMEMMAP_HPAGE_ORDER  (VMEMMAP_HPAGE_SHIFT - PAGE_SHIFT)
> +#define VMEMMAP_HPAGE_NR (1 << VMEMMAP_HPAGE_ORDER)
> +#define VMEMMAP_HPAGE_SIZE   ((1UL) << VMEMMAP_HPAGE_SHIFT)
> +#define VMEMMAP_HPAGE_MASK   (~(VMEMMAP_HPAGE_SIZE - 1))
> +#endif /* CONFIG_HUGETLB_PAGE_FREE_VMEMMAP */
> +
>  #ifndef is_hugepage_only_range
>  static inline int is_hugepage_only_range(struct mm_struct *mm,
>   unsigned long addr, unsigned long len)
> diff --git a/mm/hugetlb.c b/mm/hugetlb.c
> index f1b2b733b49b..d6ae9b6876be 100644
> --- a/mm/hugetlb.c
> +++ b/mm/hugetlb.c
> @@ -1295,11 +1295,108 @@ static inline void 
> destroy_compound_gigantic_page(struct page *page,
>  #ifdef CONFIG_HUGETLB_PAGE_FREE_VMEMMAP
>  #define RESERVE_VMEMMAP_NR   2U
>  
> +#define page_huge_pte(page)  ((page)->pmd_huge_pte)
> +

I am not good at function names.  The following suggestions may be too
verbose.  However, they helped me understand the purpose of the routines.

>  static inline unsigned int nr_free_vmemmap(struct hstate *h)

perhaps?  free_vmemmap_pages_per_hpage()

>  {
>   return h->nr_free_vmemmap_pages;
>  }
>  
> +static inline unsigned int nr_vmemmap(struct hstate *h)

perhaps?  vmemmap_pages_per_hpage()

> +{
> + return nr_free_vmemmap(h) + RESERVE_VMEMMAP_NR;
> +}
> +
> +static inline unsigned long nr_vmemmap_size(struct hstate *h)

perhaps?  vmemmap_pages_size_per_hpage()

> +{
> + return (unsigned long)nr_vmemmap(h) << PAGE_SHIFT;
> +}
> +
> +static inline unsigned int nr_pgtable(struct hstate *h)

perhaps?  pgtable_pages_to_prealloc_per_hpage()

> +{
> + unsigned long vmemmap_size = nr_vmemmap_size(h);
> +
> + if (!arch_vmemmap_support_huge_mapping())
> + return 0;
> +
> + /*
> +  * No need to pre-allocate page tables when there is no vmemmap pages
> +  * to free.
> +  */
> + if (!nr_free_vmemmap(h))
> + return 0;
> +
> + return ALIGN(vmemmap_size, VMEMMAP_HPAGE_SIZE) >> VMEMMAP_HPAGE_SHIFT;
> +}
> +
> +static inline void vmemmap_pgtable_init(struct page *page)
> +{
> + page_huge_pte(page) = NULL;
> +}
> +

I see the following routines follow the pattern for vmemmap manipulation
in dax.

> +static void vmemmap_pgtable_deposit(struct page *page, pte_t *pte_p)
> +{
> + pgtable_t pgtable = virt_to_page(pte_p);
> +
> + /* FIFO */
> + if (!page_huge_pte(page))
> + INIT_LIST_HEAD(&pgtable->lru);
> + else
> + list_add(&pgtable->lru, &page_huge_pte(page)->lru);
> + page_huge_pte(page) = pgtable;
> +}
> +
> +static pte_t *vmemmap_pgtable_withdraw(struct page *page)
> +{
> + pgtable_t pgtable;
> +
> + /* FIFO */
> + pgtable = page_huge_pte(page);
> + if (unlikely(!pgtable))
> + return NULL;
> + page_huge_pte(page) = list_first_entry_or_null(&pgtable->lru,
> +struct page, lru);
> + if (page_huge_pte(page))
> + list_del(&pgtable->lru);
> + return page_to_virt(pgtable);
> +}
> +
> +static int vmemmap_pgtable_prealloc(struct hstate *h, struct page *page)
> +{
> + int i;
> + pte_t *pte_p;
> + unsigned int nr = nr_pgtable(h);
> +
> + if (!nr)
> + return 0;
> +
> + vmemmap_pgtable_init(page);
> +
> + for (i 

linux-next: Signed-off-by missing for commit in the drm-intel-fixes tree

2020-10-28 Thread Stephen Rothwell
Hi all,

Commit

  d13208a88f41 ("lockdep: Fix nr_unused_locks")

is missing a Signed-off-by from its author.

Also, the author's email name is missing the leading 'P'.

-- 
Cheers,
Stephen Rothwell


pgpyNajf3ZsLX.pgp
Description: OpenPGP digital signature


[PATCH 1/1] usb: typec: stusb160x: fix signedness comparison issue with enum variables

2020-10-28 Thread Amelie Delaunay
chip->port_type and chip->pwr_opmode are enums and when GCC considers them
as unsigned, the conditions are never met.
This patch takes advantage of the ret variable and fixes the following
warnings:
drivers/usb/typec/stusb160x.c:548 stusb160x_get_fw_caps() warn: unsigned 
'chip->port_type' is never less than zero.
drivers/usb/typec/stusb160x.c:570 stusb160x_get_fw_caps() warn: unsigned 
'chip->pwr_opmode' is never less than zero.

Fixes: da0cb6310094 ("usb: typec: add support for STUSB160x Type-C controller 
family")
Reported-by: kernel test robot 
Signed-off-by: Amelie Delaunay 
---
 drivers/usb/typec/stusb160x.c | 21 +
 1 file changed, 9 insertions(+), 12 deletions(-)

diff --git a/drivers/usb/typec/stusb160x.c b/drivers/usb/typec/stusb160x.c
index da7f1957bcb3..8519d33bc3e7 100644
--- a/drivers/usb/typec/stusb160x.c
+++ b/drivers/usb/typec/stusb160x.c
@@ -544,11 +544,11 @@ static int stusb160x_get_fw_caps(struct stusb160x *chip,
 */
ret = fwnode_property_read_string(fwnode, "power-role", &cap_str);
if (!ret) {
-   chip->port_type = typec_find_port_power_role(cap_str);
-   if ((int)chip->port_type < 0) {
-   ret = chip->port_type;
+   ret = typec_find_port_power_role(cap_str);
+   if (ret < 0)
return ret;
-   }
+   chip->port_type = ret;
+
}
chip->capability.type = chip->port_type;
 
@@ -565,16 +565,13 @@ static int stusb160x_get_fw_caps(struct stusb160x *chip,
 */
ret = fwnode_property_read_string(fwnode, "power-opmode", &cap_str);
if (!ret) {
-   chip->pwr_opmode = typec_find_pwr_opmode(cap_str);
+   ret = typec_find_pwr_opmode(cap_str);
/* Power delivery not yet supported */
-   if ((int)chip->pwr_opmode < 0 ||
-   chip->pwr_opmode == TYPEC_PWR_MODE_PD) {
-   ret = (int)chip->pwr_opmode < 0 ? chip->pwr_opmode :
- -EINVAL;
-   dev_err(chip->dev, "bad power operation mode: %d\n",
-   chip->pwr_opmode);
-   return ret;
+   if (ret < 0 || ret == TYPEC_PWR_MODE_PD) {
+   dev_err(chip->dev, "bad power operation mode: %d\n", 
ret);
+   return -EINVAL;
}
+   chip->pwr_opmode = ret;
}
 
return 0;
-- 
2.17.1



[PATCH 1/1] usb: typec: add missing MODULE_DEVICE_TABLE() to stusb160x

2020-10-28 Thread Amelie Delaunay
When stusb160x driver is built as a module, no modalias information is
available, which prevents the module from being loaded by udev.
Add MODULE_DEVICE_TABLE() to fix this issue.

Fixes: da0cb6310094 ("usb: typec: add support for STUSB160x Type-C controller 
family")
Signed-off-by: Amelie Delaunay 
---
 drivers/usb/typec/stusb160x.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/usb/typec/stusb160x.c b/drivers/usb/typec/stusb160x.c
index da7f1957bcb3..42076697e26c 100644
--- a/drivers/usb/typec/stusb160x.c
+++ b/drivers/usb/typec/stusb160x.c
@@ -633,6 +633,7 @@ static const struct of_device_id stusb160x_of_match[] = {
{ .compatible = "st,stusb1600", .data = &stusb1600_regmap_config},
{},
 };
+MODULE_DEVICE_TABLE(of, stusb160x_of_match);
 
 static int stusb160x_probe(struct i2c_client *client)
 {
-- 
2.17.1



Re: [RFC][PATCH 0/2] ftrace: Add access to function arguments for all callbacks

2020-10-28 Thread Alexei Starovoitov
On Wed, Oct 28, 2020 at 09:15:42AM -0400, Steven Rostedt wrote:
> 
> This is something I wanted to implement a long time ago, but held off until
> there was a good reason to do so. Now it appears that having access to the
> arguments of the function by default is very useful. As a bonus, because
> arguments must be saved regardless before calling a callback, because they
> need to be restored before returning back to the start of the traced
> function, there's not much work to do to have them always be there for
> normal function callbacks.
> 
> The basic idea is that if CONFIG_HAVE_DYNAMIC_FTRACE_WITH_ARGS is set, then
> all callbacks registered to ftrace can use the regs parameter for the stack
> and arguments (kernel_stack_pointer(regs), regs_get_kernel_argument(regs, n)),
> without the need to set REGS that causes overhead by saving all registers as
> REGS simulates a breakpoint.

I don't have a strong opinion on this feature, but if you want to have it
please add a giant disclaimer that this is going to be x86-64 and, maybe, arm64
feature _forever_. On x86-32 and other architectures there is no way
to provide sane regs_get_kernel_argument(regs, n) semantics by blindly
saving registers. The kernel needs to know types and calling convention
of the architecture. That's the reason bpf side has btf_func_model concept.
To make sure that bpf trampoline can support all architectures in the future.


Re: [PATCH v3 1/5] x86/boot/compressed/64: Introduce sev_status

2020-10-28 Thread Joerg Roedel
On Wed, Oct 28, 2020 at 12:50:07PM -0400, Arvind Sankar wrote:
> On Wed, Oct 28, 2020 at 09:23:52AM +0100, Joerg Roedel wrote:
> > On Mon, Oct 26, 2020 at 07:27:06PM +0100, Borislav Petkov wrote:
> > > A couple of lines above you call get_sev_encryption_bit() which already
> > > reads MSR_AMD64_SEV. Why not set sev_status there too instead of reading
> > > that MSR again here?
> > > 
> > > It can read that MSR once and use sev_status(%rip) from then on to avoid
> > > reading that MSR multiple times...
> > 
> > Right, makes sense. I updated the patch.
> 
> Hang on, get_sev_encryption_bit() is also called from startup_32(),
> so it can't contain any 64-bit instructions to set sev_status.

Yeah, figured that out too and discussed it with Boris. Decided to leave
it as-is and add a comment why the MSR is re-read.

Thanks,

Joerg


[PATCH v3] gpio: mockup: Allow probing from device tree

2020-10-28 Thread Vincent Whitchurch
Allow the mockup driver to be probed via the device tree without any
module parameters, allowing it to be used to configure and test higher
level drivers like the leds-gpio driver and corresponding userspace
before actual hardware is available.

Signed-off-by: Vincent Whitchurch 
---

Notes:
v3:
- Keep includes sorted alphabetically
- Drop CONFIG_OF ifdefs

v2:
- Remove most of the added code, since the latest driver doesn't need it.
- Drop DT binding document, since Rob Herring was OK with not documenting 
this:
  
https://lore.kernel.org/linux-devicetree/5baa1ae6.1c69fb81.847f2.3...@mx.google.com/

 drivers/gpio/gpio-mockup.c | 11 +--
 1 file changed, 9 insertions(+), 2 deletions(-)

diff --git a/drivers/gpio/gpio-mockup.c b/drivers/gpio/gpio-mockup.c
index 67ed4f238d43..ca87c590ef3f 100644
--- a/drivers/gpio/gpio-mockup.c
+++ b/drivers/gpio/gpio-mockup.c
@@ -16,6 +16,7 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 #include 
 #include 
@@ -460,9 +461,16 @@ static int gpio_mockup_probe(struct platform_device *pdev)
return 0;
 }
 
+static const struct of_device_id gpio_mockup_of_match[] = {
+   { .compatible = "gpio-mockup", },
+   {},
+};
+MODULE_DEVICE_TABLE(of, gpio_mockup_of_match);
+
 static struct platform_driver gpio_mockup_driver = {
.driver = {
.name = "gpio-mockup",
+   .of_match_table = of_match_ptr(gpio_mockup_of_match),
},
.probe = gpio_mockup_probe,
 };
@@ -556,8 +564,7 @@ static int __init gpio_mockup_init(void)
 {
int i, num_chips, err;
 
-   if ((gpio_mockup_num_ranges < 2) ||
-   (gpio_mockup_num_ranges % 2) ||
+   if ((gpio_mockup_num_ranges % 2) ||
(gpio_mockup_num_ranges > GPIO_MOCKUP_MAX_RANGES))
return -EINVAL;
 
-- 
2.28.0



[PATCH 2/4] powerpc: Rename is_kvm_guest to check_kvm_guest

2020-10-28 Thread Srikar Dronamraju
is_kvm_guest() will be reused in a subsequent patch in a new avatar.  Hence
rename is_kvm_guest to check_kvm_guest. No additional changes.

Signed-off-by: Srikar Dronamraju 
Cc: linuxppc-dev 
Cc: LKML 
Cc: Michael Ellerman 
Cc: Nicholas Piggin 
Cc: Nathan Lynch 
Cc: Gautham R Shenoy 
Cc: Peter Zijlstra 
Cc: Valentin Schneider 
Cc: Juri Lelli 
Cc: Waiman Long 
Cc: Phil Auld 
---
 arch/powerpc/include/asm/kvm_guest.h | 4 ++--
 arch/powerpc/include/asm/kvm_para.h  | 2 +-
 arch/powerpc/kernel/firmware.c   | 2 +-
 arch/powerpc/platforms/pseries/smp.c | 2 +-
 4 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/arch/powerpc/include/asm/kvm_guest.h 
b/arch/powerpc/include/asm/kvm_guest.h
index c0ace884a0e8..ba8291e02ba9 100644
--- a/arch/powerpc/include/asm/kvm_guest.h
+++ b/arch/powerpc/include/asm/kvm_guest.h
@@ -7,9 +7,9 @@
 #define __POWERPC_KVM_GUEST_H__
 
 #if defined(CONFIG_PPC_PSERIES) || defined(CONFIG_KVM_GUEST)
-bool is_kvm_guest(void);
+bool check_kvm_guest(void);
 #else
-static inline bool is_kvm_guest(void) { return false; }
+static inline bool check_kvm_guest(void) { return false; }
 #endif
 
 #endif /* __POWERPC_KVM_GUEST_H__ */
diff --git a/arch/powerpc/include/asm/kvm_para.h 
b/arch/powerpc/include/asm/kvm_para.h
index abe1b5e82547..6fba06b6cfdb 100644
--- a/arch/powerpc/include/asm/kvm_para.h
+++ b/arch/powerpc/include/asm/kvm_para.h
@@ -14,7 +14,7 @@
 
 static inline int kvm_para_available(void)
 {
-   return IS_ENABLED(CONFIG_KVM_GUEST) && is_kvm_guest();
+   return IS_ENABLED(CONFIG_KVM_GUEST) && check_kvm_guest();
 }
 
 static inline unsigned int kvm_arch_para_features(void)
diff --git a/arch/powerpc/kernel/firmware.c b/arch/powerpc/kernel/firmware.c
index fe48d319d490..61243267d4cf 100644
--- a/arch/powerpc/kernel/firmware.c
+++ b/arch/powerpc/kernel/firmware.c
@@ -21,7 +21,7 @@ EXPORT_SYMBOL_GPL(powerpc_firmware_features);
 #endif
 
 #if defined(CONFIG_PPC_PSERIES) || defined(CONFIG_KVM_GUEST)
-bool is_kvm_guest(void)
+bool check_kvm_guest(void)
 {
struct device_node *hyper_node;
 
diff --git a/arch/powerpc/platforms/pseries/smp.c 
b/arch/powerpc/platforms/pseries/smp.c
index d578732c545d..c70b4be9f0a5 100644
--- a/arch/powerpc/platforms/pseries/smp.c
+++ b/arch/powerpc/platforms/pseries/smp.c
@@ -211,7 +211,7 @@ static __init void pSeries_smp_probe(void)
if (!cpu_has_feature(CPU_FTR_SMT))
return;
 
-   if (is_kvm_guest()) {
+   if (check_kvm_guest()) {
/*
 * KVM emulates doorbells by disabling FSCR[MSGP] so msgsndp
 * faults to the hypervisor which then reads the instruction
-- 
2.18.4



Re: [PATCH 1/2] builddeb: Fix rootless build in setuid/setgid directory

2020-10-28 Thread Masahiro Yamada
On Tue, Oct 27, 2020 at 4:32 AM Sven Joachim  wrote:
>
> Building 5.10-rc1 in a setgid directory failed with the following
> error:
>
> dpkg-deb: error: control directory has bad permissions 2755 (must be
> >=0755 and <=0775)
>
> When building with fakeroot, the earlier chown call would have removed
> the setgid bits, but in a rootless build they remain.
>


Applied to linux-kbuild. Thanks.

I agreed with "g-s" but was not sure about "u-s"
because nothing is explained about setuid,
and the setuid bit against directories seems to have no effect.





It was interesting to read this article:
https://superuser.com/questions/471844/why-is-setuid-ignored-on-directories



Also, it is summarized on Wikipedia:
https://en.wikipedia.org/wiki/Setuid#setuid_and_setgid_on_directories

"The setuid permission set on a directory is ignored on most UNIX and
Linux systems.[citation needed] However FreeBSD can be configured to
interpret setuid in a manner similar to setgid, in which case it
forces all files and sub-directories created in a directory to be
owned by that directory's owner - a simple form of inheritance.[5]
This is generally not needed on most systems derived from BSD, since
by default directories are treated as if their setgid bit is always
set, regardless of the actual value. As is stated in open(2), "When a
new file is created it is given the group of the directory which
contains it.""


After all, I am convinced that it would not hurt to do "u-s"
although I have never tested kernel builds on FreeBSD.










> Fixes: 3e8541803624 ("builddeb: Enable rootless builds")
> Cc: Guillem Jover 
> Signed-off-by: Sven Joachim 
> ---
>  scripts/package/builddeb | 2 ++
>  1 file changed, 2 insertions(+)
>
> diff --git a/scripts/package/builddeb b/scripts/package/builddeb
> index 1b11f8993629..91a502bb97e8 100755
> --- a/scripts/package/builddeb
> +++ b/scripts/package/builddeb
> @@ -45,6 +45,8 @@ create_package() {
> chmod -R go-w "$pdir"
> # in case we are in a restrictive umask environment like 0077
> chmod -R a+rX "$pdir"
> +   # in case we build in a setuid/setgid directory
> +   chmod -R ug-s "$pdir"
>
> # Create the package
> dpkg-gencontrol -p$pname -P"$pdir"
> --
> 2.28.0
>


--
Best Regards

Masahiro Yamada


Re: [linux-sunxi] [PATCH v9 12/14] arm64: defconfig: Enable Allwinner i2s driver

2020-10-28 Thread Chen-Yu Tsai
On Wed, Oct 28, 2020 at 2:32 AM Clément Péron  wrote:
>
> Enable Allwinner I2S driver for arm64 defconfig.
>
> Signed-off-by: Clément Péron 

Acked-by: Chen-Yu Tsai 


Re: [PATCH v2] gpio: mockup: Allow probing from device tree

2020-10-28 Thread Vincent Whitchurch
On Tue, Oct 27, 2020 at 07:12:13PM +0100, Bartosz Golaszewski wrote:
> On Tue, Oct 27, 2020 at 2:54 PM Vincent Whitchurch
>  wrote:
> > diff --git a/drivers/gpio/gpio-mockup.c b/drivers/gpio/gpio-mockup.c
> > index 67ed4f238d43..c93892a6936a 100644
> > --- a/drivers/gpio/gpio-mockup.c
> > +++ b/drivers/gpio/gpio-mockup.c
> > @@ -13,6 +13,7 @@
> >  #include 
> >  #include 
> >  #include 
> > +#include 
> 
> Please keep the includes ordered alphabetically.

Thanks, fixed in v3.

> >  #include 
> >  #include 
> >  #include 
> > @@ -460,9 +461,18 @@ static int gpio_mockup_probe(struct platform_device 
> > *pdev)
> > return 0;
> >  }
> >
> > +#ifdef CONFIG_OF
> > +static const struct of_device_id gpio_mockup_of_match[] = {
> > +   { .compatible = "gpio-mockup", },
> > +   {},
> > +};
> > +MODULE_DEVICE_TABLE(of, gpio_mockup_of_match);
> > +#endif
> 
> You don't need this ifdef - of_match_ptr() will evaluate to NULL if
> CONFIG_OF is disabled and the compiler will optimize this struct out.

The compiler can't optimise out the struct in the case of a module build
since there is a reference from the MODULE_DEVICE_TABLE:

 $ grep CONFIG_OF .config
 # CONFIG_OF is not set
 $ nm drivers/gpio/gpio-mockup.ko  | grep of_
  r gpio_mockup_of_match
  R __mod_of__gpio_mockup_of_match_device_table

But these few wasted bytes don't matter so I removed the CONFIG_OF
anyway as you suggested.


Re: [PATCH 2/3] ARM: dts: aspeed: amd-ethanolx: Enable KCS channel 3

2020-10-28 Thread Supreeth Venkatesh
Thanks Konstantin for this patch.
Thanks Joel for reviewing this.

On 10/28/20 12:28 AM, Joel Stanley wrote:
> [CAUTION: External Email]
> 
> On Tue, 27 Oct 2020 at 12:41, Konstantin Aladyshev
>  wrote:
>>
>> The KCS interface on the LPC channel 3 in the controller
>> is used for the in-band BMC<->BIOS IPMI communication.
>> 0xCA2 is a default host CPU LPC IO address for this
>> interface.
>>
>> Signed-off-by: Konstantin Aladyshev 
> 
> I don't have any docs on the platform so I'll wait for a review from
> Supreeth before applying this one.
Done.
> 
> It's a correct use of the bindings:
> 
> Reviewed-by: Joel Stanley 
Reviewed-by: Supreeth Venkatesh 

> 
>> ---
>>  arch/arm/boot/dts/aspeed-bmc-amd-ethanolx.dts | 5 +
>>  1 file changed, 5 insertions(+)
>>
>> diff --git a/arch/arm/boot/dts/aspeed-bmc-amd-ethanolx.dts 
>> b/arch/arm/boot/dts/aspeed-bmc-amd-ethanolx.dts
>> index 89ddc3847222..2a86bda8afd8 100644
>> --- a/arch/arm/boot/dts/aspeed-bmc-amd-ethanolx.dts
>> +++ b/arch/arm/boot/dts/aspeed-bmc-amd-ethanolx.dts
>> @@ -147,6 +147,11 @@
>> aspeed,lpc-io-reg = <0x62>;
>>  };
>>
>> + {
>> +   status = "okay";
>> +   aspeed,lpc-io-reg = <0xCA2>;
>> +};
>> +
>>   {
>> status = "okay";
>> aspeed,lpc-io-reg = <0x97DE>;
>> --
>> 2.17.1
>>


[PATCH v2 2/2] ASoC: fsl_aud2htx: Add aud2htx module driver

2020-10-28 Thread Shengjiu Wang
The AUD2HTX is a digital module that provides a bridge between
the Audio Subsystem and the HDMI RTX Subsystem. This module
includes intermediate storage to queue SDMA transactions prior
to being synchronized and passed to the HDMI RTX Subsystem over
the Audio Link.

The AUD2HTX contains a DMA request routed to the SDMA module.
This DMA request is controlled based on the watermark level in
the 32-entry sample buffer.

Signed-off-by: Shengjiu Wang 
---
changes in v2:
- remove hw_params, add operation to dai probe

 sound/soc/fsl/Kconfig   |   5 +
 sound/soc/fsl/Makefile  |   2 +
 sound/soc/fsl/fsl_aud2htx.c | 313 
 sound/soc/fsl/fsl_aud2htx.h |  67 
 4 files changed, 387 insertions(+)
 create mode 100644 sound/soc/fsl/fsl_aud2htx.c
 create mode 100644 sound/soc/fsl/fsl_aud2htx.h

diff --git a/sound/soc/fsl/Kconfig b/sound/soc/fsl/Kconfig
index d04b64d32dc1..52a562215008 100644
--- a/sound/soc/fsl/Kconfig
+++ b/sound/soc/fsl/Kconfig
@@ -105,6 +105,11 @@ config SND_SOC_FSL_XCVR
  iMX CPUs. XCVR is a digital module that supports HDMI2.1 eARC,
  HDMI1.4 ARC and SPDIF.
 
+config SND_SOC_FSL_AUD2HTX
+   tristate "AUDIO TO HDMI TX module support"
+   help
+ Say Y if you want to add AUDIO TO HDMI TX support for NXP.
+
 config SND_SOC_FSL_UTILS
tristate
 
diff --git a/sound/soc/fsl/Makefile b/sound/soc/fsl/Makefile
index 1d2231f9cc47..2181b7f9f677 100644
--- a/sound/soc/fsl/Makefile
+++ b/sound/soc/fsl/Makefile
@@ -26,6 +26,7 @@ snd-soc-fsl-dma-objs := fsl_dma.o
 snd-soc-fsl-mqs-objs := fsl_mqs.o
 snd-soc-fsl-easrc-objs := fsl_easrc.o
 snd-soc-fsl-xcvr-objs := fsl_xcvr.o
+snd-soc-fsl-aud2htx-objs := fsl_aud2htx.o
 
 obj-$(CONFIG_SND_SOC_FSL_AUDMIX) += snd-soc-fsl-audmix.o
 obj-$(CONFIG_SND_SOC_FSL_ASOC_CARD) += snd-soc-fsl-asoc-card.o
@@ -40,6 +41,7 @@ obj-$(CONFIG_SND_SOC_FSL_MQS) += snd-soc-fsl-mqs.o
 obj-$(CONFIG_SND_SOC_FSL_EASRC) += snd-soc-fsl-easrc.o
 obj-$(CONFIG_SND_SOC_POWERPC_DMA) += snd-soc-fsl-dma.o
 obj-$(CONFIG_SND_SOC_FSL_XCVR) += snd-soc-fsl-xcvr.o
+obj-$(CONFIG_SND_SOC_FSL_AUD2HTX) += snd-soc-fsl-aud2htx.o
 
 # MPC5200 Platform Support
 obj-$(CONFIG_SND_MPC52xx_DMA) += mpc5200_dma.o
diff --git a/sound/soc/fsl/fsl_aud2htx.c b/sound/soc/fsl/fsl_aud2htx.c
new file mode 100644
index ..a6e25195a8df
--- /dev/null
+++ b/sound/soc/fsl/fsl_aud2htx.c
@@ -0,0 +1,313 @@
+// SPDX-License-Identifier: GPL-2.0+
+// Copyright 2020 NXP
+
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+
+#include "fsl_aud2htx.h"
+#include "imx-pcm.h"
+
+static int fsl_aud2htx_trigger(struct snd_pcm_substream *substream, int cmd,
+  struct snd_soc_dai *dai)
+{
+   struct fsl_aud2htx *aud2htx = snd_soc_dai_get_drvdata(dai);
+
+   switch (cmd) {
+   case SNDRV_PCM_TRIGGER_START:
+   case SNDRV_PCM_TRIGGER_RESUME:
+   case SNDRV_PCM_TRIGGER_PAUSE_RELEASE:
+   regmap_update_bits(aud2htx->regmap, AUD2HTX_CTRL,
+  AUD2HTX_CTRL_EN, AUD2HTX_CTRL_EN);
+   regmap_update_bits(aud2htx->regmap, AUD2HTX_CTRL_EXT,
+  AUD2HTX_CTRE_DE, AUD2HTX_CTRE_DE);
+   break;
+   case SNDRV_PCM_TRIGGER_SUSPEND:
+   case SNDRV_PCM_TRIGGER_STOP:
+   case SNDRV_PCM_TRIGGER_PAUSE_PUSH:
+   regmap_update_bits(aud2htx->regmap, AUD2HTX_CTRL_EXT,
+  AUD2HTX_CTRE_DE, 0);
+   regmap_update_bits(aud2htx->regmap, AUD2HTX_CTRL,
+  AUD2HTX_CTRL_EN, 0);
+   break;
+   default:
+   return -EINVAL;
+   }
+   return 0;
+}
+
+static const struct snd_soc_dai_ops fsl_aud2htx_dai_ops = {
+   .trigger= fsl_aud2htx_trigger,
+};
+
+static int fsl_aud2htx_dai_probe(struct snd_soc_dai *cpu_dai)
+{
+   struct fsl_aud2htx *aud2htx = dev_get_drvdata(cpu_dai->dev);
+
+   /* DMA request when number of entries < WTMK_LOW */
+   regmap_update_bits(aud2htx->regmap, AUD2HTX_CTRL_EXT,
+  AUD2HTX_CTRE_DT_MASK, 0);
+
+   /* Disable interrupts*/
+   regmap_update_bits(aud2htx->regmap, AUD2HTX_IRQ_MASK,
+  AUD2HTX_WM_HIGH_IRQ_MASK |
+  AUD2HTX_WM_LOW_IRQ_MASK |
+  AUD2HTX_OVF_MASK,
+  AUD2HTX_WM_HIGH_IRQ_MASK |
+  AUD2HTX_WM_LOW_IRQ_MASK |
+  AUD2HTX_OVF_MASK);
+
+   /* Configure watermark */
+   regmap_update_bits(aud2htx->regmap, AUD2HTX_CTRL_EXT,
+  AUD2HTX_CTRE_WL_MASK,
+  AUD2HTX_WTMK_LOW << AUD2HTX_CTRE_WL_SHIFT);
+   regmap_update_bits(aud2htx->regmap, AUD2HTX_CTRL_EXT,
+  AUD2HTX_CTRE_WH_MASK,
+

[PATCH v2 1/2] ASoC: dt-bindings: fsl_aud2htx: Add binding doc for aud2htx module

2020-10-28 Thread Shengjiu Wang
AUD2HTX (Audio Subsystem TO HDMI TX Subsystem) is a new
IP module found on i.MX8MP.

Signed-off-by: Shengjiu Wang 
---
changes in v2:
- fix indentation issue
- remove nodename

 .../bindings/sound/fsl,aud2htx.yaml   | 64 +++
 1 file changed, 64 insertions(+)
 create mode 100644 Documentation/devicetree/bindings/sound/fsl,aud2htx.yaml

diff --git a/Documentation/devicetree/bindings/sound/fsl,aud2htx.yaml 
b/Documentation/devicetree/bindings/sound/fsl,aud2htx.yaml
new file mode 100644
index ..6d9ba2946bfb
--- /dev/null
+++ b/Documentation/devicetree/bindings/sound/fsl,aud2htx.yaml
@@ -0,0 +1,64 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/sound/fsl,aud2htx.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: NXP Audio Subsystem to HDMI RTX Subsystem Controller
+
+maintainers:
+  - Shengjiu Wang 
+
+properties:
+  compatible:
+const: fsl,imx8mp-aud2htx
+
+  reg:
+maxItems: 1
+
+  interrupts:
+maxItems: 1
+
+  clocks:
+items:
+  - description: Peripheral clock
+
+  clock-names:
+items:
+  - const: bus
+
+  dmas:
+items:
+  - description: DMA controller phandle and request line for TX
+
+  dma-names:
+items:
+  - const: tx
+
+  power-domains:
+maxItems: 1
+
+required:
+  - compatible
+  - reg
+  - interrupts
+  - clocks
+  - clock-names
+  - dmas
+  - dma-names
+
+examples:
+  - |
+#include 
+#include 
+
+aud2htx: aud2htx@30cb {
+compatible = "fsl,imx8mp-aud2htx";
+reg = <0x30cb 0x1>;
+interrupts = ;
+clocks = <_clk IMX8MP_CLK_AUDIOMIX_AUD2HTX_IPG>;
+clock-names = "bus";
+dmas = < 26 2 0>;
+dma-names = "tx";
+power-domains = <_pd>;
+};
-- 
2.27.0



Re: [PATCH v3] platform/mellanox: mlxbf-pmc: Add Mellanox BlueField PMC driver

2020-10-28 Thread Hans de Goede
Hi all,

On 10/28/20 12:03 PM, Hans de Goede wrote:
> Hi,
> 
> On 10/8/20 2:37 PM, Shravan Kumar Ramani wrote:
>> The performance modules in BlueField are present in several hardware
>> blocks and each block provides access to these stats either through
>> counters that can be programmed to monitor supported events or
>> through memory-mapped registers that hold the relevant information.
>> The hardware blocks that include a performance module are:
>>  * Tile (block containing 2 cores and a shared L2 cache)
>>  * TRIO (PCIe root complex)
>>  * MSS (Memory Sub-system containing the Memory Controller and L3 cache)
>>  * GIC (Interrupt controller)
>>  * SMMU (System Memory Management Unit)
>> The mlx_pmc driver provides access to all of these performance modules
>> through a hwmon sysfs interface.
> 
> Thank you for your patch, I've applied this patch to my review-hans 
> branch:
> https://git.kernel.org/pub/scm/linux/kernel/git/pdx86/platform-drivers-x86.git/log/?h=review-hans
> 
> Note it will show up there once I've pushed my local branch there,
> which might take a while.
> 
> Once I've run some tests on this branch the patches there will be
> added to the platform-drivers-x86/for-next branch and eventually
> will be included in the pdx86 pull-request to Linus for the next
> merge-window.

Andy I now see that this goes under drivers/platform/mellanox and the
MAINTAINERS entry for this still points to you:

MELLANOX HARDWARE PLATFORM SUPPORT
M:  Andy Shevchenko 
M:  Darren Hart 
M:  Vadim Pasternak 
L:  platform-driver-...@vger.kernel.org
S:  Supported
F:  Documentation/ABI/testing/sysfs-platform-mellanox-bootctl
F:  drivers/platform/mellanox/
F:  include/linux/platform_data/mlxreg.h

But the patches are being sent to the
platform-driver-...@vger.kernel.org list.

Also I assume that you (Andy) do not want to maintain (another)
git tree just for the occasional Mellanox patch, so I'll just keep
this patch in my review-hans branch (and eventually move it
to pdx86/for-next)

I guess that we should maybe update the MAINTAINERS entry to make me +
Mark the maintainers and add a:

T:  git 
git://git.kernel.org/pub/scm/linux/kernel/git/pdx86/platform-drivers-x86.git

Entry ?

Regards,

Hans



>> v2 --> v3
>> Update copyright info.
>>
>> v1 --> v2
>> Remove unused headers.
>> Add comma to arrays where last line is not a termination.
>> Use kstrtoint in place of sscanf.
>> UUID manipulation follows drivers/platform/mellanox/mlxbf-bootctl.c
>>
>> Signed-off-by: Shravan Kumar Ramani 
>> Reviewed-by: Vadim Pasternak 
>> Reviewed-by: Jiri Pirko 
>> ---
>>  drivers/platform/mellanox/Kconfig |   10 +
>>  drivers/platform/mellanox/Makefile|1 +
>>  drivers/platform/mellanox/mlxbf-pmc.c | 1478 
>> +
>>  3 files changed, 1489 insertions(+)
>>  create mode 100644 drivers/platform/mellanox/mlxbf-pmc.c
>>
>> diff --git a/drivers/platform/mellanox/Kconfig 
>> b/drivers/platform/mellanox/Kconfig
>> index 916b39d..edd17e1 100644
>> --- a/drivers/platform/mellanox/Kconfig
>> +++ b/drivers/platform/mellanox/Kconfig
>> @@ -56,4 +56,14 @@ config MLXBF_BOOTCTL
>>to the userspace tools, to be used in conjunction with the eMMC
>>device driver to do necessary initial swap of the boot partition.
>>  
>> +config MLXBF_PMC
>> +tristate "Mellanox BlueField Performance Monitoring Counters driver"
>> +depends on ARM64
>> +depends on HWMON
>> +depends on ACPI
>> +help
>> +  Say y here to enable PMC support. The PMC driver provides access
>> +  to performance monitoring counters within various blocks in the
>> +  Mellanox BlueField SoC via a sysfs interface.
>> +
>>  endif # MELLANOX_PLATFORM
>> diff --git a/drivers/platform/mellanox/Makefile 
>> b/drivers/platform/mellanox/Makefile
>> index 499623c..000ddaa 100644
>> --- a/drivers/platform/mellanox/Makefile
>> +++ b/drivers/platform/mellanox/Makefile
>> @@ -4,6 +4,7 @@
>>  # Mellanox Platform-Specific Drivers
>>  #
>>  obj-$(CONFIG_MLXBF_BOOTCTL) += mlxbf-bootctl.o
>> +obj-$(CONFIG_MLXBF_PMC) += mlxbf-pmc.o
>>  obj-$(CONFIG_MLXBF_TMFIFO)  += mlxbf-tmfifo.o
>>  obj-$(CONFIG_MLXREG_HOTPLUG)+= mlxreg-hotplug.o
>>  obj-$(CONFIG_MLXREG_IO) += mlxreg-io.o
>> diff --git a/drivers/platform/mellanox/mlxbf-pmc.c 
>> b/drivers/platform/mellanox/mlxbf-pmc.c
>> new file mode 100644
>> index 000..3588398
>> --- /dev/null
>> +++ b/drivers/platform/mellanox/mlxbf-pmc.c
>> @@ -0,0 +1,1478 @@
>> +// SPDX-License-Identifier: GPL-2.0-only OR Linux-OpenIB
>> +/*
>> + * Mellanox BlueField Performance Monitoring Counters driver
>> + *
>> + * This driver provides a sysfs interface for monitoring
>> + * performance statistics in BlueField SoC.
>> + *
>> + * Copyright (c) 2020, NVIDIA CORPORATION.  All rights reserved.
>> + */
>> +
>> +#include 
>> +#include 
>> +#include 
>> +#include 
>> +#include 
>> +#include 
>> +#include 
>> +#include 
>> +
>> +#define 

Re: [PATCH 1/2] fs:regfs: add register easy filesystem

2020-10-28 Thread Al Viro
On Tue, Oct 20, 2020 at 02:30:07PM +0800, Zou Cao wrote:
> +ssize_t regfs_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
> +{
> + struct file *file = iocb->ki_filp;
> + struct inode *inode = file->f_mapping->host;
> + ssize_t ret;
> +
> + inode_lock(inode);
> + ret = generic_write_checks(iocb, from);
> + if (ret > 0)
> + ret = __generic_file_write_iter(iocb, from);
> + inode_unlock(inode);
> +
> + if (ret > 0)
> + ret = generic_write_sync(iocb, ret);
> + return ret;
> +}

Huh?  How is that different from generic_file_write_iter()?  And who's
using it, anyway?

> + struct regfs_inode_info  *info = REGFS_I(mapping->host);
> + char str[67];
> + unsigned long val = 0;
> + loff_t pos = *ppos;
> + size_t res;
> +
> + if (pos < 0)
> + return -EINVAL;
> + if (pos >= len || len > 66)
> + return 0;

This is completely bogus.  "If current position is greater than the
length of string we are asking to write, quietly return 0"?

> + res = copy_from_user(str, buf, len);
> + if (res)
> + return -EFAULT;
> + str[len] = 0;
> +
> + if (kstrtoul(str, 16, &val) < 0)
> + return -EINVAL;

Where does 67 come from?  If you are expecting a hexadecimal representation
of an unsigned long on arm64, you should have at most 16 digits.  67 looks
rather odd...

> + writel_relaxed(val, info->base + info->offset);

... and you are promptly discarding the upper 32 bits, since writel_relaxed()
takes u32:
((void)__raw_writel((__force u32)cpu_to_le32(v),(c)))
is going to truncate to 32bit, no matter what.  Quietly truncate, at that...

> +const struct address_space_operations regfs_aops = {
> + .readpage   = simple_readpage,
> + .write_begin= simple_write_begin,
> + .write_end  = simple_write_end,
> + .set_page_dirty = __set_page_dirty_buffers,
> +};

Again, huh?  What would use the page cache there, anyway?

> +static LIST_HEAD(regfs_head);

Protected by...?

> +static const struct inode_operations regfs_dir_inode_operations;
> +int regfs_debug;
> +module_param(regfs_debug, int, S_IRUGO);
> +MODULE_PARM_DESC(regfs_debug, "enable regfs debug mode");
> +
> +struct inode *regfs_get_inode(struct super_block *sb, const struct inode 
> *dir, umode_t mode, dev_t dev)
> +{
> + struct inode *inode = new_inode(sb);
> +
> + if (inode) {
> + inode->i_ino = get_next_ino();
> + inode_init_owner(inode, dir, mode);
> + inode->i_mapping->a_ops = &regfs_aops;
> + //inode->i_mapping->backing_dev_info = _backing_dev_info;
> + mapping_set_gfp_mask(inode->i_mapping, GFP_HIGHUSER);
> + mapping_set_unevictable(inode->i_mapping);
> + inode->i_atime = inode->i_mtime = inode->i_ctime = 
> current_time(inode);
> + switch (mode & S_IFMT) {
> + default:
> + init_special_inode(inode, mode, dev);
> + break;
> + case S_IFREG:
> + inode->i_op = _file_inode_operations;
> + inode->i_fop = _file_operations;
> + break;
> + case S_IFDIR:
> + inode->i_op = _dir_inode_operations;
> + inode->i_fop = _dir_operations;
> +
> + /* directory inodes start off with i_nlink == 2 (for 
> "." entry) */
> + inc_nlink(inode);
> + break;
> + case S_IFLNK:
> + inode->i_op = _symlink_inode_operations;
> + break;
> + }
> + }
> +
> + return inode;
> +}

Seriously?  Where would symlinks, device nodes, FIFOs and sockets come from?
And you are open-coding the regular file case in the new_dentry_create() anyway,
so the only thing this is actually used for is the root directory.

> +static const struct inode_operations regfs_dir_inode_operations = {
> + .lookup = simple_lookup,
> +};

... and simple_dir_inode_operations is wrong, because...?

> +static struct dentry *new_dentry_create(struct super_block *sb, struct 
> dentry *parent,
> +  const char *name, bool is_dir, struct res_data *res)
> +{
> + struct dentry *dentry;
> + struct inode *inode;
> + struct regfs_inode_info *ei;
> + struct regfs_fs_info *fsi = sb->s_fs_info;
> +
> + dentry = d_alloc_name(parent, name);
> + if (!dentry)
> + return NULL;
> +
> + inode = new_inode(sb);
> + if (!inode)
> + goto out;
> +
> + ei = REGFS_I(inode);
> + inode->i_ino = get_next_ino();;
> + inode->i_mtime = inode->i_atime = inode->i_ctime = current_time(inode);
> + inode->i_uid =  GLOBAL_ROOT_UID;
> + inode->i_gid =  GLOBAL_ROOT_GID;
> + if (is_dir) {
> + inode->i_mode = S_IFDIR | S_IRUGO | S_IWUSR;
> + inode->i_op = _dir_inode_operations;
> +

[PATCH RFC v2 0/4] x86/bus_lock: Enable bus lock detection

2020-10-28 Thread Fenghua Yu
A bus lock [1] is acquired either through split locked access to
writeback (WB) memory or by using locks to uncacheable (UC) memory
(e.g. direct device assignment). This is typically >1000 cycles slower
than an atomic operation within a cache line. It also disrupts performance
on other cores.

Although split lock can be detected by #AC trap, the trap is triggered
before the instruction acquires bus lock. This makes it difficult to
mitigate bus lock (e.g. throttle the user application).

Some CPUs have the ability to notify the kernel by a #DB trap after a user
instruction acquires a bus lock and is executed. This allows the kernel
to enforce user application throttling or mitigations.

#DB for bus lock detect fixes issues in #AC for split lock detect:
1) It's architectural ... just need to look at one CPUID bit to know it
   exists
2) The IA32_DEBUGCTL MSR, which reports bus lock in #DB, is per-thread.
   So each process or guest can have different behavior.
3) It has support for VMM/guests (new VMEXIT codes, etc).

Hardware only generates #DB for bus lock detect when CPL>0 to avoid
nested #DB from multiple bus locks while the first #DB is being handled.

Use the existing kernel command line option "split_lock_detect=" to handle
#DB for bus lock:

split_lock_detect=
                #AC for split lock              #DB for bus lock

off             Do nothing                      Do nothing

warn            Kernel OOPs                     Warn once per task
                Warn once per task and          and continues to run.
                disable future checking         When both features are
                                                supported, warn in #DB

fatal           Kernel OOPs                     Send SIGBUS to user
                Send SIGBUS to user
                When both features are
                supported, fatal in #AC.

ratelimit:N     Do nothing                      Limit bus lock rate to
                                                N per second in the
                                                current non root user.

Default split_lock_detect is "warn".

[1] Chapter 8 
https://software.intel.com/sites/default/files/managed/c5/15/architecture-instruction-set-extensions-programming-reference.pdf

Change Log:
RFC v2:
- Architecture changed based on feedback from Thomas and PeterZ. #DB is
  no longer generated for bus lock in ring0.
- Split the one single patch into four patches.
[RFC v1 can be found at: 
https://lore.kernel.org/lkml/1595021700-68460-1-git-send-email-fenghua...@intel.com/]

Fenghua Yu (4):
  x86/cpufeatures: Enumerate #DB for bus lock detection
  x86/bus_lock: Handle warn and fatal in #DB for bus lock
  x86/bus_lock: Set rate limit for bus lock
  Documentation: Change doc for split_lock_detect parameter

 .../admin-guide/kernel-parameters.txt |  47 +-
 arch/x86/include/asm/cpu.h|  10 +-
 arch/x86/include/asm/cpufeatures.h|   1 +
 arch/x86/include/asm/msr-index.h  |   1 +
 arch/x86/include/uapi/asm/debugreg.h  |   3 +-
 arch/x86/kernel/cpu/common.c  |   2 +-
 arch/x86/kernel/cpu/intel.c   | 145 +++---
 arch/x86/kernel/traps.c   |   7 +
 include/linux/sched/user.h|   4 +-
 kernel/user.c |   7 +
 10 files changed, 193 insertions(+), 34 deletions(-)

-- 
2.29.0



RE: [PATCH V3 2/4] misc: vop: do not allocate and reassign the used ring

2020-10-28 Thread Sherry Sun
Hi Arnd,

> Subject: Re: [PATCH V3 2/4] misc: vop: do not allocate and reassign the used
> ring
> 
> (resending from the kernel.org address after getting bounces again)
> 
> On Wed, Oct 28, 2020 at 7:29 AM Sherry Sun  wrote:
> > > Subject: Re: [PATCH V3 2/4] misc: vop: do not allocate and reassign
> > > the used
> > >
> > > Both Ashutosh and I have moved on to other projects. The MIC devices
> > > have been discontinued. I have just sent across a patch to remove
> > > the MIC drivers from the kernel tree.
> > >
> > > We are very glad to see that Sherry is able to reuse some of the VOP
> > > logic and it is working well. It is best if the MIC drivers are
> > > removed so Sherry can add the specific VOP logic required for imx8qm
> > > subsequently without having to worry about other driver dependencies.
> > > Hoping this results in a cleaner imx8qm driver moving forward.
> >
> > I'm ok with your patch.
> > Since you have deprecated the MIC related code, may I ask do you have
> > a better solution instead of vop/scif?
> 
> I think we should try to do something on top of the PCIe endpoint subsystem
> to make it work across arbitrary combinations of host and device
> implementations, and provide a superset of what the MIC driver, (out-of-
> tree) Bluefield endpoint driver, and the NTB subsystem as well as a couple of
> others used to do, each of them tunneling block/network/serial/... over a
> PCIe link of some sort, usually with virtio.
> 
> At the moment, there is only one driver for the endpoint framework in the
> kernel, in drivers/pci/endpoint/functions/pci-epf-test.c, but I think this can
> serve as a starting point.
> 

Thanks for your detailed reply.
Yes, the PCIe endpoint subsystem is the base code; actually, we have implemented 
a set of PCI endpoint code similar to pci-epf-test.c, combined with vop (Virtio 
Over PCIe).

But now that the vop code is going to be removed, we plan to change to the NTB 
framework. I saw that Kishon has done some work based on NTB and the PCIe 
endpoint subsystem; I will take a deeper look. Maybe it is a good solution.

Best regards
Sherry

> The PCI endpoint subsystem already uses configfs for configuring the
> available devices, and this seems like a good fit for making it work in 
> general.
> However, there are a number of use cases that have somewhat conflicting
> requirements, so the first step would be to figure out what everyone actually
> needs for virtio communication.
> 
> These are some of the main differences that I have noticed in the
> past:
> 
> - The simple case would be to use one PCIe endpoint device
>   for each virtio device, but I think this needs to be multiplexed
>   so that hardware that only supports a single PCIe endpoint
>   can still have multiple virtio devices tunneled through it.
> 
> - While sometimes the configuration is hardcoded in the driver, ideally
>   the type of virtio device(s) that is tunneled over the PCIe link should
>   be configurable. The configuration of the endpoint device itself is
>   done on the machine running on the endpoint side, but for the
>   virtio devices, this might be either on the host or the endpoint.
>   Not sure if one of the two ways is common enough, or we have to
>   allow both.
> 
> - When the link is configured, you still need one side to provide a
>   virtio device host implementation, while the other side would
>   run the normal virtio device driver. Again, these could be done
>   either way, and it is independent of which side has configured
>   the link, and we might want to only allow one of the two options,
>   or do both, or tie it to who configures it (e.g. the side that creates
>   the device must be the virtio device host, while the other side
>   just sees the device pop up and uses a virtio driver).
> 
>Arnd


[PATCH RFC v2 4/4] Documentation: Change doc for split_lock_detect parameter

2020-10-28 Thread Fenghua Yu
Since #DB for bus lock detect changes the split_lock_detect parameter,
update the documentation for the changes.

Signed-off-by: Fenghua Yu 
Reviewed-by: Tony Luck 
---
 .../admin-guide/kernel-parameters.txt | 47 +++
 1 file changed, 39 insertions(+), 8 deletions(-)

diff --git a/Documentation/admin-guide/kernel-parameters.txt 
b/Documentation/admin-guide/kernel-parameters.txt
index 526d65d8573a..51312484c2b6 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -5044,27 +5044,58 @@
spia_peddr=
 
split_lock_detect=
-   [X86] Enable split lock detection
+   [X86] Enable split lock detection or bus lock detection
 
When enabled (and if hardware support is present), 
atomic
instructions that access data across cache line
-   boundaries will result in an alignment check exception.
+   boundaries will result in an alignment check exception
+   for split lock detection or a debug exception for
+   bus lock detection.
 
off - not enabled
 
-   warn- the kernel will emit rate limited warnings
- about applications triggering the #AC
- exception. This mode is the default on CPUs
- that supports split lock detection.
+   warn- Default mode.
 
-   fatal   - the kernel will send SIGBUS to applications
- that trigger the #AC exception.
+ If split lock detection is enabled in
+ hardware, the kernel will emit rate limited
+ warnings about applications triggering the #AC
+ exception.
+
+ If bus lock detection is enabled in hardware,
+ the kernel will emit rate limited warnings
+ about applications triggering the #DB
+ exception.
+
+ Default behavior is from bus lock detection
+ if both features are enabled in hardware.
+
+   fatal   - If split lock detection is enabled in
+ hardware, the kernel will send SIGBUS to
+ applications that trigger the #AC exception.
+
+ If bus lock detection is enabled in hardware,
+ the kernel will send SIGBUS to applications
+ that trigger the #DB exception.
+
+ Default behavior is from split lock detection
+ if both are enabled in hardware.
+
+   ratelimit:N
+ Set rate limit to N bus locks per second
+ for bus lock detection. 0 < N <= HZ/2 and
+ N is approximate. Only applied to non root
+ user.
+
+ N/A for split lock detection.
 
If an #AC exception is hit in the kernel or in
firmware (i.e. not while executing in user mode)
the kernel will oops in either "warn" or "fatal"
mode.
 
+   #DB exception for bus lock is triggered only when
+   CPL > 0.
+
srbds=  [X86,INTEL]
Control the Special Register Buffer Data Sampling
(SRBDS) mitigation.
-- 
2.29.0



Re: [PATCH v2] usb: gadget: configfs: Fix use-after-free issue with udc_name

2020-10-28 Thread Macpaul Lin
On Thu, 2020-10-29 at 01:55 +0800, Macpaul Lin wrote:
> From: Eddie Hung 
> 
> There is a use-after-free issue, if access udc_name
> in function gadget_dev_desc_UDC_store after another context
> free udc_name in function unregister_gadget.
> 
> Context 1:
> gadget_dev_desc_UDC_store()->unregister_gadget()->
> free udc_name->set udc_name to NULL
> 
> Context 2:
> gadget_dev_desc_UDC_show()-> access udc_name
> 
> Call trace:
> dump_backtrace+0x0/0x340
> show_stack+0x14/0x1c
> dump_stack+0xe4/0x134
> print_address_description+0x78/0x478
> __kasan_report+0x270/0x2ec
> kasan_report+0x10/0x18
> __asan_report_load1_noabort+0x18/0x20
> string+0xf4/0x138
> vsnprintf+0x428/0x14d0
> sprintf+0xe4/0x12c
> gadget_dev_desc_UDC_show+0x54/0x64
> configfs_read_file+0x210/0x3a0
> __vfs_read+0xf0/0x49c
> vfs_read+0x130/0x2b4
> SyS_read+0x114/0x208
> el0_svc_naked+0x34/0x38
> 
> Add mutex_lock to protect this kind of scenario.
> 
> Signed-off-by: Eddie Hung 
> Signed-off-by: Macpaul Lin 
> Reviewed-by: Peter Chen 
> Cc: sta...@vger.kernel.org
> ---
> Changes for v2:
>   - Fix typo %s/contex/context, Thanks Peter.
> 
>  drivers/usb/gadget/configfs.c |   11 +--
>  1 file changed, 9 insertions(+), 2 deletions(-)
> 
> diff --git a/drivers/usb/gadget/configfs.c b/drivers/usb/gadget/configfs.c
> index 56051bb..d9743f4 100644
> --- a/drivers/usb/gadget/configfs.c
> +++ b/drivers/usb/gadget/configfs.c
> @@ -221,9 +221,16 @@ static ssize_t gadget_dev_desc_bcdUSB_store(struct 
> config_item *item,
>  
>  static ssize_t gadget_dev_desc_UDC_show(struct config_item *item, char *page)
>  {
> - char *udc_name = to_gadget_info(item)->composite.gadget_driver.udc_name;
> + struct gadget_info *gi = to_gadget_info(item);
> + char *udc_name;
> + int ret;
> +
> + mutex_lock(&gi->lock);
> + udc_name = gi->composite.gadget_driver.udc_name;
> + ret = sprintf(page, "%s\n", udc_name ?: "");
> + mutex_unlock(&gi->lock);
>  
> - return sprintf(page, "%s\n", udc_name ?: "");
> + return ret;
>  }
>  
>  static int unregister_gadget(struct gadget_info *gi)

Sorry, it looks like this is still a base64-encoded mail.
I'll report this to our IT department again.
Please ignore this mail.

Thanks
Macpaul Lin



Re: [PATCH net-next 5/5] net: mscc: ocelot: support L2 multicast entries

2020-10-28 Thread Florian Fainelli



On 10/28/2020 7:27 PM, Vladimir Oltean wrote:
> There is one main difference in mscc_ocelot between IP multicast and L2
> multicast. With IP multicast, destination ports are encoded into the
> upper bytes of the multicast MAC address. Example: to deliver the
> address 01:00:5E:11:22:33 to ports 3, 8, and 9, one would need to
> program the address of 00:03:08:11:22:33 into hardware. Whereas for L2
> multicast, the MAC table entry points to a Port Group ID (PGID), and
> that PGID contains the port mask that the packet will be forwarded to.
> As to why it is this way, no clue. My guess is that not all port
> combinations can be supported simultaneously with the limited number of
> PGIDs, and this was somehow an issue for IP multicast but not for L2
> multicast. Anyway.
> 
> Prior to this change, the raw L2 multicast code was bogus, due to the
> fact that there wasn't really any way to test it using the bridge code.
> There were 2 issues:
> - A multicast PGID was allocated for each MDB entry, but it wasn't in
>   fact programmed to hardware. It was dummy.
> - In fact we don't want to reserve a multicast PGID for every single MDB
>   entry. That would be odd because we can only have ~60 PGIDs, but
>   thousands of MDB entries. So instead, we want to reserve a multicast
>   PGID for every single port combination for multicast traffic. And
>   since we can have 2 (or more) MDB entries delivered to the same port
>   group (and therefore PGID), we need to reference-count the PGIDs.
> 
> Signed-off-by: Vladimir Oltean 

Reviewed-by: Florian Fainelli 

I believe you have the same gfp_t comment applicable here as in patch #4.
-- 
Florian


[PATCH v2 3/3] watchdog: sprd: change to use usleep_range() instead of busy loop

2020-10-28 Thread Chunyan Zhang
From: Chunyan Zhang 

After changing to check the busy bit for the previous loading operation instead
of the current one, in most cases the busy bit is not set on the
first read, so there's no need to check so frequently. This
patch therefore uses usleep_range() instead of cpu_relax() to avoid a busy loop.

This patch also changes the maximum number of iterations to 11, which is enough
since, according to the specification, the busy bit is set after a new
loading operation and lasts 2 or 3 RTC clock cycles (about 60us~92us).

Fixes: 477603467009 ("watchdog: Add Spreadtrum watchdog driver")
Original-by: Lingling Xu 
Signed-off-by: Chunyan Zhang 
---
 drivers/watchdog/sprd_wdt.c | 10 ++
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/drivers/watchdog/sprd_wdt.c b/drivers/watchdog/sprd_wdt.c
index b9b1daa9e2a4..e8097551dfcd 100644
--- a/drivers/watchdog/sprd_wdt.c
+++ b/drivers/watchdog/sprd_wdt.c
@@ -53,7 +53,7 @@
 
 #define SPRD_WDT_CNT_HIGH_SHIFT16
 #define SPRD_WDT_LOW_VALUE_MASKGENMASK(15, 0)
-#define SPRD_WDT_LOAD_TIMEOUT  1000
+#define SPRD_WDT_LOAD_TIMEOUT  11
 
 struct sprd_wdt {
void __iomem *base;
@@ -109,15 +109,17 @@ static int sprd_wdt_load_value(struct sprd_wdt *wdt, u32 
timeout,
u32 prtmr_step = pretimeout * SPRD_WDT_CNT_STEP;
 
/*
-* Waiting the load value operation done,
-* it needs two or three RTC clock cycles.
+* Checking busy bit to make sure the previous loading operation is
+* done. According to the specification, the busy bit would be set
+* after a new loading operation and last 2 or 3 RTC clock
+* cycles (about 60us~92us).
 */
do {
val = readl_relaxed(wdt->base + SPRD_WDT_INT_RAW);
if (!(val & SPRD_WDT_LD_BUSY_BIT))
break;
 
-   cpu_relax();
+   usleep_range(10, 100);
} while (delay_cnt++ < SPRD_WDT_LOAD_TIMEOUT);
 
if (delay_cnt >= SPRD_WDT_LOAD_TIMEOUT)
-- 
2.20.1



[PATCH v2 2/3] watchdog: sprd: check busy bit before new loading rather than after that

2020-10-28 Thread Chunyan Zhang
From: Lingling Xu 

As the specification describes, users must check the busy bit before starting
a new loading operation to make sure that the previous loading is done
and the device is ready to accept a new one.

[ chunyan: Massaged changelog ]

Fixes: 477603467009 ("watchdog: Add Spreadtrum watchdog driver")
Signed-off-by: Lingling Xu 
Signed-off-by: Chunyan Zhang 
---
 drivers/watchdog/sprd_wdt.c | 25 +
 1 file changed, 13 insertions(+), 12 deletions(-)

diff --git a/drivers/watchdog/sprd_wdt.c b/drivers/watchdog/sprd_wdt.c
index f3c90b4afead..b9b1daa9e2a4 100644
--- a/drivers/watchdog/sprd_wdt.c
+++ b/drivers/watchdog/sprd_wdt.c
@@ -108,18 +108,6 @@ static int sprd_wdt_load_value(struct sprd_wdt *wdt, u32 
timeout,
u32 tmr_step = timeout * SPRD_WDT_CNT_STEP;
u32 prtmr_step = pretimeout * SPRD_WDT_CNT_STEP;
 
-   sprd_wdt_unlock(wdt->base);
-   writel_relaxed((tmr_step >> SPRD_WDT_CNT_HIGH_SHIFT) &
- SPRD_WDT_LOW_VALUE_MASK, wdt->base + SPRD_WDT_LOAD_HIGH);
-   writel_relaxed((tmr_step & SPRD_WDT_LOW_VALUE_MASK),
-  wdt->base + SPRD_WDT_LOAD_LOW);
-   writel_relaxed((prtmr_step >> SPRD_WDT_CNT_HIGH_SHIFT) &
-   SPRD_WDT_LOW_VALUE_MASK,
-  wdt->base + SPRD_WDT_IRQ_LOAD_HIGH);
-   writel_relaxed(prtmr_step & SPRD_WDT_LOW_VALUE_MASK,
-  wdt->base + SPRD_WDT_IRQ_LOAD_LOW);
-   sprd_wdt_lock(wdt->base);
-
/*
 * Waiting the load value operation done,
 * it needs two or three RTC clock cycles.
@@ -134,6 +122,19 @@ static int sprd_wdt_load_value(struct sprd_wdt *wdt, u32 
timeout,
 
if (delay_cnt >= SPRD_WDT_LOAD_TIMEOUT)
return -EBUSY;
+
+   sprd_wdt_unlock(wdt->base);
+   writel_relaxed((tmr_step >> SPRD_WDT_CNT_HIGH_SHIFT) &
+ SPRD_WDT_LOW_VALUE_MASK, wdt->base + SPRD_WDT_LOAD_HIGH);
+   writel_relaxed((tmr_step & SPRD_WDT_LOW_VALUE_MASK),
+  wdt->base + SPRD_WDT_LOAD_LOW);
+   writel_relaxed((prtmr_step >> SPRD_WDT_CNT_HIGH_SHIFT) &
+   SPRD_WDT_LOW_VALUE_MASK,
+  wdt->base + SPRD_WDT_IRQ_LOAD_HIGH);
+   writel_relaxed(prtmr_step & SPRD_WDT_LOW_VALUE_MASK,
+  wdt->base + SPRD_WDT_IRQ_LOAD_LOW);
+   sprd_wdt_lock(wdt->base);
+
return 0;
 }
 
-- 
2.20.1



Re: [seccomp] Request for a "enable on execve" mode for Seccomp filters

2020-10-28 Thread Rich Felker
On Wed, Oct 28, 2020 at 01:42:13PM +0100, Jann Horn wrote:
> +luto just in case he has opinions on this
> 
> On Wed, Oct 28, 2020 at 12:18 PM Camille Mougey  wrote:
> > From my understanding, there is no way to delay the activation of
> > seccomp filters, for instance "until an _execve_ call".
> 
> (FWIW, there are some tricks that you can use for this. In particular,
> you can attach to the child with ptrace before the child runs
> execve(), and then use seccomp to inject a filter after execve(), or
> something like that. The disadvantage is that this is not super pretty
> because it interferes with debugging of the parent process. IIRC e.g.
> Ubuntu's launchd did things this way.)

Yes, in principle everything seccomp does could have been done with
ptrace but the whole point was not to use ptrace as a primitive to
build hacks upon. So this is not a good solution.

> > But this might be useful, especially for tools who sandbox other,
> > non-cooperative, executables, such as "systemd" or "FireJail".
> >
> > It seems to be a caveat of seccomp specific to the system call
> > _execve_. For now, some tools such as "systemd" explicitly mention
> > this exception, and do not support it (from the man page):
> > > Note that strict system call filters may impact execution and
> > > error handling code paths of the service invocation.
> > > Specifically, access to the execve system call is required for
> > > the execution of the service binary — if it is blocked service
> > > invocation will necessarily fail
> >
> > "FireJail" takes a different approach[1], with a kind of workaround:
> > the project uses an external library to be loaded through LD_PRELOAD
> > mechanism, in order to install filters during the loader stage.
> > This approach, a bit hacky, also has several caveats:
> > * _openat_, _mmap_, etc. must be allowed in order to reach the
> > LD_PRELOAD mechanism, and for the crafted library to work ;
> 
> Those caveats are not specific to the LD_PRELOAD approach. Actually,
> the LD_PRELOAD approach is the only one which I would expect to *not*
> have that caveat. (Of course, non-executable mmap() and probably also
> openat() are anyway needed for almost any real-world service to do its
> job correctly.)
> 
> > * it doesn't work for static binaries.
> 
> IMO the important thing about LD_PRELOAD is that it is unreliable:
> When the LD_PRELOAD library can't be opened, glibc just prints a
> warning and continues execution - and an attacker may be able to cause
> opening an LD_PRELOAD library to fail by opening so many files in
> other processes that the global limit is reached. So you can't build
> reliable security infrastructure on LD_PRELOAD. This is not a
> fundamental problem though - glibc could address this.

Using LD_PRELOAD for security infrastructure is a really bad idea
anyway. There are nearly unboundedly many ways code could end up
executing before the preloaded ctors. Preinit arrays, malformed ELF
headers seizing control of execution of ldso, etc. The seccomp filters
really need to be in place *before* the untrusted code runs.

> > I only see hackish ways to restrict the use of _execve_ in a
> > non-cooperative executable. These methods seem globally bypassables
> > and not satisfactory from a security point of view.
> 
> You're just focusing on execve() - I think it's important to keep in
> mind what happens after execve() for normal, dynamically-linked
> binaries: The next step is that the dynamic linker runs, and it will
> poke around in the file system with access() and openat() and fstat(),
> it will mmap() executable libraries into memory, it will mprotect()
> some memory regions, it will set up thread-local storage (e.g. using
> arch_prctl(); even if the process is single-threaded), and so on.
> 
> The earlier you install the seccomp filter, the more of these steps
> you have to permit in the filter. And if you want the filter to take
> effect directly after execve(), the syscalls you'll be forced to
> permit are sufficient to cobble something together in userspace that
> effectively does almost the same thing as execve().

I would assume you use SECCOMP_RET_USER_NOTIF to implement policy for
controlling these operations and allowing only the ones that are valid
during dynamic linking. This also allows you to defer application of
the filter until after execve. So unless I'm missing some reason why
this doesn't work, I think the requested functionality is already
available.

If you really just want the "activate at exec" behavior, it might be
possible (depending on how SECCOMP_RET_USER_NOTIF behaves when there's
no notify fd open; I forget) to setup the filter so that the "mode
switch" happens automatically at exec by having the notify fd being
close-on-exec (notifications handled by a thread before exec). If this
works it would avoid having an extra process involved and managing its
lifetime.

> Your usecase might be better served by adding a glibc feature for
> "unskippable LD_PRELOAD" 

[PATCH v2 0/3] A few fixes to sprd watchdog driver

2020-10-28 Thread Chunyan Zhang
From: Chunyan Zhang 

A few issues about sprd watchdog driver were found recently, this
patchset would fix them.

Changes since v1:
* Added Reviewed-by from Guenter Roeck;
* Abandon original patch 2, add a new patch to use usleep_range() instead of 
busy loop;
* Revised the max times of loop, also revised the comments for checking busy 
bit;
* Revised commit message for the whole patchset;

Chunyan Zhang (1):
  watchdog: sprd: change to use usleep_range() instead of busy loop

Lingling Xu (2):
  watchdog: sprd: remove watchdog disable from resume fail path
  watchdog: sprd: check busy bit before new loading rather than after
that

 drivers/watchdog/sprd_wdt.c | 42 ++---
 1 file changed, 20 insertions(+), 22 deletions(-)

-- 
2.20.1



[PATCH v2 1/3] watchdog: sprd: remove watchdog disable from resume fail path

2020-10-28 Thread Chunyan Zhang
From: Lingling Xu 

sprd_wdt_start() returns an error if the loading operation is not completed
within a certain time. Disabling the watchdog in that case would probably cause
a kernel crash when the watchdog is kicked later, so remove the
watchdog disable operation from the failure path to make sure other parts of
the kernel can run normally.

[ chunyan: Massaged changelog ]

Fixes: 477603467009 ("watchdog: Add Spreadtrum watchdog driver")
Signed-off-by: Lingling Xu 
Signed-off-by: Chunyan Zhang 
Reviewed-by: Guenter Roeck 
---
 drivers/watchdog/sprd_wdt.c | 9 ++---
 1 file changed, 2 insertions(+), 7 deletions(-)

diff --git a/drivers/watchdog/sprd_wdt.c b/drivers/watchdog/sprd_wdt.c
index 65cb55f3916f..f3c90b4afead 100644
--- a/drivers/watchdog/sprd_wdt.c
+++ b/drivers/watchdog/sprd_wdt.c
@@ -345,15 +345,10 @@ static int __maybe_unused sprd_wdt_pm_resume(struct 
device *dev)
if (ret)
return ret;
 
-   if (watchdog_active(&wdt->wdd)) {
+   if (watchdog_active(&wdt->wdd))
ret = sprd_wdt_start(&wdt->wdd);
-   if (ret) {
-   sprd_wdt_disable(wdt);
-   return ret;
-   }
-   }
 
-   return 0;
+   return ret;
 }
 
 static const struct dev_pm_ops sprd_wdt_pm_ops = {
-- 
2.20.1



[PATCH -next] net: stmmac: platform: remove useless if/else

2020-10-28 Thread Zou Wei
Fix the following coccinelle report:

./drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c:233:6-8:
WARNING: possible condition with no effect (if == else)

Both branches are the same, so remove the else if/else altogether.

Reported-by: Hulk Robot 
Signed-off-by: Zou Wei 
---
 drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c 
b/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c
index af34a4c..f6c69d0 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c
@@ -230,8 +230,6 @@ static int stmmac_mtl_setup(struct platform_device *pdev,
plat->tx_sched_algorithm = MTL_TX_ALGORITHM_WFQ;
else if (of_property_read_bool(tx_node, "snps,tx-sched-dwrr"))
plat->tx_sched_algorithm = MTL_TX_ALGORITHM_DWRR;
-   else if (of_property_read_bool(tx_node, "snps,tx-sched-sp"))
-   plat->tx_sched_algorithm = MTL_TX_ALGORITHM_SP;
else
plat->tx_sched_algorithm = MTL_TX_ALGORITHM_SP;
 
-- 
2.6.2



Re: [PATCH v2 net] net: sch_generic: avoid concurrent reset and enqueue op for lockless qdisc

2020-10-28 Thread Yunsheng Lin
On 2020/10/29 4:04, Vishwanath Pai wrote:
> On 10/28/20 1:47 PM, Cong Wang wrote:
>> On Wed, Oct 28, 2020 at 8:37 AM Pai, Vishwanath  wrote:
>>> Hi,
>>>
>>> We noticed some problems when testing the latest 5.4 LTS kernel and traced 
>>> it
>>> back to this commit using git bisect. When running our tests the machine 
>>> stops
>>> responding to all traffic and the only way to recover is a reboot. I do not 
>>> see
>>> a stack trace on the console.
>>
>> Do you mean the machine is still running fine just the network is down?
>>
>> If so, can you dump your tc config with stats when the problem is happening?
>> (You can use `tc -s -d qd show ...`.)
>>
>>>
>>> This can be reproduced using the packetdrill test below, it should be run a
>>> few times or in a loop. You should hit this issue within a few tries but
>>> sometimes might take up to 15-20 tries.
>> ...
>>> I can reproduce the issue easily on v5.4.68, and after reverting this 
>>> commit it
>>> does not happen anymore.
>>
>> This is odd. The patch in this thread touches netdev reset path, if 
>> packetdrill
>> is the only thing you use to trigger the bug (that is netdev is always 
>> active),
>> I can not connect them.
>>
>> Thanks.
> 
> Hi Cong,
> 
>> Do you mean the machine is still running fine just the network is down?
> 
> I was able to access the machine via serial console, it looks like it is
> up and running, just that networking is down.
> 
>> If so, can you dump your tc config with stats when the problem is happening?
>> (You can use `tc -s -d qd show ...`.)
> 
> If I try running tc when the machine is in this state the command never
> returns. It doesn't print anything but doesn't exit either.
> 
>> This is odd. The patch in this thread touches netdev reset path, if 
>> packetdrill
>> is the only thing you use to trigger the bug (that is netdev is always 
>> active),
>> I can not connect them.
> 
> I think packetdrill creates a tun0 interface when it starts the
> test and tears it down at the end, so it might be hitting this code path
> during teardown.

Hi, is there any preparation setup needed before running the above packetdrill
test case? I ran the above test case on 5.9-rc4 with this patch applied, without
any preparation setup, and did not reproduce it.

By the way, I am a newbie to packetdrill :), so it would be good if you could
provide the detailed setup to reproduce it. Thanks.

> 
> P.S: My mail server is having connectivity issues with vger.kernel.org
> so messages aren't getting delivered to netdev. It'll hopefully get
> resolved soon.
> 
> Thanks,
> Vishwanath
> 
> 
> .
> 


linux-next: manual merge of the phy-next tree with the regulator-fixes tree

2020-10-28 Thread Stephen Rothwell
Hi all,

Today's linux-next merge of the phy-next tree got a conflict in:

  MAINTAINERS

between commit:

  43c3e148830a ("MAINTAINERS: Add entry for Qualcomm IPQ4019 VQMMC regulator")

from the regulator-fixes tree and commit:

  c36f74566cef ("MAINTAINERS: Add entry for Qualcomm IPQ4019 USB PHY")

from the phy-next tree.

I fixed it up (see below) and can carry the fix as necessary. This
is now fixed as far as linux-next is concerned, but any non trivial
conflicts should be mentioned to your upstream maintainer when your tree
is submitted for merging.  You may also want to consider cooperating
with the maintainer of the conflicting tree to minimise any particularly
complex conflicts.

-- 
Cheers,
Stephen Rothwell

diff --cc MAINTAINERS
index 0e8f57817184,f01ce8f451c8..
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@@ -14552,14 -14547,14 +14552,22 @@@ F:
Documentation/devicetree/bindings/ma
  F:drivers/mailbox/qcom-ipcc.c
  F:include/dt-bindings/mailbox/qcom-ipcc.h
  
+ QUALCOMM IPQ4019 USB PHY DRIVER
+ M:Robert Marko 
+ M:Luka Perkov 
+ L:linux-arm-...@vger.kernel.org
+ S:Maintained
+ F:Documentation/devicetree/bindings/phy/qcom-usb-ipq4019-phy.yaml
+ F:drivers/phy/qualcomm/phy-qcom-ipq4019-usb.c
+ 
 +QUALCOMM IPQ4019 VQMMC REGULATOR DRIVER
 +M:Robert Marko 
 +M:Luka Perkov 
 +L:linux-arm-...@vger.kernel.org
 +S:Maintained
 +F:Documentation/devicetree/bindings/regulator/vqmmc-ipq4019-regulator.yaml
 +F:drivers/regulator/vqmmc-ipq4019-regulator.c
 +
  QUALCOMM RMNET DRIVER
  M:Subash Abhinov Kasiviswanathan 
  M:Sean Tranchetti 


pgp2sLsgFATgr.pgp
Description: OpenPGP digital signature


Re: [PATCH 01/18] dmaengine: of-dma: Add support for optional router configuration callback

2020-10-28 Thread Peter Ujfalusi
Hi Vinod,

On 28/10/2020 7.55, Vinod Koul wrote:

>> To summarize:
>> In of_dma_route_allocate() the router does not yet know the channel we
>> are going to get.
>> In of_dma_xlate() the DMA driver does not yet know if the channel will
>> use router or not.
>> I need to tell the router the event number it has to send, which is
>> based on the channel number I got.
> 
> Sounds reasonable, btw why not pass this information in xlate. Router
> will have a different xlate rather than non router right, or is it same.

Yes, the routers have their separate xlate, but in that xlate we do not
yet have a channel. I don't know what is the event I need to send from
the router to trigger the channel.

> If this information is anyway available in DT might be better to get it
> and use from DT

Without a channel number I can not do anything.
It is close to a chicken and egg problem.

- Péter

Texas Instruments Finland Oy, Porkkalankatu 22, 00180 Helsinki.
Y-tunnus/Business ID: 0615521-4. Kotipaikka/Domicile: Helsinki


Re: [PATCH] agp: amd64: remove unneeded initialization

2020-10-28 Thread Nathan Chancellor
On Wed, Oct 28, 2020 at 02:31:06PM +0100, Lukas Bulwahn wrote:
> make clang-analyzer on x86_64 defconfig caught my attention with:
> 
>   drivers/char/agp/amd64-agp.c:336:2: warning: \
>   Value stored to 'i' is never read [clang-analyzer-deadcode.DeadStores]
>   i = 0;
>   ^
> 
> Remove this unneeded initialization to make clang-analyzer happy.
> 
> Commit a32073bffc65 ("x86_64: Clean and enhance up K8 northbridge access
> code") refactored cache_nbs() and introduced this unneeded dead-store
> initialization.
> 
> As compilers will detect this unneeded assignment and optimize this anyway,
> the resulting binary is identical before and after this change.
> 
> No functional change. No change in binary code.
> 
> Signed-off-by: Lukas Bulwahn 

Seems obvious :)

Reviewed-by: Nathan Chancellor 

> ---
> applies cleanly on current master and next-20201028
> 
> David, please pick this minor non-urgent clean-up patch.
> 
>  drivers/char/agp/amd64-agp.c | 1 -
>  1 file changed, 1 deletion(-)
> 
> diff --git a/drivers/char/agp/amd64-agp.c b/drivers/char/agp/amd64-agp.c
> index b40edae32817..0413b3136541 100644
> --- a/drivers/char/agp/amd64-agp.c
> +++ b/drivers/char/agp/amd64-agp.c
> @@ -333,7 +333,6 @@ static int cache_nbs(struct pci_dev *pdev, u32 cap_ptr)
>   if (!amd_nb_has_feature(AMD_NB_GART))
>   return -ENODEV;
>  
> - i = 0;
>   for (i = 0; i < amd_nb_num(); i++) {
>   struct pci_dev *dev = node_to_amd_nb(i)->misc;
>   if (fix_northbridge(dev, pdev, cap_ptr) < 0) {
> -- 
> 2.17.1
> 


Re: [PATCH net-next 4/5] net: mscc: ocelot: make entry_type a member of struct ocelot_multicast

2020-10-28 Thread Florian Fainelli



On 10/28/2020 7:27 PM, Vladimir Oltean wrote:
> This saves a re-classification of the MDB address on deletion.
> 
> Signed-off-by: Vladimir Oltean 
> ---

[snip]

>   mc = ocelot_multicast_get(ocelot, mdb->addr, vid);
>   if (!mc) {
>   /* New entry */
> - int pgid = ocelot_mdb_get_pgid(ocelot, entry_type);
> + int pgid;
> +
> + mc = devm_kzalloc(ocelot->dev, sizeof(*mc), GFP_KERNEL);

If the MDB object is programmed with SWITCHDEV_OBJ_ID_HOST_MDB then you
would need this gfp_t to be GFP_ATOMIC per
net/bridge/br_mdb.c::__br_mdb_notify, if this is a regular
SWITCHDEV_OBJ_ID_MDB then GFP_KERNEL appears to be fine.

Looks like this existed before, so that might have to be fixed separately.

Reviewed-by: Florian Fainelli 
-- 
Florian


Re: [PATCH] x86/unwind: remove unneeded initialization

2020-10-28 Thread Nathan Chancellor
On Wed, Oct 28, 2020 at 01:21:02PM +0100, Lukas Bulwahn wrote:
> make clang-analyzer on x86_64 defconfig caught my attention with:
> 
>   arch/x86/kernel/unwind_orc.c:38:7: warning: Value stored to 'mid' during
>   its initialization is never read [clang-analyzer-deadcode.DeadStores]
>   int *mid = first, *found = first;
>^
> 
> Commit ee9f8fce9964 ("x86/unwind: Add the ORC unwinder") introduced
> __orc_find() with this unneeded dead-store initialization.
> 
> Put the variable in local scope and initialize only once the value is
> needed to make clang-analyzer happy.
> 
> As compilers will detect this unneeded assignment and optimize this
> anyway, the resulting object code is effectively identical before and
> after this change.
> 
> No functional change. Effectively, no change to object code.
> 
> Signed-off-by: Lukas Bulwahn 

Seems fine to me.

Reviewed-by: Nathan Chancellor 

> ---
> applies cleanly on current master and next-20201028
> 
> Josh, please ack.
> Ingo, Borislav, please pick this minor non-urgent clean-up patch.
> 
>  arch/x86/kernel/unwind_orc.c | 4 ++--
>  1 file changed, 2 insertions(+), 2 deletions(-)
> 
> diff --git a/arch/x86/kernel/unwind_orc.c b/arch/x86/kernel/unwind_orc.c
> index 6a339ce328e0..5c64eed08257 100644
> --- a/arch/x86/kernel/unwind_orc.c
> +++ b/arch/x86/kernel/unwind_orc.c
> @@ -35,7 +35,7 @@ static struct orc_entry *__orc_find(int *ip_table, struct 
> orc_entry *u_table,
>  {
>   int *first = ip_table;
>   int *last = ip_table + num_entries - 1;
> - int *mid = first, *found = first;
> + int *found = first;
>  
>   if (!num_entries)
>   return NULL;
> @@ -47,7 +47,7 @@ static struct orc_entry *__orc_find(int *ip_table, struct 
> orc_entry *u_table,
>* ignored when they conflict with a real entry.
>*/
>   while (first <= last) {
> - mid = first + ((last - first) / 2);
> + int *mid = first + ((last - first) / 2);
>  
>   if (orc_ip(mid) <= ip) {
>   found = mid;
> -- 
> 2.17.1
> 


[RFC PATCH] irqchip/sifive-plic: Fix getting wrong chip_data when interrupt is hierarchy

2020-10-28 Thread Greentime Hu
This oops is caused by a wrong chip_data, because plic_irq_unmask()
uses irq_get_chip_data(irq_data->irq) to get the chip_data. However, that may
return the chip_data of another irq_data with the same irq_data->irq if the
interrupt is hierarchical.

In this case, it will get irq_data of sifive_gpio_irqchip instead of
plic_chip so that it will get a wrong chip_data and then the wrong lmask
of it to cause this oops.

To fix this issue, we can use irq_data_get_irq_chip_data(irq_data) to get
the correct chip_data of plic_chip.

(gdb) p d
$11 = (struct irq_data *) 0xffe1f695f620
(gdb) p *d
$9 = {
  mask = 0,
  irq = 57,
  hwirq = 6,
  common = 0xffe1f695f600,
  chip = 0xffe0018b5630 ,
  domain = 0xffe1f692c400,
  parent_data = 0xffe1f68482c0,
  chip_data = 0xffe1f564a820
}

(gdb) p d
$6 = (struct irq_data *) 0xffe1f68482c0
(gdb) p *d
$7 = {
  mask = 0,
  irq = 57,
  hwirq = 29,
  common = 0xffe1f695f600,
  chip = 0xffe0018b5070 ,
  domain = 0xffe1f6635e00,
  parent_data = 0x0,
  chip_data = 0xffe1f660f1a0
}

[3.030165] [ cut here ]
[3.034614] WARNING: CPU: 1 PID: 1 at drivers/irqchip/irq-sifive-plic.c:125 
plic_irq_unmask+0xc4/0x114
[3.043887] Modules linked in:
[3.046932] CPU: 1 PID: 1 Comm: swapper/0 Not tainted 5.9.0 #1
[3.052748] epc: ffe000588e90 ra : ffe000588e88 sp : ffe1f6753940
[3.059869]  gp : ffe001978f48 tp : ffe1f6748000 t0 : 
ffe001995cb0
[3.067080]  t1 : ffe001995be8 t2 : 73616d61202c343a s0 : 
ffe1f67539a0
[3.074288]  s1 : ffe1f4968140 a0 : 00b2 a1 : 

[3.081497]  a2 : 00c2 a3 :  a4 : 
381c5a89432fe900
[3.088707]  a5 : 0004 a6 :  a7 : 
01aa
[3.095916]  s2 : ffe1f5901020 s3 : ffe00197a0a8 s4 : 
ffe001978b0c
[3.103125]  s5 : ffe00197a1f0 s6 : 0008 s7 : 
ffe1f4983c9c
[3.110335]  s8 : ffe1f4983c68 s9 : ffe1f4983c00 s10: 
ffe0117c
[3.117544]  s11:  t3 : 0007 t4 : 

[3.124753]  t5 : 663a6b73 t6 : ffe001988479
[3.130052] status: 00020100 badaddr: ffe001978b0c cause: 
0003
[3.137959] ---[ end trace dbc1129f842ecba3 ]---

Fixes: f1ad1133b18f ("irqchip/sifive-plic: Add support for multiple PLICs")
Signed-off-by: Greentime Hu 
---
 drivers/irqchip/irq-sifive-plic.c | 8 
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/drivers/irqchip/irq-sifive-plic.c 
b/drivers/irqchip/irq-sifive-plic.c
index 4048657ece0a..6f432d2a5ceb 100644
--- a/drivers/irqchip/irq-sifive-plic.c
+++ b/drivers/irqchip/irq-sifive-plic.c
@@ -99,7 +99,7 @@ static inline void plic_irq_toggle(const struct cpumask *mask,
   struct irq_data *d, int enable)
 {
int cpu;
-   struct plic_priv *priv = irq_get_chip_data(d->irq);
+   struct plic_priv *priv = irq_data_get_irq_chip_data(d);
 
writel(enable, priv->regs + PRIORITY_BASE + d->hwirq * PRIORITY_PER_ID);
for_each_cpu(cpu, mask) {
@@ -115,7 +115,7 @@ static void plic_irq_unmask(struct irq_data *d)
 {
struct cpumask amask;
unsigned int cpu;
-   struct plic_priv *priv = irq_get_chip_data(d->irq);
+   struct plic_priv *priv = irq_data_get_irq_chip_data(d);
 
cpumask_and(&amask, &priv->lmask, cpu_online_mask);
cpu = cpumask_any_and(irq_data_get_affinity_mask(d),
@@ -127,7 +127,7 @@ static void plic_irq_unmask(struct irq_data *d)
 
 static void plic_irq_mask(struct irq_data *d)
 {
-   struct plic_priv *priv = irq_get_chip_data(d->irq);
+   struct plic_priv *priv = irq_data_get_irq_chip_data(d);
 
plic_irq_toggle(&priv->lmask, d, 0);
 }
@@ -138,7 +138,7 @@ static int plic_set_affinity(struct irq_data *d,
 {
unsigned int cpu;
struct cpumask amask;
-   struct plic_priv *priv = irq_get_chip_data(d->irq);
+   struct plic_priv *priv = irq_data_get_irq_chip_data(d);
 
cpumask_and(&amask, &priv->lmask, mask_val);
 
-- 
2.28.0



Re: [PATCH] stop_machine: Mark functions as notrace

2020-10-28 Thread Zong Li
On Thu, Oct 29, 2020 at 8:23 AM Atish Patra  wrote:
>
> On Wed, Oct 28, 2020 at 8:44 AM Guo Ren  wrote:
> >
> > Hi Zong & Atish,
> >
> > In our 2 harts c910 chip, we found:
> >
> > echo function > /sys/kernel/debug/tracing/current_tracer
> > echo function_graph > /sys/kernel/debug/tracing/current_tracer
> > echo function > /sys/kernel/debug/tracing/current_tracer
> > echo function_graph > /sys/kernel/debug/tracing/current_tracer
> >
> > Then one core halted at stop_machine_yield:
> > arch_cpu_idle () at arch/riscv/kernel/process.c:39
> > 39  local_irq_enable();
> > (gdb) i th
> >   Id   Target Id Frame
> > * 1Thread 1 (CPU#0)  arch_cpu_idle () at arch/riscv/kernel/process.c:39
> >   2Thread 2 (CPU#1)  stop_machine_yield
> > (cpumask=0xffe001371fa8 <__cpu_online_mask>) at
> > ./arch/riscv/include/asm/vdso/processor.h:12
> > (gdb) thread 2
> > [Switching to thread 2 (Thread 2)]
> > #0  stop_machine_yield (cpumask=0xffe001371fa8
> > <__cpu_online_mask>) at ./arch/riscv/include/asm/vdso/processor.h:12
> > 12  __asm__ __volatile__ ("div %0, %0, zero" : "=r" (dummy));
> >
> > With your patch, it's solved. For this patch, I'll give:
> > Tested by: Guo Ren 
> >
> > But that's not enough, we still need:
> >
> > diff --git a/arch/riscv/kernel/sbi.c b/arch/riscv/kernel/sbi.c
> > index 226ccce..12b8808 100644
> > --- a/arch/riscv/kernel/sbi.c
> > +++ b/arch/riscv/kernel/sbi.c
> > @@ -376,7 +376,7 @@ EXPORT_SYMBOL(sbi_send_ipi);
> >   *
> >   * Return: None
> >   */
> > -void sbi_remote_fence_i(const unsigned long *hart_mask)
> > +void notrace sbi_remote_fence_i(const unsigned long *hart_mask)
> >  {
> > __sbi_rfence(SBI_EXT_RFENCE_REMOTE_FENCE_I,
> >  hart_mask, 0, 0, 0, 0);
> > diff --git a/arch/riscv/mm/cacheflush.c b/arch/riscv/mm/cacheflush.c
> > index 400b945d..9467d987 100644
> > --- a/arch/riscv/mm/cacheflush.c
> > +++ b/arch/riscv/mm/cacheflush.c
> > @@ -9,12 +9,12 @@
> >
> >  #include 
> >
> > -static void ipi_remote_fence_i(void *info)
> > +static void notrace ipi_remote_fence_i(void *info)
> >  {
> > return local_flush_icache_all();
> >  }
> >
> > -void flush_icache_all(void)
> > +void notrace flush_icache_all(void)
> >  {
> > if (IS_ENABLED(CONFIG_RISCV_SBI))
> > sbi_remote_fence_i(NULL);
> >
>
> Did you see any issue if these functions are not marked as notrace ?
>
> As per Zong's explanation, the issue was that the other harts already
> fetched the next 2 nops and
> executed 1 while kernel patching replaced other with one of the auipc
> + jalr pair.
>
> @Zong can correct me if I am wrong.
>
> These functions are too far ahead. Can it cause such issues ? If yes,
> then we need to mark each and every function
> that can be invoked from patch_text_nosync and are not inlined.
>
> That includes copy_to_kernel_nofault, __sbi_rfence_v02,
> __sbi_rfence_v02_call, sbi_ecall.
>
> Few of these functions may be inlined by compiler. Can we depend on that ?
>
> > Because:
> > (gdb) bt
> > #0  flush_icache_all () at arch/riscv/mm/cacheflush.c:20
> > #1  0xffe00020473a in patch_text_nosync (addr=<optimized out>, insns=
> > <optimized out>, len=<optimized out>) at arch/riscv/kernel/patch.c:96
> > #2  0xffe000206792 in ftrace_make_call (rec=<optimized out>,
> > addr=<optimized out>) at arch/riscv/kernel/ftrace.c:109
> > #3  0xffe0002c9be4 in __ftrace_replace_code (rec=0xffe01ae40020, e
> > nable=true) at kernel/trace/ftrace.c:2503
> > #4  0xffe0002ca092 in ftrace_replace_code (mod_flags=<optimized out>) at kernel/trace/ftrace.c:2530
> > #5  0xffe0002ca24a in ftrace_modify_all_code (command=9) at kernel
> >/trace/ftrace.c:2677
> > #6  0xffe0002ca2ee in __ftrace_modify_code (data=<optimized out>) at
> >kernel/trace/ftrace.c:2703
> > #7  0xffe0002c1390 in multi_cpu_stop (data=0x0) at kernel/stop_machin
> >e.c:224
> > #8  0xffe0002c0fbe in cpu_stopper_thread (cpu=<optimized out>) at kern
> >el/stop_machine.c:491
> > #9  0xffe0002343be in smpboot_thread_fn (data=0x0) at kernel/smpboot.
> >c:165
> > #10 0xffe00022f894 in kthread (_create=0xffe01af13040) at kern
> >el/kthread.c:292
> > #11 0xffe000201fac in handle_exception () at 
> > arch/riscv/kernel/entry.S:236
> >

It seems to me that the problem happens on the waiting threads; it
doesn't cause an issue on the thread doing the patching, so it is OK for
these functions to be traceable. I may not have figured out all
possible situations, though. Did you find any issue or reason to change the
annotation of these functions?

> > On Wed, Oct 21, 2020 at 3:38 PM Zong Li  wrote:
> > >
> > > Like the commit cb9d7fd51d9f ("watchdog: Mark watchdog touch functions
> > > as notrace"), some architectures assume that the stopped CPUs don't make
> > > function calls to traceable functions when they are in the stopped
> > > > state. For example, it causes an unexpected kernel crash when switching
> > > tracer on RISC-V.
> > >
> > > The following patches added calls to these two functions, fix it by
> > > adding the notrace 

Re: [PATCH V3 1/1] nvme: Add quirk for LiteON CL1 devices running FW 220TQ,22001

2020-10-28 Thread Keith Busch
On Thu, Oct 29, 2020 at 02:20:27AM +, Gloria Tsai wrote:
> Corrected the description of this bug that SSD will not do GC after receiving 
> shutdown cmd.
> Do GC before shutdown -> delete IO Q -> shutdown from host -> breakup GC -> 
> D3hot -> enter PS4 -> have a chance swap block -> use wrong pointer on device 
> SRAM -> over program

What do you mean by "wrong pointer"? At the place in the sequence you're
referring to, the PCI BME is disabled: you can't access *any* host RAM,
so there's no "correct" pointer either.


Re: [seccomp] Request for a "enable on execve" mode for Seccomp filters

2020-10-28 Thread Rich Felker
On Wed, Oct 28, 2020 at 07:25:45PM +0100, Jann Horn wrote:
> On Wed, Oct 28, 2020 at 6:52 PM Rich Felker  wrote:
> > On Wed, Oct 28, 2020 at 06:34:56PM +0100, Jann Horn wrote:
> > > On Wed, Oct 28, 2020 at 5:49 PM Rich Felker  wrote:
> > > > On Wed, Oct 28, 2020 at 01:42:13PM +0100, Jann Horn wrote:
> > > > > On Wed, Oct 28, 2020 at 12:18 PM Camille Mougey  
> > > > > wrote:
> > > > > You're just focusing on execve() - I think it's important to keep in
> > > > > mind what happens after execve() for normal, dynamically-linked
> > > > > binaries: The next step is that the dynamic linker runs, and it will
> > > > > poke around in the file system with access() and openat() and fstat(),
> > > > > it will mmap() executable libraries into memory, it will mprotect()
> > > > > some memory regions, it will set up thread-local storage (e.g. using
> > > > > arch_prctl(); even if the process is single-threaded), and so on.
> > > > >
> > > > > The earlier you install the seccomp filter, the more of these steps
> > > > > you have to permit in the filter. And if you want the filter to take
> > > > > effect directly after execve(), the syscalls you'll be forced to
> > > > > permit are sufficient to cobble something together in userspace that
> > > > > effectively does almost the same thing as execve().
> > > >
> > > > I would assume you use SECCOMP_RET_USER_NOTIF to implement policy for
> > > > controlling these operations and allowing only the ones that are valid
> > > > during dynamic linking. This also allows you to defer application of
> > > > the filter until after execve. So unless I'm missing some reason why
> > > > this doesn't work, I think the requested functionality is already
> > > > available.
> > >
> > > Ah, yeah, good point.
> > >
> > > > If you really just want the "activate at exec" behavior, it might be
> > > > possible (depending on how SECCOMP_RET_USER_NOTIF behaves when there's
> > > > no notify fd open; I forget)
> > >
> > > syscall returns -ENOSYS. Yeah, that'd probably do the job. (Even
> > > though it might be a bit nicer if userspace had control over the errno
> > > there, such that it could be EPERM instead... oh well.)
> >
> > EPERM is a major bug in current sandbox implementations, so ENOSYS is
> > at least mildly better, but indeed it should be controllable, probably
> > by allowing a code path for the BPF to continue with a jump to a
> > different logic path if the notify listener is missing.
> 
> I guess we might be able to expose the listener status through a bit /
> a field in the struct seccomp_data, and then filters could branch on
> that. (And the kernel would run the filter twice if we raced with
> filter detachment.) I don't know whether it would look pretty, but I
> think it should be doable...

I was thinking the race wouldn't be salvageable, but indeed since the
filter is side-effect-free you can just re-run it if the status
changes between start of filter processing and the attempt at
notification. This sounds like it should work.

I guess it's not possible to chain two BPF filters to do this, because
that only works when the first one allows? Or am I misunderstanding
the multiple-filters case entirely? (I've never gotten that far with
programming it.)

Rich


Re: [PATCH net-next 2/5] net: mscc: ocelot: use ether_addr_copy

2020-10-28 Thread Florian Fainelli



On 10/28/2020 7:27 PM, Vladimir Oltean wrote:
> Since a helper is available for copying Ethernet addresses, let's use it.
> 
> Signed-off-by: Vladimir Oltean 

Reviewed-by: Florian Fainelli 
-- 
Florian


Re: [PATCH net-next 3/5] net: mscc: ocelot: remove the "new" variable in ocelot_port_mdb_add

2020-10-28 Thread Florian Fainelli



On 10/28/2020 7:27 PM, Vladimir Oltean wrote:
> It is Not Needed, a comment will suffice.
> 
> Signed-off-by: Vladimir Oltean 

Reviewed-by: Florian Fainelli 
-- 
Florian


Re: [PATCH net-next 1/5] net: mscc: ocelot: classify L2 mdb entries as LOCKED

2020-10-28 Thread Florian Fainelli



On 10/28/2020 7:27 PM, Vladimir Oltean wrote:
> ocelot.h says:
> 
> /* MAC table entry types.
>  * ENTRYTYPE_NORMAL is subject to aging.
>  * ENTRYTYPE_LOCKED is not subject to aging.
>  * ENTRYTYPE_MACv4 is not subject to aging. For IPv4 multicast.
>  * ENTRYTYPE_MACv6 is not subject to aging. For IPv6 multicast.
>  */
> 
> We don't want the permanent entries added with 'bridge mdb' to be
> subject to aging.
> 
> Signed-off-by: Vladimir Oltean 

Reviewed-by: Florian Fainelli 
-- 
Florian


linux-next: manual merge of the staging tree with the kselftest-fixes tree

2020-10-28 Thread Stephen Rothwell
Hi all,

Today's linux-next merge of the staging tree got conflicts in:

  tools/testing/selftests/android/ion/ipcsocket.c
  tools/testing/selftests/android/ion/ipcsocket.h

between commit:

  08c5d41130e5 ("selftests: android: fix multiple definition of sock_name")

from the kselftest-fixes tree and commit:

  e722a295cf49 ("staging: ion: remove from the tree")

from the staging tree.

I fixed it up (I just removed the files) and can carry the fix as
necessary. This is now fixed as far as linux-next is concerned, but any
non trivial conflicts should be mentioned to your upstream maintainer
when your tree is submitted for merging.  You may also want to consider
cooperating with the maintainer of the conflicting tree to minimise any
particularly complex conflicts.

-- 
Cheers,
Stephen Rothwell


pgpxJv4klX92x.pgp
Description: OpenPGP digital signature


[PATCH net-next 5/5] net: mscc: ocelot: support L2 multicast entries

2020-10-28 Thread Vladimir Oltean
There is one main difference in mscc_ocelot between IP multicast and L2
multicast. With IP multicast, destination ports are encoded into the
upper bytes of the multicast MAC address. Example: to deliver the
address 01:00:5E:11:22:33 to ports 3, 8, and 9, one would need to
program the address of 00:03:08:11:22:33 into hardware. Whereas for L2
multicast, the MAC table entry points to a Port Group ID (PGID), and
that PGID contains the port mask that the packet will be forwarded to.
As to why it is this way, no clue. My guess is that not all port
combinations can be supported simultaneously with the limited number of
PGIDs, and this was somehow an issue for IP multicast but not for L2
multicast. Anyway.

Prior to this change, the raw L2 multicast code was bogus, due to the
fact that there wasn't really any way to test it using the bridge code.
There were 2 issues:
- A multicast PGID was allocated for each MDB entry, but it wasn't in
  fact programmed to hardware. It was dummy.
- In fact we don't want to reserve a multicast PGID for every single MDB
  entry. That would be odd because we can only have ~60 PGIDs, but
  thousands of MDB entries. So instead, we want to reserve a multicast
  PGID for every single port combination for multicast traffic. And
  since we can have 2 (or more) MDB entries delivered to the same port
  group (and therefore PGID), we need to reference-count the PGIDs.

Signed-off-by: Vladimir Oltean 
---
 drivers/net/ethernet/mscc/ocelot.c | 109 ++---
 drivers/net/ethernet/mscc/ocelot.h |  16 -
 include/soc/mscc/ocelot.h  |   1 +
 3 files changed, 100 insertions(+), 26 deletions(-)

diff --git a/drivers/net/ethernet/mscc/ocelot.c 
b/drivers/net/ethernet/mscc/ocelot.c
index 713ab6ec8c8d..323dbd30661a 100644
--- a/drivers/net/ethernet/mscc/ocelot.c
+++ b/drivers/net/ethernet/mscc/ocelot.c
@@ -961,10 +961,37 @@ static enum macaccess_entry_type 
ocelot_classify_mdb(const unsigned char *addr)
return ENTRYTYPE_LOCKED;
 }
 
-static int ocelot_mdb_get_pgid(struct ocelot *ocelot,
-  const struct ocelot_multicast *mc)
+static struct ocelot_pgid *ocelot_pgid_alloc(struct ocelot *ocelot, int index,
+unsigned long ports)
 {
-   int pgid;
+   struct ocelot_pgid *pgid;
+
+   pgid = kzalloc(sizeof(*pgid), GFP_KERNEL);
+   if (!pgid)
+   return ERR_PTR(-ENOMEM);
+
+   pgid->ports = ports;
+   pgid->index = index;
+   refcount_set(&pgid->refcount, 1);
+   list_add_tail(&pgid->list, &ocelot->pgids);
+
+   return pgid;
+}
+
+static void ocelot_pgid_free(struct ocelot *ocelot, struct ocelot_pgid *pgid)
+{
+   if (!refcount_dec_and_test(&pgid->refcount))
+   return;
+
+   list_del(&pgid->list);
+   kfree(pgid);
+}
+
+static struct ocelot_pgid *ocelot_mdb_get_pgid(struct ocelot *ocelot,
+  const struct ocelot_multicast 
*mc)
+{
+   struct ocelot_pgid *pgid;
+   int index;
 
/* According to VSC7514 datasheet 3.9.1.5 IPv4 Multicast Entries and
 * 3.9.1.6 IPv6 Multicast Entries, "Instead of a lookup in the
@@ -973,24 +1000,34 @@ static int ocelot_mdb_get_pgid(struct ocelot *ocelot,
 */
if (mc->entry_type == ENTRYTYPE_MACv4 ||
mc->entry_type == ENTRYTYPE_MACv6)
-   return 0;
+   return ocelot_pgid_alloc(ocelot, 0, mc->ports);
+
+   list_for_each_entry(pgid, &ocelot->pgids, list) {
+   /* When searching for a nonreserved multicast PGID, ignore the
+* dummy PGID of zero that we have for MACv4/MACv6 entries
+*/
+   if (pgid->index && pgid->ports == mc->ports) {
+   refcount_inc(&pgid->refcount);
+   return pgid;
+   }
+   }
 
-   for_each_nonreserved_multicast_dest_pgid(ocelot, pgid) {
-   struct ocelot_multicast *mc;
+   /* Search for a free index in the nonreserved multicast PGID area */
+   for_each_nonreserved_multicast_dest_pgid(ocelot, index) {
bool used = false;
 
-   list_for_each_entry(mc, >multicast, list) {
-   if (mc->pgid == pgid) {
+   list_for_each_entry(pgid, >pgids, list) {
+   if (pgid->index == index) {
used = true;
break;
}
}
 
if (!used)
-   return pgid;
+   return ocelot_pgid_alloc(ocelot, index, mc->ports);
}
 
-   return -1;
+   return ERR_PTR(-ENOSPC);
 }
 
 static void ocelot_encode_ports_to_mdb(unsigned char *addr,
@@ -1014,6 +1051,7 @@ int ocelot_port_mdb_add(struct ocelot *ocelot, int port,
struct ocelot_port *ocelot_port = ocelot->ports[port];
unsigned char addr[ETH_ALEN];
struct ocelot_multicast *mc;
+   struct 

[PATCH net-next 3/5] net: mscc: ocelot: remove the "new" variable in ocelot_port_mdb_add

2020-10-28 Thread Vladimir Oltean
It is Not Needed, a comment will suffice.

Signed-off-by: Vladimir Oltean 
---
 drivers/net/ethernet/mscc/ocelot.c | 7 ++-
 1 file changed, 2 insertions(+), 5 deletions(-)

diff --git a/drivers/net/ethernet/mscc/ocelot.c 
b/drivers/net/ethernet/mscc/ocelot.c
index 763d0277eeae..ea49d715c9d0 100644
--- a/drivers/net/ethernet/mscc/ocelot.c
+++ b/drivers/net/ethernet/mscc/ocelot.c
@@ -1017,7 +1017,6 @@ int ocelot_port_mdb_add(struct ocelot *ocelot, int port,
unsigned char addr[ETH_ALEN];
struct ocelot_multicast *mc;
u16 vid = mdb->vid;
-   bool new = false;
 
if (port == ocelot->npi)
port = ocelot->num_phys_ports;
@@ -1029,6 +1028,7 @@ int ocelot_port_mdb_add(struct ocelot *ocelot, int port,
 
mc = ocelot_multicast_get(ocelot, mdb->addr, vid);
if (!mc) {
+   /* New entry */
int pgid = ocelot_mdb_get_pgid(ocelot, entry_type);
 
if (pgid < 0) {
@@ -1047,10 +1047,7 @@ int ocelot_port_mdb_add(struct ocelot *ocelot, int port,
mc->pgid = pgid;
 
list_add_tail(>list, >multicast);
-   new = true;
-   }
-
-   if (!new) {
+   } else {
ocelot_encode_ports_to_mdb(addr, mc, entry_type);
ocelot_mact_forget(ocelot, addr, vid);
}
-- 
2.25.1



[PATCH net-next 4/5] net: mscc: ocelot: make entry_type a member of struct ocelot_multicast

2020-10-28 Thread Vladimir Oltean
This saves a re-classification of the MDB address on deletion.

Signed-off-by: Vladimir Oltean 
---
 drivers/net/ethernet/mscc/ocelot.c | 51 +++---
 drivers/net/ethernet/mscc/ocelot.h | 17 +-
 2 files changed, 34 insertions(+), 34 deletions(-)

diff --git a/drivers/net/ethernet/mscc/ocelot.c 
b/drivers/net/ethernet/mscc/ocelot.c
index ea49d715c9d0..713ab6ec8c8d 100644
--- a/drivers/net/ethernet/mscc/ocelot.c
+++ b/drivers/net/ethernet/mscc/ocelot.c
@@ -962,7 +962,7 @@ static enum macaccess_entry_type ocelot_classify_mdb(const 
unsigned char *addr)
 }
 
 static int ocelot_mdb_get_pgid(struct ocelot *ocelot,
-  enum macaccess_entry_type entry_type)
+  const struct ocelot_multicast *mc)
 {
int pgid;
 
@@ -971,8 +971,8 @@ static int ocelot_mdb_get_pgid(struct ocelot *ocelot,
 * destination mask table (PGID), the destination set is programmed as
 * part of the entry MAC address.", and the DEST_IDX is set to 0.
 */
-   if (entry_type == ENTRYTYPE_MACv4 ||
-   entry_type == ENTRYTYPE_MACv6)
+   if (mc->entry_type == ENTRYTYPE_MACv4 ||
+   mc->entry_type == ENTRYTYPE_MACv6)
return 0;
 
for_each_nonreserved_multicast_dest_pgid(ocelot, pgid) {
@@ -994,16 +994,15 @@ static int ocelot_mdb_get_pgid(struct ocelot *ocelot,
 }
 
 static void ocelot_encode_ports_to_mdb(unsigned char *addr,
-  struct ocelot_multicast *mc,
-  enum macaccess_entry_type entry_type)
+  struct ocelot_multicast *mc)
 {
ether_addr_copy(addr, mc->addr);
 
-   if (entry_type == ENTRYTYPE_MACv4) {
+   if (mc->entry_type == ENTRYTYPE_MACv4) {
addr[0] = 0;
addr[1] = mc->ports >> 8;
addr[2] = mc->ports & 0xff;
-   } else if (entry_type == ENTRYTYPE_MACv6) {
+   } else if (mc->entry_type == ENTRYTYPE_MACv6) {
addr[0] = mc->ports >> 8;
addr[1] = mc->ports & 0xff;
}
@@ -1013,7 +1012,6 @@ int ocelot_port_mdb_add(struct ocelot *ocelot, int port,
const struct switchdev_obj_port_mdb *mdb)
 {
struct ocelot_port *ocelot_port = ocelot->ports[port];
-   enum macaccess_entry_type entry_type;
unsigned char addr[ETH_ALEN];
struct ocelot_multicast *mc;
u16 vid = mdb->vid;
@@ -1024,12 +1022,20 @@ int ocelot_port_mdb_add(struct ocelot *ocelot, int port,
if (!vid)
vid = ocelot_port->pvid;
 
-   entry_type = ocelot_classify_mdb(mdb->addr);
-
mc = ocelot_multicast_get(ocelot, mdb->addr, vid);
if (!mc) {
/* New entry */
-   int pgid = ocelot_mdb_get_pgid(ocelot, entry_type);
+   int pgid;
+
+   mc = devm_kzalloc(ocelot->dev, sizeof(*mc), GFP_KERNEL);
+   if (!mc)
+   return -ENOMEM;
+
+   mc->entry_type = ocelot_classify_mdb(mdb->addr);
+   ether_addr_copy(mc->addr, mdb->addr);
+   mc->vid = vid;
+
+   pgid = ocelot_mdb_get_pgid(ocelot, mc);
 
if (pgid < 0) {
dev_err(ocelot->dev,
@@ -1038,24 +1044,19 @@ int ocelot_port_mdb_add(struct ocelot *ocelot, int port,
return -ENOSPC;
}
 
-   mc = devm_kzalloc(ocelot->dev, sizeof(*mc), GFP_KERNEL);
-   if (!mc)
-   return -ENOMEM;
-
-   ether_addr_copy(mc->addr, mdb->addr);
-   mc->vid = vid;
mc->pgid = pgid;
 
list_add_tail(>list, >multicast);
} else {
-   ocelot_encode_ports_to_mdb(addr, mc, entry_type);
+   ocelot_encode_ports_to_mdb(addr, mc);
ocelot_mact_forget(ocelot, addr, vid);
}
 
mc->ports |= BIT(port);
-   ocelot_encode_ports_to_mdb(addr, mc, entry_type);
+   ocelot_encode_ports_to_mdb(addr, mc);
 
-   return ocelot_mact_learn(ocelot, mc->pgid, addr, vid, entry_type);
+   return ocelot_mact_learn(ocelot, mc->pgid, addr, vid,
+mc->entry_type);
 }
 EXPORT_SYMBOL(ocelot_port_mdb_add);
 
@@ -1063,7 +1064,6 @@ int ocelot_port_mdb_del(struct ocelot *ocelot, int port,
const struct switchdev_obj_port_mdb *mdb)
 {
struct ocelot_port *ocelot_port = ocelot->ports[port];
-   enum macaccess_entry_type entry_type;
unsigned char addr[ETH_ALEN];
struct ocelot_multicast *mc;
u16 vid = mdb->vid;
@@ -1078,9 +1078,7 @@ int ocelot_port_mdb_del(struct ocelot *ocelot, int port,
if (!mc)
return -ENOENT;
 
-   entry_type = ocelot_classify_mdb(mdb->addr);
-
-   ocelot_encode_ports_to_mdb(addr, mc, entry_type);
+   ocelot_encode_ports_to_mdb(addr, mc);

[PATCH net-next 0/5] L2 multicast forwarding for Ocelot switch

2020-10-28 Thread Vladimir Oltean
This series enables the mscc_ocelot switch to forward raw L2 (non-IP)
mdb entries as configured by the bridge driver after this patch:

https://patchwork.ozlabs.org/project/netdev/patch/20201028233831.610076-1-vladimir.olt...@nxp.com/

Vladimir Oltean (5):
  net: mscc: ocelot: classify L2 mdb entries as LOCKED
  net: mscc: ocelot: use ether_addr_copy
  net: mscc: ocelot: remove the "new" variable in ocelot_port_mdb_add
  net: mscc: ocelot: make entry_type a member of struct ocelot_multicast
  net: mscc: ocelot: support L2 multicast entries

 drivers/net/ethernet/mscc/ocelot.c | 147 -
 drivers/net/ethernet/mscc/ocelot.h |  31 --
 include/soc/mscc/ocelot.h  |   1 +
 3 files changed, 125 insertions(+), 54 deletions(-)

-- 
2.25.1



Re: Re: [PATCH 1/1] clk: aspeed: modify some default clks are critical

2020-10-28 Thread Samuel Holland
Stephen,

On 10/14/20 12:16 PM, Stephen Boyd wrote:
> Quoting Joel Stanley (2020-10-13 22:28:00)
>> On Wed, 14 Oct 2020 at 02:50, Stephen Boyd  wrote:
>>>
>>> Quoting Ryan Chen (2020-09-28 00:01:08)
>>>> In ASPEED SoC LCLK is LPC clock for all SuperIO device, UART1/UART2 are
>>>> default for Host SuperIO UART device, eSPI clk for Host eSPI bus access
>>>> eSPI slave channel, those clks can't be disable should keep default,
>>>> otherwise will affect Host side access SuperIO and SPI slave device.
>>>>
>>>> Signed-off-by: Ryan Chen 
>>>> ---
>>>
>>> Is there resolution on this thread?
>>
>> Not yet.
>>
>> We have a system where the BMC (management controller) controls some
>> clocks, but the peripherals that it's clocking are outside the BMC's
>> control. In this case, the host processor is using some UARTs and what
>> not independent of any code running on the BMC.
>>
>> Ryan wants to have them marked as critical so the BMC never powers them down.
>>
>> However, there are systems that don't use this part of the soc, so for
>> those implementations they are not critical and Linux on the BMC can
>> turn them off.
>>
>> Do you have any thoughts? Has anyone solved a similar problem already?
>>
> 
> Is this critical clocks in DT? Where we want to have different DT for
> different device configurations to indicate that some clks should be
> marked critical so they're never turned off and other times they aren't
> so they're turned off?
> 
> It also sounds sort of like the protected-clocks binding. Where you
> don't want to touch certain clks depending on the usage configuration of
> the SoC. There is a patch to make that generic that I haven't applied
> because it looks wrong at first glance[1]. Maybe not registering those
> clks to the framework on the configuration that Ryan has is good enough?

Could you please be more specific than the patch "looks wrong"? I'm more than
happy to update the patch to address your concerns, but I cannot do that unless
I know what your concerns are.

Regards,
Samuel

> [1] https://lore.kernel.org/r/20200903040015.5627-2-sam...@sholland.org


[PATCH net-next 1/5] net: mscc: ocelot: classify L2 mdb entries as LOCKED

2020-10-28 Thread Vladimir Oltean
ocelot.h says:

/* MAC table entry types.
 * ENTRYTYPE_NORMAL is subject to aging.
 * ENTRYTYPE_LOCKED is not subject to aging.
 * ENTRYTYPE_MACv4 is not subject to aging. For IPv4 multicast.
 * ENTRYTYPE_MACv6 is not subject to aging. For IPv6 multicast.
 */

We don't want the permanent entries added with 'bridge mdb' to be
subject to aging.

Signed-off-by: Vladimir Oltean 
---
 drivers/net/ethernet/mscc/ocelot.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/mscc/ocelot.c 
b/drivers/net/ethernet/mscc/ocelot.c
index 70bf8c67d7ef..25152f1f2939 100644
--- a/drivers/net/ethernet/mscc/ocelot.c
+++ b/drivers/net/ethernet/mscc/ocelot.c
@@ -958,7 +958,7 @@ static enum macaccess_entry_type ocelot_classify_mdb(const 
unsigned char *addr)
return ENTRYTYPE_MACv4;
if (addr[0] == 0x33 && addr[1] == 0x33)
return ENTRYTYPE_MACv6;
-   return ENTRYTYPE_NORMAL;
+   return ENTRYTYPE_LOCKED;
 }
 
 static int ocelot_mdb_get_pgid(struct ocelot *ocelot,
-- 
2.25.1



[PATCH net-next 2/5] net: mscc: ocelot: use ether_addr_copy

2020-10-28 Thread Vladimir Oltean
Since a helper is available for copying Ethernet addresses, let's use it.

Signed-off-by: Vladimir Oltean 
---
 drivers/net/ethernet/mscc/ocelot.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/mscc/ocelot.c 
b/drivers/net/ethernet/mscc/ocelot.c
index 25152f1f2939..763d0277eeae 100644
--- a/drivers/net/ethernet/mscc/ocelot.c
+++ b/drivers/net/ethernet/mscc/ocelot.c
@@ -997,7 +997,7 @@ static void ocelot_encode_ports_to_mdb(unsigned char *addr,
   struct ocelot_multicast *mc,
   enum macaccess_entry_type entry_type)
 {
-   memcpy(addr, mc->addr, ETH_ALEN);
+   ether_addr_copy(addr, mc->addr);
 
if (entry_type == ENTRYTYPE_MACv4) {
addr[0] = 0;
@@ -1042,7 +1042,7 @@ int ocelot_port_mdb_add(struct ocelot *ocelot, int port,
if (!mc)
return -ENOMEM;
 
-   memcpy(mc->addr, mdb->addr, ETH_ALEN);
+   ether_addr_copy(mc->addr, mdb->addr);
mc->vid = vid;
mc->pgid = pgid;
 
-- 
2.25.1



Re: [seccomp] Request for a "enable on execve" mode for Seccomp filters

2020-10-28 Thread Rich Felker
On Wed, Oct 28, 2020 at 07:39:41PM +0100, Jann Horn wrote:
> On Wed, Oct 28, 2020 at 7:35 PM Rich Felker  wrote:
> > On Wed, Oct 28, 2020 at 07:25:45PM +0100, Jann Horn wrote:
> > > On Wed, Oct 28, 2020 at 6:52 PM Rich Felker  wrote:
> > > > On Wed, Oct 28, 2020 at 06:34:56PM +0100, Jann Horn wrote:
> > > > > On Wed, Oct 28, 2020 at 5:49 PM Rich Felker  wrote:
> > > > > > On Wed, Oct 28, 2020 at 01:42:13PM +0100, Jann Horn wrote:
> > > > > > > On Wed, Oct 28, 2020 at 12:18 PM Camille Mougey 
> > > > > > >  wrote:
> > > > > > > You're just focusing on execve() - I think it's important to keep 
> > > > > > > in
> > > > > > > mind what happens after execve() for normal, dynamically-linked
> > > > > > > binaries: The next step is that the dynamic linker runs, and it 
> > > > > > > will
> > > > > > > poke around in the file system with access() and openat() and 
> > > > > > > fstat(),
> > > > > > > it will mmap() executable libraries into memory, it will 
> > > > > > > mprotect()
> > > > > > > some memory regions, it will set up thread-local storage (e.g. 
> > > > > > > using
> > > > > > > arch_prctl(); even if the process is single-threaded), and so on.
> > > > > > >
> > > > > > > The earlier you install the seccomp filter, the more of these 
> > > > > > > steps
> > > > > > > you have to permit in the filter. And if you want the filter to 
> > > > > > > take
> > > > > > > effect directly after execve(), the syscalls you'll be forced to
> > > > > > > permit are sufficient to cobble something together in userspace 
> > > > > > > that
> > > > > > > effectively does almost the same thing as execve().
> > > > > >
> > > > > > I would assume you use SECCOMP_RET_USER_NOTIF to implement policy 
> > > > > > for
> > > > > > controlling these operations and allowing only the ones that are 
> > > > > > valid
> > > > > > during dynamic linking. This also allows you to defer application of
> > > > > > the filter until after execve. So unless I'm missing some reason why
> > > > > > this doesn't work, I think the requested functionality is already
> > > > > > available.
> > > > >
> > > > > Ah, yeah, good point.
> > > > >
> > > > > > If you really just want the "activate at exec" behavior, it might be
> > > > > > possible (depending on how SECCOMP_RET_USER_NOTIF behaves when 
> > > > > > there's
> > > > > > no notify fd open; I forget)
> > > > >
> > > > > syscall returns -ENOSYS. Yeah, that'd probably do the job. (Even
> > > > > though it might be a bit nicer if userspace had control over the errno
> > > > > there, such that it could be EPERM instead... oh well.)
> > > >
> > > > EPERM is a major bug in current sandbox implementations, so ENOSYS is
> > > > at least mildly better, but indeed it should be controllable, probably
> > > > by allowing a code path for the BPF to continue with a jump to a
> > > > different logic path if the notify listener is missing.
> > >
> > > I guess we might be able to expose the listener status through a bit /
> > > a field in the struct seccomp_data, and then filters could branch on
> > > that. (And the kernel would run the filter twice if we raced with
> > > filter detachment.) I don't know whether it would look pretty, but I
> > > think it should be doable...
> >
> > I was thinking the race wouldn't be salvagable, but indeed since the
> > filter is side-effect-free you can just re-run it if the status
> > changes between start of filter processing and the attempt at
> > notification. This sounds like it should work.
> >
> > I guess it's not possible to chain two BPF filters to do this, because
> > that only works when the first one allows? Or am I misunderstanding
> > the multiple-filters case entirely? (I've never gotten that far with
> > programming it.)
> 
> I'm not sure if I'm understanding the question correctly...
> At the moment you basically can't have multiple filters with notifiers.
> The rule with multiple filters is always that all the filters get run,
> and the actual action taken is the most restrictive result of all of
> them.

I probably just don't understand how multiple filters work then, which
is pretty much what I expected. But in any case it seems correct that
they're not a tool for solving the problem here.

Rich


[PATCH 5/5] ARM: dts: sun8i: s3: Add dts for the Elimo Initium SBC

2020-10-28 Thread Matteo Scordino
The Elimo Engineering Initium is an Open Source Hardware Single Board
Computer based on the Elimo Impetus SoM.

It is meant as the first development platform for the Impetus, providing
convenient access to the peripherals on the Impetus.

It provides:
USB-C power input
UART-to-USB bridge on the USB-C connector, connected to UART1
USB-A connector for USB2.0 (Host, Device, OTG)
Audio Line In/Out
Pin header to access all signals on the M2 connector of the SoM

Signed-off-by: Matteo Scordino 
---
 arch/arm/boot/dts/Makefile   |  1 +
 arch/arm/boot/dts/sun8i-s3-elimo-initium.dts | 27 
 2 files changed, 28 insertions(+)
 create mode 100644 arch/arm/boot/dts/sun8i-s3-elimo-initium.dts

diff --git a/arch/arm/boot/dts/Makefile b/arch/arm/boot/dts/Makefile
index 4f0adfead547..dcfb8d39c267 100644
--- a/arch/arm/boot/dts/Makefile
+++ b/arch/arm/boot/dts/Makefile
@@ -1210,6 +1210,7 @@ dtb-$(CONFIG_MACH_SUN8I) += \
sun8i-r40-bananapi-m2-ultra.dtb \
sun8i-s3-lichee-zero-plus.dtb \
sun8i-s3-pinecube.dtb \
+   sun8i-s3-elimo-initium.dtb \
sun8i-t3-cqa3t-bv3.dtb \
sun8i-v3s-licheepi-zero.dtb \
sun8i-v3s-licheepi-zero-dock.dtb \
diff --git a/arch/arm/boot/dts/sun8i-s3-elimo-initium.dts 
b/arch/arm/boot/dts/sun8i-s3-elimo-initium.dts
new file mode 100644
index ..5ddd4dbd636c
--- /dev/null
+++ b/arch/arm/boot/dts/sun8i-s3-elimo-initium.dts
@@ -0,0 +1,27 @@
+// SPDX-License-Identifier: (GPL-2.0+ OR MIT)
+/*
+ * Copyright (C) 2020 Matteo Scordino 
+ */
+
+/dts-v1/;
+#include "sun8i-s3-elimo-impetus.dtsi"
+
+/ {
+   model = "Elimo Initium";
+   compatible = "elimo,initium", "elimo,impetus", "allwinner,sun8i-s3";
+
+   aliases {
+   serial0 = 
+   serial1 = 
+   };
+
+   chosen {
+   stdout-path = "serial0:115200n8";
+   };
+};
+
+ {
+   phy-handle = <_mii_phy>;
+   phy-mode = "mii";
+   status = "okay";
+};
-- 
2.20.1



[PATCH 3/5] ARM: dts: sun8i: s3: Add dtsi for the Elimo Impetus SoM

2020-10-28 Thread Matteo Scordino
The Elimo Engineering Impetus is an Open Source Hardware System-on-Module
based on the SoChip S3 SoC.

It is meant for integration into carrier boards or, more generally,
larger designs, and uses an M2 connector to facilitate that.

Interfaces on the M.2/NGFF 42mm connector:
WiFi IEEE 802.11abgn (on-module Realtek)
Bluetooth 4.2/BLE (on-module Realtek)
RGB LCD Interface (on-module connector)
MIPI Camera Interface (on-module connector)
IEEE 802.3u Ethernet MAC (external connector)
USB2.0 (Host, Device, OTG) (external connector)
Audio Line In/Out (external connector)

Signed-off-by: Matteo Scordino 
---
 arch/arm/boot/dts/sun8i-s3-elimo-impetus.dtsi | 51 +++
 1 file changed, 51 insertions(+)
 create mode 100644 arch/arm/boot/dts/sun8i-s3-elimo-impetus.dtsi

diff --git a/arch/arm/boot/dts/sun8i-s3-elimo-impetus.dtsi 
b/arch/arm/boot/dts/sun8i-s3-elimo-impetus.dtsi
new file mode 100644
index ..3550125cf334
--- /dev/null
+++ b/arch/arm/boot/dts/sun8i-s3-elimo-impetus.dtsi
@@ -0,0 +1,51 @@
+// SPDX-License-Identifier: (GPL-2.0+ OR MIT)
+/*
+ * Copyright (C) 2020 Matteo Scordino 
+ */
+
+/dts-v1/;
+#include "sun8i-v3.dtsi"
+#include "sunxi-common-regulators.dtsi"
+
+/ {
+   model = "Elimo Impetus SoM";
+   compatible = "elimo,impetus", "sochip,s3", "allwinner,sun8i-s3";
+
+   aliases {
+   serial0 = 
+   serial1 = 
+   };
+
+   chosen {
+   stdout-path = "serial0:115200n8";
+   };
+};
+
+ {
+   broken-cd;
+   bus-width = <4>;
+   vmmc-supply = <_vcc3v3>;
+   status = "okay";
+};
+
+ {
+   pinctrl-0 = <_pb_pins>;
+   pinctrl-names = "default";
+   status = "okay";
+};
+
+ {
+   pinctrl-0 = <_pg_pins>;
+   pinctrl-names = "default";
+   status = "okay";
+};
+
+_otg {
+   dr_mode = "otg";
+   status = "okay";
+};
+
+ {
+   usb0_id_det-gpio = < 5 6 GPIO_ACTIVE_HIGH>;
+   status = "okay";
+};
-- 
2.20.1



Re: [PATCH 4.4 000/112] 4.4.241-rc1 review

2020-10-28 Thread Daniel Díaz
Hello!

On Wed, 28 Oct 2020 at 13:46, Guenter Roeck  wrote:
> On Wed, Oct 28, 2020 at 10:06:21AM -0700, Guenter Roeck wrote:
> > On Tue, Oct 27, 2020 at 02:48:30PM +0100, Greg Kroah-Hartman wrote:
> > > This is the start of the stable review cycle for the 4.4.241 release.
> > > There are 112 patches in this series, all will be posted as a response
> > > to this one.  If anyone has any issues with these being applied, please
> > > let me know.
> > >
> > > Responses should be made by Thu, 29 Oct 2020 13:48:36 +0000.
> > > Anything received after that time might be too late.
> > >
> >
> > Build results:
> >   total: 165 pass: 165 fail: 0
> > Qemu test results:
> >   total: 332 pass: 332 fail: 0
> >
>
> Did anyone receive the original e-mail ? Looks like I have been tagged as
> spammer, and I am having trouble sending e-mails.

If the original is from 3.5 hours ago, yeah, we got it. I'm not seeing
lore updated, but that's probably another issue.

Greetings!

Daniel Díaz
daniel.d...@linaro.org


RE: [PATCH V3 1/1] nvme: Add quirk for LiteON CL1 devices running FW 220TQ,22001

2020-10-28 Thread Gloria Tsai
Corrected the description of this bug that SSD will not do GC after receiving 
shutdown cmd.
Do GC before shutdown -> delete IO Q -> shutdown from host -> breakup GC -> 
D3hot -> enter PS4 -> have a chance swap block -> use wrong pointer on device 
SRAM -> over program

SSD gets in low power mode only in suspend, this issue only happens in low 
power mode. 
As for Hibernate, power off, unbinding the driver or even warm boot, EC boot, 
SSD won't be in low power state, so there'll be no problem.


Regards,
Gloria Tsai
_

Sales PM Division
Solid State Storage Technology Corporation
TEL: +886-3-612-3888 ext. 2201
E-Mail: gloria.t...@ssstc.com
_

-Original Message-
From: Christoph Hellwig  
Sent: Thursday, October 29, 2020 1:17 AM
To: Jongpil Jung 
Cc: Keith Busch ; Jens Axboe ; Christoph 
Hellwig ; Sagi Grimberg ; 
linux-n...@lists.infradead.org; linux-kernel@vger.kernel.org; Gloria Tsai 
; jongpil19.j...@samsung.com; jongheony@samsung.com; 
dj54.s...@samsung.com
Subject: Re: [PATCH V3 1/1] nvme: Add quirk for LiteON CL1 devices running FW 
220TQ,22001



This message was sent from outside of the company. Please do not click links or 
open attachments unless you recognize the source of this email and know the 
content is safe.


On Wed, Oct 28, 2020 at 06:14:21PM +0900, Jongpil Jung wrote:
> LiteON(SSSTC) CL1 device running FW 220TQ,22001 has bugs with simple 
> suspend.
>
> When NVMe device receive D3hot from host, NVMe firmware will do 
> garbage collection. While NVMe device do Garbage collection, firmware 
> has chance to going incorrect address.

I'm still lost on what this means.  The device doesn't receive 'D3hot'
from the host, it receives a shutdown request.  And we don't just do that when 
hibernating, but also when doing an actual shutdown, a suspend to disk, 
unbinding the driver, etc.  So if the device has a problem with shutdowns we 
really are in trouble.


[PATCH 2/5] ARM: dts: sun8i: V3/S3: Add UART1 pin definitions to the V3/S3 dtsi

2020-10-28 Thread Matteo Scordino
The Allwinner V3 and S3 can use PG6/7 as RX/TX for UART1. Since no other
functions are assigned to those pins, they are a convenient choice for
a debugging or application UART.
This is specific to V3/S3 as the V3s's non-BGA package did not have
those pins.

Signed-off-by: Matteo Scordino 
---
 arch/arm/boot/dts/sun8i-v3.dtsi | 5 +
 1 file changed, 5 insertions(+)

diff --git a/arch/arm/boot/dts/sun8i-v3.dtsi b/arch/arm/boot/dts/sun8i-v3.dtsi
index ca4672ed2e02..c279e13583ba 100644
--- a/arch/arm/boot/dts/sun8i-v3.dtsi
+++ b/arch/arm/boot/dts/sun8i-v3.dtsi
@@ -24,4 +24,9 @@
 
  {
compatible = "allwinner,sun8i-v3-pinctrl";
+
+   uart1_pg_pins: uart1-pg-pins {
+   pins = "PG6", "PG7";
+   function = "uart1";
+   };
 };
-- 
2.20.1



[PATCH 4/5] dt-bindings: arm: sunxi: add Elimo bindings

2020-10-28 Thread Matteo Scordino
Document board compatible names for Elimo Engineering Impetus and Initium

Signed-off-by: Matteo Scordino 
---
 Documentation/devicetree/bindings/arm/sunxi.yaml | 10 ++
 1 file changed, 10 insertions(+)

diff --git a/Documentation/devicetree/bindings/arm/sunxi.yaml 
b/Documentation/devicetree/bindings/arm/sunxi.yaml
index 0f23133672a3..55405809ec91 100644
--- a/Documentation/devicetree/bindings/arm/sunxi.yaml
+++ b/Documentation/devicetree/bindings/arm/sunxi.yaml
@@ -898,3 +898,13 @@ properties:
 items:
   - const: xunlong,orangepi-zero-plus2-h3
   - const: allwinner,sun8i-h3
+
+  - description: Elimo Engineering Impetus SoM
+items:
+  - const: elimo,impetus
+  - const: allwinner,sun8i-s3
+
+  - description: Elimo Engineering Initium
+items:
+  - const: elimo,initium
+  - const: allwinner,sun8i-s3
-- 
2.20.1



[PATCH 1/5] dt-bindings: vendors: add Elimo Engineering vendor prefix

2020-10-28 Thread Matteo Scordino
Add elimo as vendor prefix for dt bindings, since we are adding a dtsi
for a SoM and a dts for an SBC

Signed-off-by: Matteo Scordino 
---
 Documentation/devicetree/bindings/vendor-prefixes.yaml | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/Documentation/devicetree/bindings/vendor-prefixes.yaml 
b/Documentation/devicetree/bindings/vendor-prefixes.yaml
index 2735be1a8470..b877a3516277 100644
--- a/Documentation/devicetree/bindings/vendor-prefixes.yaml
+++ b/Documentation/devicetree/bindings/vendor-prefixes.yaml
@@ -319,6 +319,8 @@ patternProperties:
 description: Elgin S/A.
   "^elida,.*":
 description: Shenzhen Elida Technology Co., Ltd.
+  "^elimo,.*":
+description: Elimo Engineering Ltd.
   "^embest,.*":
 description: Shenzhen Embest Technology Co., Ltd.
   "^emlid,.*":
-- 
2.20.1



[PATCH 0/5] Elimo Impetus and Initium support

2020-10-28 Thread Matteo Scordino
The Elimo Impetus is an Open Source Hardware System-on-Module based on the 
SoChip S3 SoC.
It is meant for integration into carrier boards or, more generally, larger 
designs, and uses an M2 connector to facilitate that.
The Elimo Initium is a carrier board for the Impetus, together they provide a 
fully open source SBC.

This patch set adds support for both. It is based on the sunxi/dt-for-5.11
branch of the https://git.kernel.org/pub/scm/linux/kernel/git/sunxi/linux.git
tree.

Matteo Scordino (5):
  dt-bindings: vendors: add Elimo Engineering vendor prefix
  ARM: dts: sun8i: V3/S3: Add UART1 pin definitions to the V3/S3 dtsi
  ARM: dts: sun8i: s3: Add dtsi for the Elimo Impetus SoM
  dt-bindings: arm: sunxi: add Elimo bindings
  ARM: dts: sun8i: s3: Add dts for the Elimo Initium SBC

 .../devicetree/bindings/arm/sunxi.yaml| 10 
 .../devicetree/bindings/vendor-prefixes.yaml  |  2 +
 arch/arm/boot/dts/Makefile|  1 +
 arch/arm/boot/dts/sun8i-s3-elimo-impetus.dtsi | 51 +++
 arch/arm/boot/dts/sun8i-s3-elimo-initium.dts  | 27 ++
 arch/arm/boot/dts/sun8i-v3.dtsi   |  5 ++
 6 files changed, 96 insertions(+)
 create mode 100644 arch/arm/boot/dts/sun8i-s3-elimo-impetus.dtsi
 create mode 100644 arch/arm/boot/dts/sun8i-s3-elimo-initium.dts

-- 
2.20.1



[PATCH -next] net: nvidia: forcedeth: remove useless if/else

2020-10-28 Thread Zou Wei
Fix the following coccinelle report:

./drivers/net/ethernet/nvidia/forcedeth.c:3479:8-10:
WARNING: possible condition with no effect (if == else)

Both branches are the same, so remove the else if/else altogether.

Reported-by: Hulk Robot 
Signed-off-by: Zou Wei 
---
 drivers/net/ethernet/nvidia/forcedeth.c | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/drivers/net/ethernet/nvidia/forcedeth.c 
b/drivers/net/ethernet/nvidia/forcedeth.c
index 2fc10a3..87ed7e1 100644
--- a/drivers/net/ethernet/nvidia/forcedeth.c
+++ b/drivers/net/ethernet/nvidia/forcedeth.c
@@ -3476,9 +3476,6 @@ static int nv_update_linkspeed(struct net_device *dev)
} else if (adv_lpa & LPA_10FULL) {
newls = NVREG_LINKSPEED_FORCE|NVREG_LINKSPEED_10;
newdup = 1;
-   } else if (adv_lpa & LPA_10HALF) {
-   newls = NVREG_LINKSPEED_FORCE|NVREG_LINKSPEED_10;
-   newdup = 0;
} else {
newls = NVREG_LINKSPEED_FORCE|NVREG_LINKSPEED_10;
newdup = 0;
-- 
2.6.2



[seccomp] Request for a "enable on execve" mode for Seccomp filters

2020-10-28 Thread Camille Mougey
Hello,

(This is my first message to the kernel list, I hope I'm doing it right)

From my understanding, there is no way to delay the activation of
seccomp filters, for instance "until an _execve_ call".
But this might be useful, especially for tools who sandbox other,
non-cooperative, executables, such as "systemd" or "FireJail".

It seems to be a caveat of seccomp specific to the system call
_execve_. For now, some tools such as "systemd" explicitly mention
this exception, and do not support it (from the man page):
> Note that strict system call filters may impact execution and error handling 
> code paths of the service invocation. Specifically, access to the execve 
> system call is required for the execution of the service binary — if it is 
> blocked service invocation will necessarily fail

"FireJail" takes a different approach[1], with a kind of workaround:
the project uses an external library to be loaded through LD_PRELOAD
mechanism, in order to install filters during the loader stage.
This approach, a bit hacky, also has several caveats:
* _openat_, _mmap_, etc. must be allowed in order to reach the
LD_PRELOAD mechanism, and for the crafted library to work ;
* it doesn't work for static binaries.

I only see hackish ways to restrict the use of _execve_ in a
non-cooperative executable. These methods seem globally bypassable
and not satisfactory from a security point of view.

IMHO, a way to prepare filter and enable them only on the next
_execve_ would have some benefit:
* have a way to restrict _execve_ in a non-cooperative executable;
* install filters atomically, i.e. before the _execve_ system call
returns. That would limit racy situations, and have the very first
instructions of potentially untrusted binaries already subject to
seccomp filters. It would also ensure there is only one thread running
at the filter enabling time.

From what I understand, there is a related use case[2] where the
"enable on exec" mode would also be a solution.

Thanks for your attention,
C. Mougey

[1]: https://github.com/netblue30/firejail/issues/3685
[2]: https://lore.kernel.org/linux-man/202010250759.F9745E0B6@keescook/


Re: [PATCH] platform/x86: remove unneeded break

2020-10-28 Thread Hans de Goede
Hi,

On 10/19/20 3:32 PM, t...@redhat.com wrote:
> From: Tom Rix 
> 
> A break is not needed if it is preceded by a return
> 
> Signed-off-by: Tom Rix 

Thank you for your patch, I've applied this patch to my review-hans 
branch:
https://git.kernel.org/pub/scm/linux/kernel/git/pdx86/platform-drivers-x86.git/log/?h=review-hans

Note it will show up there once I've pushed my local branch there,
which might take a while.

Once I've run some tests on this branch the patches there will be
added to the platform-drivers-x86/for-next branch and eventually
will be included in the pdx86 pull-request to Linus for the next
merge-window.

Regards,

Hans

> ---
>  drivers/platform/x86/acer-wmi.c| 1 -
>  drivers/platform/x86/sony-laptop.c | 3 ---
>  drivers/platform/x86/wmi.c | 3 ---
>  3 files changed, 7 deletions(-)
> 
> diff --git a/drivers/platform/x86/acer-wmi.c b/drivers/platform/x86/acer-wmi.c
> index 49f4b73be513..1c2084c74a57 100644
> --- a/drivers/platform/x86/acer-wmi.c
> +++ b/drivers/platform/x86/acer-wmi.c
> @@ -792,7 +792,6 @@ static acpi_status AMW0_set_u32(u32 value, u32 cap)
>   switch (quirks->brightness) {
>   default:
>   return ec_write(0x83, value);
> - break;
>   }
>   default:
>   return AE_ERROR;
> diff --git a/drivers/platform/x86/sony-laptop.c 
> b/drivers/platform/x86/sony-laptop.c
> index e5a1b5533408..704813374922 100644
> --- a/drivers/platform/x86/sony-laptop.c
> +++ b/drivers/platform/x86/sony-laptop.c
> @@ -2467,13 +2467,11 @@ static int __sony_nc_gfx_switch_status_get(void)
>* 0: integrated GFX (stamina)
>*/
>   return result & 0x1 ? SPEED : STAMINA;
> - break;
>   case 0x015B:
>   /* 0: discrete GFX (speed)
>* 1: integrated GFX (stamina)
>*/
>   return result & 0x1 ? STAMINA : SPEED;
> - break;
>   case 0x0128:
>   /* it's a more elaborated bitmask, for now:
>* 2: integrated GFX (stamina)
> @@ -2482,7 +2480,6 @@ static int __sony_nc_gfx_switch_status_get(void)
>   dprintk("GFX Status: 0x%x\n", result);
>   return result & 0x80 ? AUTO :
>   result & 0x02 ? STAMINA : SPEED;
> - break;
>   }
>   return -EINVAL;
>  }
> diff --git a/drivers/platform/x86/wmi.c b/drivers/platform/x86/wmi.c
> index d88f388a3450..44e802f9f1b4 100644
> --- a/drivers/platform/x86/wmi.c
> +++ b/drivers/platform/x86/wmi.c
> @@ -1260,13 +1260,10 @@ acpi_wmi_ec_space_handler(u32 function, 
> acpi_physical_address address,
>   switch (result) {
>   case -EINVAL:
>   return AE_BAD_PARAMETER;
> - break;
>   case -ENODEV:
>   return AE_NOT_FOUND;
> - break;
>   case -ETIME:
>   return AE_TIME;
> - break;
>   default:
>   return AE_OK;
>   }
> 



Re: [PATCH v2] libata: Fix retrieving of active qcs

2020-10-28 Thread Jens Axboe
On 10/28/20 7:55 AM, Pali Rohár wrote:
> On Friday 08 May 2020 07:46:44 Sascha Hauer wrote:
>> From fcdcfa9e7a4ee4faf411de1df4f3c4e12c78545c Mon Sep 17 00:00:00 2001
>> From: Sascha Hauer 
>> Date: Fri, 8 May 2020 07:28:19 +0200
>> Subject: [PATCH] ata: sata_nv: Fix retrieving of active qcs
>>
>> ata_qc_complete_multiple() has to be called with the tags physically
>> active, that is the hw tag is at bit 0. ap->qc_active has the same tag
>> at bit ATA_TAG_INTERNAL instead, so call ata_qc_get_active() to fix that
>> up. This is done in the vein of 8385d756e114 ("libata: Fix retrieving of
>> active qcs").
>>
>> Signed-off-by: Sascha Hauer 
> 
> I tested this second change on nforce4 box with sata_nv controllers:
> 
>   00:07.0 IDE interface [0101]: NVIDIA Corporation CK804 Serial ATA 
> Controller [10de:0054] (rev f3)
>   00:08.0 IDE interface [0101]: NVIDIA Corporation CK804 Serial ATA 
> Controller [10de:0055] (rev f3)
> 
> Both disks are working fine, I do not see any regression or change, so
> you can add my:
> 
> Tested-by: Pali Rohár 
> 
> Ideally add also Fixes line:
> 
> Fixes: 28361c403683 ("libata: add extra internal command")
> 
> Jens, do you need something more from me? Some special tests, etc?

Thanks, I'll queue this up.

-- 
Jens Axboe



Re: [PATCH 0/4] arch, mm: improve robustness of direct map manipulation

2020-10-28 Thread Edgecombe, Rick P
On Wed, 2020-10-28 at 13:09 +0200, Mike Rapoport wrote:
> On Tue, Oct 27, 2020 at 09:46:35AM +0100, David Hildenbrand wrote:
> > On 27.10.20 09:38, Mike Rapoport wrote:
> > > On Mon, Oct 26, 2020 at 06:05:30PM +, Edgecombe, Rick P
> > > wrote:
> > > 
> > > > Beyond whatever you are seeing, for the latter case of new
> > > > things
> > > > getting introduced to an interface with hidden dependencies...
> > > > Another
> > > > edge case could be a new caller to set_memory_np() could result
> > > > in
> > > > large NP pages. None of the callers today should cause this
> > > > AFAICT, but
> > > > it's not great to rely on the callers to know these details.
> > > A caller of set_memory_*() or set_direct_map_*() should expect a
> > > failure
> > > and be ready for that. So adding a WARN to safe_copy_page() is
> > > the first
> > > step in that direction :)
> > > 
> > 
> > I am probably missing something important, but why are we
> > saving/restoring
> > the content of pages that were explicitly removed from the identity
> > mapping
> > such that nobody will access them?
> 
> Actually, we should not be saving/restoring free pages during
> hibernation as there are several calls to mark_free_pages() that
> should
> exclude the free pages from the snapshot. I've tried to find why the
> fix
> that maps/unmaps a page to save it was required at the first place,
> but
> I could not find bug reports.
> 
> The closest I've got is an email from Rafael that asked to update
> "hibernate: handle DEBUG_PAGEALLOC" patch:
> 
> https://lore.kernel.org/linux-pm/200802200133.44098@sisk.pl/
> 
> Could it be that safe_copy_page() tries to workaround a non-existent
> problem?

It looks like inside page_alloc.c it unmaps the page before it actually
frees it, so to hibernate it could look like the page is still
allocated even though it's unmapped? Maybe that small window is what it
cared about initially.

There is also now the vmalloc case, which I am actually working on
expanding. So I think the re-mapping logic is needed.


[PATCH v1] kernel.h: Split out mathematical helpers

2020-10-28 Thread Andy Shevchenko
kernel.h has been used as a dumping ground for all kinds of stuff for a
long time. Here is an attempt to start cleaning it up by splitting out the
mathematical helpers.

At the same time, convert users in the header and lib folders to use the new
header. For the time being, though, include the new header back in kernel.h
to avoid twisted indirect includes for existing users.

Signed-off-by: Andy Shevchenko 
---
 fs/nfs/callback_proc.c|   5 +
 include/linux/bitops.h|  11 ++-
 include/linux/dcache.h|   1 +
 include/linux/iommu-helper.h  |   4 +-
 include/linux/kernel.h| 173 +
 include/linux/math.h  | 177 ++
 include/linux/rcu_node_tree.h |   2 +
 include/linux/units.h |   2 +-
 lib/errname.c |   1 +
 lib/errseq.c  |   1 +
 lib/find_bit.c|   3 +-
 lib/math/div64.c  |   3 +-
 lib/math/int_pow.c|   2 +-
 lib/math/int_sqrt.c   |   3 +-
 lib/math/reciprocal_div.c |   9 +-
 15 files changed, 214 insertions(+), 183 deletions(-)
 create mode 100644 include/linux/math.h

diff --git a/fs/nfs/callback_proc.c b/fs/nfs/callback_proc.c
index e61dbc9b86ae..f7786e00a6a7 100644
--- a/fs/nfs/callback_proc.c
+++ b/fs/nfs/callback_proc.c
@@ -6,10 +6,15 @@
  *
  * NFSv4 callback procedures
  */
+
+#include 
+#include 
 #include 
 #include 
 #include 
 #include 
+#include 
+
 #include "nfs4_fs.h"
 #include "callback.h"
 #include "delegation.h"
diff --git a/include/linux/bitops.h b/include/linux/bitops.h
index 5ace312bedfa..aad53f399983 100644
--- a/include/linux/bitops.h
+++ b/include/linux/bitops.h
@@ -1,9 +1,12 @@
 /* SPDX-License-Identifier: GPL-2.0 */
 #ifndef _LINUX_BITOPS_H
 #define _LINUX_BITOPS_H
+
 #include 
 #include 
 
+#include 
+
 /* Set bits in the first 'n' bytes when loaded from memory */
 #ifdef __LITTLE_ENDIAN
 #  define aligned_byte_mask(n) ((1UL << 8*(n))-1)
@@ -12,10 +15,10 @@
 #endif
 
 #define BITS_PER_TYPE(type)    (sizeof(type) * BITS_PER_BYTE)
-#define BITS_TO_LONGS(nr)  DIV_ROUND_UP(nr, BITS_PER_TYPE(long))
-#define BITS_TO_U64(nr)    DIV_ROUND_UP(nr, BITS_PER_TYPE(u64))
-#define BITS_TO_U32(nr)    DIV_ROUND_UP(nr, BITS_PER_TYPE(u32))
-#define BITS_TO_BYTES(nr)  DIV_ROUND_UP(nr, BITS_PER_TYPE(char))
+#define BITS_TO_LONGS(nr)  __KERNEL_DIV_ROUND_UP(nr, BITS_PER_TYPE(long))
+#define BITS_TO_U64(nr)    __KERNEL_DIV_ROUND_UP(nr, 
BITS_PER_TYPE(u64))
+#define BITS_TO_U32(nr)    __KERNEL_DIV_ROUND_UP(nr, 
BITS_PER_TYPE(u32))
+#define BITS_TO_BYTES(nr)  __KERNEL_DIV_ROUND_UP(nr, BITS_PER_TYPE(char))
 
 extern unsigned int __sw_hweight8(unsigned int w);
 extern unsigned int __sw_hweight16(unsigned int w);
diff --git a/include/linux/dcache.h b/include/linux/dcache.h
index ffcb7b79f151..81033567f250 100644
--- a/include/linux/dcache.h
+++ b/include/linux/dcache.h
@@ -4,6 +4,7 @@
 
 #include 
 #include 
+#include 
 #include 
 #include 
 #include 
diff --git a/include/linux/iommu-helper.h b/include/linux/iommu-helper.h
index 70d01edcbf8b..74be34f3a20a 100644
--- a/include/linux/iommu-helper.h
+++ b/include/linux/iommu-helper.h
@@ -3,7 +3,9 @@
 #define _LINUX_IOMMU_HELPER_H
 
 #include 
-#include 
+#include 
+#include 
+#include 
 
 static inline unsigned long iommu_device_max_index(unsigned long size,
   unsigned long offset,
diff --git a/include/linux/kernel.h b/include/linux/kernel.h
index 2f05e9128201..f97ab3283a8b 100644
--- a/include/linux/kernel.h
+++ b/include/linux/kernel.h
@@ -2,7 +2,6 @@
 #ifndef _LINUX_KERNEL_H
 #define _LINUX_KERNEL_H
 
-
 #include 
 #include 
 #include 
@@ -11,12 +10,14 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 #include 
 #include 
 #include 
+
 #include 
-#include 
+
 #include 
 
 #define STACK_MAGIC    0xdeadbeef
@@ -54,125 +55,11 @@
 }  \
 )
 
-/*
- * This looks more complex than it should be. But we need to
- * get the type for the ~ right in round_down (it needs to be
- * as wide as the result!), and we want to evaluate the macro
- * arguments just once each.
- */
-#define __round_mask(x, y) ((__typeof__(x))((y)-1))
-/**
- * round_up - round up to next specified power of 2
- * @x: the value to round
- * @y: multiple to round up to (must be a power of 2)
- *
- * Rounds @x up to next multiple of @y (which must be a power of 2).
- * To perform arbitrary rounding up, use roundup() below.
- */
-#define round_up(x, y) ((((x)-1) | __round_mask(x, y))+1)
-/**
- * round_down - round down to next specified power of 2
- * @x: the value to round
- * @y: multiple to round down to (must be a power of 2)
- *
- * Rounds @x down to next multiple of @y (which must be a power of 2).
- * To perform arbitrary rounding down, use rounddown() below.
- */
-#define round_down(x, y) ((x) & ~__round_mask(x, y))
-
 #define typeof_member(T, m)    typeof(((T*)0)->m)
 
-#define 

Re: [PATCH] ext4: properly check for dirty state in ext4_inode_datasync_dirty()

2020-10-28 Thread Ritesh Harjani




On 10/28/20 8:59 PM, Theodore Y. Ts'o wrote:

On Wed, Oct 28, 2020 at 08:57:03AM +0530, Ritesh Harjani wrote:


Well, I too noticed this yesterday while I was testing xfstests -g swap.
Those tests were returning _notrun, hence that could be the reason why
it didn't get noticed in Ted's xfstests runs.


Yeah, one of the things I discussed with Harshad is we really need a
test that looks like generic/472, but which is in shared/NNN, and
which unconditionally tries to use swapon for those file systems where
swapfiles are expected to work.  This is actually the second
regression caused by our breaking swapfile support (the other being
the iomap bmap change), which escaped our testing because we didn't
notice that generic/472 was skipped.


Yes, agreed this is second in a row.
So with fast-commit, swap tests returned _notrun, since
swapon syscall returned -EINVAL in _require_scratch_swapfile() itself.
This is due to some old commit in fstests to make swap tests work on
btrfs on both kernels (with and w/o support of swapon in btrfs), it
first checks in _require_scratch_swapfile() to see if swapon even works
or not. Hence it skips to run further if _require_scratch_swapfile()
fails.

Secondly with bmap to iomap interface, I guess it should pass
all tests except for case with fallocate files, which I think is
tests/generic/496. But here too it assumes that if 1st time it fails
with falloc then swapon may not be supported for that fs and hence does
_notrun.

I am actually working on this to make these swap tests return some
definitive pass or failure status. Will be sending some patches soon.
I could use your idea to add a test in shared/NNN for testing swap with
fallocate files for ext4 and xfs (for bmap to iomap ext4 regression
category of tests)

Thanks
-ritesh



[PATCH 0/3] mwifiex: disable ps_mode by default for stability

2020-10-28 Thread Tsuchiya Yuto
Hello all,

On Microsoft Surface devices (PCIe-88W8897), we are observing stability
issues when ps_mode (IEEE power_save) is enabled, which eventually cause a
firmware crash. Especially on 5GHz APs, the connection is completely
unstable and almost unusable.

I think the most desirable change is to fix the ps_mode itself. But it
seems to be hard work [1], so I'm afraid we have to go this way.

Therefore, the first patch of this series disables the ps_mode by default
instead of enabling it on driver init. I'm not sure if explicitly
disabling it is really required or not. I don't have access to the details
of this chip. Let me know if it's enough to just remove the code that
enables ps_mode.

The second patch adds a new module parameter named "allow_ps_mode". Since
other wifi drivers just disable power_save by default by module parameter
like this, I also added this.

The third patch adds a message when ps_mode will be changed. Useful when
diagnosing connection issues.

Thanks,
Tsuchiya Yuto

[1] https://bugzilla.kernel.org/show_bug.cgi?id=109681

Tsuchiya Yuto (3):
  mwifiex: disable ps_mode explicitly by default instead
  mwifiex: add allow_ps_mode module parameter
  mwifiex: print message when changing ps_mode

 .../net/wireless/marvell/mwifiex/cfg80211.c   | 23 +++
 .../net/wireless/marvell/mwifiex/sta_cmd.c| 11 ++---
 2 files changed, 31 insertions(+), 3 deletions(-)

-- 
2.29.1



Re: [PATCH] ext4: properly check for dirty state in ext4_inode_datasync_dirty()

2020-10-28 Thread Ritesh Harjani




On 10/28/20 9:18 AM, harshad shirwadkar wrote:

Actually the simpler fix for this in case of fast commits is to check
if the inode is on the fast commit list or not. Since we clear the
fast commit list after every fast and / or full commit, it's always
true that if the inode is not on the list, that means it isn't dirty.
This will simplify the logic here and then we can probably get rid of
i_fc_committed_subtid field altogether. I'll test this and send out a
patch.


Yes, sounds like a better solution. Thanks!

-ritesh


[PATCH 0/4] Add GCC and RPMh clock support for SDX55

2020-10-28 Thread Manivannan Sadhasivam
Hello,

This series adds Global Clock Controller (GCC) and RPMh clock support
for SDX55 SoC from Qualcomm with relevant DT bindings.

This series has been tested on SDX55 MTP board. The dts patches for this
SoC/board will be posted later.

Thanks,
Mani

Manivannan Sadhasivam (1):
  clk: qcom: Add support for SDX55 RPMh clocks

Naveen Yadav (1):
  clk: qcom: Add SDX55 GCC support

Vinod Koul (2):
  dt-bindings: clock: Add SDX55 GCC clock bindings
  dt-bindings: clock: Introduce RPMHCC bindings for SDX55

 .../bindings/clock/qcom,gcc-sdx55.yaml|   71 +
 .../bindings/clock/qcom,rpmhcc.yaml   |1 +
 drivers/clk/qcom/Kconfig  |8 +
 drivers/clk/qcom/Makefile |1 +
 drivers/clk/qcom/clk-rpmh.c   |   20 +
 drivers/clk/qcom/gcc-sdx55.c  | 1667 +
 include/dt-bindings/clock/qcom,gcc-sdx55.h|  112 ++
 include/dt-bindings/clock/qcom,rpmh.h |1 +
 8 files changed, 1881 insertions(+)
 create mode 100644 Documentation/devicetree/bindings/clock/qcom,gcc-sdx55.yaml
 create mode 100644 drivers/clk/qcom/gcc-sdx55.c
 create mode 100644 include/dt-bindings/clock/qcom,gcc-sdx55.h

-- 
2.17.1



Re: [RESEND PATCH 05/42] mfd: axp20x: use PLATFORM_DEVID_NONE

2020-10-28 Thread Chen-Yu Tsai
On Thu, Oct 29, 2020 at 6:30 AM Krzysztof Kozlowski  wrote:
>
> Use PLATFORM_DEVID_NONE define instead of "-1" value because:
>  - it brings some meaning,
>  - it might point attention why auto device ID was not used.
>
> Signed-off-by: Krzysztof Kozlowski 
> Reviewed-by: Andy Shevchenko 

Acked-by: Chen-Yu Tsai 


[PATCH -next] coresight: core: Remove unneeded semicolon

2020-10-28 Thread Zou Wei
Fixes coccicheck warning:

./drivers/hwtracing/coresight/coresight-core.c:421:4-5: Unneeded semicolon

Reported-by: Hulk Robot 
Signed-off-by: Zou Wei 
---
 drivers/hwtracing/coresight/coresight-core.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/hwtracing/coresight/coresight-core.c 
b/drivers/hwtracing/coresight/coresight-core.c
index cc9e802..29c83ea 100644
--- a/drivers/hwtracing/coresight/coresight-core.c
+++ b/drivers/hwtracing/coresight/coresight-core.c
@@ -418,7 +418,7 @@ static int coresight_enable_source(struct coresight_device 
*csdev, u32 mode)
if (ret) {
coresight_control_assoc_ectdev(csdev, false);
return ret;
-   };
+   }
}
csdev->enable = true;
}
-- 
2.6.2



[PATCH] Staging: rtl8723bs: core: rtw_cmd: Fixed two if-statement coding style issues

2020-10-28 Thread Manuel Palenzuela
Fixed two cases where the if-statement coding style wasn't following the 
guidelines. (rtw_cmd.c)

Signed-off-by: Manuel Palenzuela 
---
 drivers/staging/rtl8723bs/core/rtw_cmd.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/staging/rtl8723bs/core/rtw_cmd.c 
b/drivers/staging/rtl8723bs/core/rtw_cmd.c
index 4cf09d947d32..1723473005e7 100644
--- a/drivers/staging/rtl8723bs/core/rtw_cmd.c
+++ b/drivers/staging/rtl8723bs/core/rtw_cmd.c
@@ -344,7 +344,7 @@ int rtw_enqueue_cmd(struct cmd_priv *pcmdpriv, struct 
cmd_obj *cmd_obj)
cmd_obj->padapter = padapter;
 
res = rtw_cmd_filter(pcmdpriv, cmd_obj);
-   if (_FAIL == res) {
+   if (res == _FAIL) {
rtw_free_cmd_obj(cmd_obj);
goto exit;
}
@@ -460,7 +460,7 @@ int rtw_cmd_thread(void *context)
 
cmd_start_time = jiffies;
 
-   if (_FAIL == rtw_cmd_filter(pcmdpriv, pcmd)) {
+   if (rtw_cmd_filter(pcmdpriv, pcmd) == _FAIL) {
pcmd->res = H2C_DROPPED;
goto post_process;
}
-- 
2.26.2



Re: [PATCH 2/2] builddeb: Consolidate consecutive chmod calls into one

2020-10-28 Thread Masahiro Yamada
On Tue, Oct 27, 2020 at 4:32 AM Sven Joachim  wrote:
>
> No need to call chmod three times when it can do everything at once.
>
> Signed-off-by: Sven Joachim 
> ---
>  scripts/package/builddeb | 6 +-
>  1 file changed, 1 insertion(+), 5 deletions(-)
>
> diff --git a/scripts/package/builddeb b/scripts/package/builddeb
> index 91a502bb97e8..81ec6414726c 100755
> --- a/scripts/package/builddeb
> +++ b/scripts/package/builddeb
> @@ -42,11 +42,7 @@ create_package() {
> else
> chown -R root:root "$pdir"
> fi
> -   chmod -R go-w "$pdir"
> -   # in case we are in a restrictive umask environment like 0077
> -   chmod -R a+rX "$pdir"
> -   # in case we build in a setuid/setgid directory
> -   chmod -R ug-s "$pdir"
> +   chmod -R go-w,a+rX,ug-s "$pdir"


You added the comment in 1/2, then
you are deleting it in this patch.

Could you keep the comments for clarification?


# a+rX in case we are in a restrictive umask environment like 0077
# ug-s in case we build in a setuid/setgid directory
chmod -R go-w,a+rX,ug-s "$pdir"






> # Create the package
> dpkg-gencontrol -p$pname -P"$pdir"
> --
> 2.28.0
>


--
Best Regards
Masahiro Yamada


[tip:x86/cleanups] BUILD SUCCESS 0d847ce7c17613d63401ac82336ee1d5df749120

2020-10-28 Thread kernel test robot
 allnoconfig
c6x  allyesconfig
nds32   defconfig
nios2allyesconfig
cskydefconfig
alpha   defconfig
alphaallyesconfig
xtensa   allyesconfig
h8300allyesconfig
arc defconfig
sh   allmodconfig
parisc  defconfig
parisc   allyesconfig
s390defconfig
i386 allyesconfig
sparcallyesconfig
sparc   defconfig
i386defconfig
mips allyesconfig
mips allmodconfig
powerpc  allyesconfig
powerpc  allmodconfig
powerpc   allnoconfig
i386 randconfig-a002-20201028
i386 randconfig-a005-20201028
i386 randconfig-a003-20201028
i386 randconfig-a001-20201028
i386 randconfig-a004-20201028
i386 randconfig-a006-20201028
x86_64   randconfig-a011-20201028
x86_64   randconfig-a013-20201028
x86_64   randconfig-a016-20201028
x86_64   randconfig-a015-20201028
x86_64   randconfig-a012-20201028
x86_64   randconfig-a014-20201028
x86_64   randconfig-a011-20201026
x86_64   randconfig-a013-20201026
x86_64   randconfig-a016-20201026
x86_64   randconfig-a015-20201026
x86_64   randconfig-a012-20201026
x86_64   randconfig-a014-20201026
i386 randconfig-a016-20201028
i386 randconfig-a014-20201028
i386 randconfig-a015-20201028
i386 randconfig-a013-20201028
i386 randconfig-a012-20201028
i386 randconfig-a011-20201028
riscvnommu_k210_defconfig
riscvallyesconfig
riscv allnoconfig
riscv   defconfig
riscv  rv32_defconfig
riscvallmodconfig
x86_64   rhel
x86_64   allyesconfig
x86_64rhel-7.6-kselftests
x86_64  defconfig
x86_64   rhel-8.3
x86_64  kexec

clang tested configs:
x86_64   randconfig-a001-20201028
x86_64   randconfig-a002-20201028
x86_64   randconfig-a003-20201028
x86_64   randconfig-a006-20201028
x86_64   randconfig-a005-20201028
x86_64   randconfig-a004-20201028

---
0-DAY CI Kernel Test Service, Intel Corporation
https://lists.01.org/hyperkitty/list/kbuild-...@lists.01.org


[PATCH] arm64: dts: exynos: Fix reboot/poweroff issues on Exynos7

2020-10-28 Thread Paweł Chmiel
In vendor sources for Exynos 7420, psci is not used to reboot or
poweroff device. Instead we should use syscon reboot/poweroff.
Previously it was not possible to poweroff (no syscon poweroff node) or
reboot (because it was handled by psci and this way is not working for
Exynos).

Fixes: fb026cb65247 ("arm64: dts: Add reboot node for exynos7")
Fixes: b9024cbc937d ("arm64: dts: Add initial device tree support for exynos7")
Signed-off-by: Paweł Chmiel 
---
 arch/arm64/boot/dts/exynos/exynos7.dtsi | 11 ++-
 1 file changed, 10 insertions(+), 1 deletion(-)

diff --git a/arch/arm64/boot/dts/exynos/exynos7.dtsi 
b/arch/arm64/boot/dts/exynos/exynos7.dtsi
index 959918f4ca45..47d54c369d03 100644
--- a/arch/arm64/boot/dts/exynos/exynos7.dtsi
+++ b/arch/arm64/boot/dts/exynos/exynos7.dtsi
@@ -117,8 +117,10 @@ cpu_atlas3: cpu@3 {
};
 
psci {
-   compatible = "arm,psci-0.2";
+   compatible = "arm,psci";
method = "smc";
+   cpu_off = <0x84000002>;
+   cpu_on = <0xC4000003>;
};
 
soc: soc@0 {
@@ -552,6 +554,13 @@ pmu_system_controller: system-controller@105c0000 {
compatible = "samsung,exynos7-pmu", "syscon";
reg = <0x105c0000 0x5000>;
 
+   poweroff: syscon-poweroff {
+   compatible = "syscon-poweroff";
+   regmap = <&pmu_system_controller>;
+   offset = <0x330C>; /* PS_HOLD_CONTROL */
+   mask = <0x5200>; /* reset value */
+   };
+
reboot: syscon-reboot {
compatible = "syscon-reboot";
regmap = <&pmu_system_controller>;
-- 
2.27.0



[PATCH v4 2/5] x86/boot/compressed/64: Add CPUID sanity check to early #VC handler

2020-10-28 Thread Joerg Roedel
From: Joerg Roedel 

The early #VC handler which doesn't have a GHCB can only handle CPUID
exit codes. It is needed by the early boot code to handle #VC
exceptions raised in verify_cpu() and to get the position of the C
bit.

But the CPUID information comes from the hypervisor, which is untrusted
and might return results which trick the guest into the no-SEV boot path
with no C bit set in the page-tables. All data written to memory would
then be unencrypted and could leak sensitive data to the hypervisor.

Add sanity checks to the early #VC handlers to make sure the hypervisor
can not pretend that SEV is disabled.

Signed-off-by: Joerg Roedel 
---
 arch/x86/kernel/sev-es-shared.c | 26 ++
 1 file changed, 26 insertions(+)

diff --git a/arch/x86/kernel/sev-es-shared.c b/arch/x86/kernel/sev-es-shared.c
index 5f83ccaab877..56d16c405b03 100644
--- a/arch/x86/kernel/sev-es-shared.c
+++ b/arch/x86/kernel/sev-es-shared.c
@@ -178,6 +178,32 @@ void __init do_vc_no_ghcb(struct pt_regs *regs, unsigned 
long exit_code)
goto fail;
regs->dx = val >> 32;
 
+   /*
+* This is a VC handler and the #VC is only raised when SEV-ES is
+* active, which means SEV must be active too. Do sanity checks on the
+* CPUID results to make sure the hypervisor does not trick the kernel
+* into the no-sev path. This could map sensitive data unencrypted and
+* make it accessible to the hypervisor.
+*
+* In particular, check for:
+*  - Hypervisor CPUID bit
+*  - Availability of CPUID leaf 0x8000001f
+*  - SEV CPUID bit.
+*
+* The hypervisor might still report the wrong C-bit position, but this
+* can't be checked here.
+*/
+
+   if ((fn == 1 && !(regs->cx & BIT(31))))
+   /* Hypervisor bit */
+   goto fail;
+   else if (fn == 0x80000000 && (regs->ax < 0x8000001f))
+   /* SEV Leaf check */
+   goto fail;
+   else if ((fn == 0x8000001f && !(regs->ax & BIT(1))))
+   /* SEV Bit */
+   goto fail;
+
/* Skip over the CPUID two-byte opcode */
regs->ip += 2;
 
-- 
2.28.0



[PATCH v3 2/3] arm64: dts: qcom: sc7180: Add gpu cooling support

2020-10-28 Thread Akhil P Oommen
Add cooling-cells property and the cooling maps for the gpu tzones
to support GPU cooling.

Signed-off-by: Akhil P Oommen 
---
 arch/arm64/boot/dts/qcom/sc7180.dtsi | 30 +++---
 1 file changed, 23 insertions(+), 7 deletions(-)

diff --git a/arch/arm64/boot/dts/qcom/sc7180.dtsi 
b/arch/arm64/boot/dts/qcom/sc7180.dtsi
index d46b383..a7ea029 100644
--- a/arch/arm64/boot/dts/qcom/sc7180.dtsi
+++ b/arch/arm64/boot/dts/qcom/sc7180.dtsi
@@ -2,7 +2,7 @@
 /*
  * SC7180 SoC device tree source
  *
- * Copyright (c) 2019, The Linux Foundation. All rights reserved.
+ * Copyright (c) 2019-20, The Linux Foundation. All rights reserved.
  */
 
 #include 
@@ -1886,6 +1886,8 @@
operating-points-v2 = <&gpu_opp_table>;
qcom,gmu = <&gmu>;
 
+   #cooling-cells = <2>;
+
interconnects = <&gem_noc MASTER_GFX3D &mc_virt 
SLAVE_EBI1>;
interconnect-names = "gfx-mem";
 
@@ -3825,16 +3827,16 @@
};
 
gpuss0-thermal {
-   polling-delay-passive = <0>;
+   polling-delay-passive = <100>;
polling-delay = <0>;
 
thermal-sensors = <&tsens0 13>;
 
trips {
gpuss0_alert0: trip-point0 {
-   temperature = <90000>;
+   temperature = <95000>;
hysteresis = <2000>;
-   type = "hot";
+   type = "passive";
};
 
gpuss0_crit: gpuss0_crit {
@@ -3843,19 +3845,26 @@
type = "critical";
};
};
+
+   cooling-maps {
+   map0 {
+   trip = <&gpuss0_alert0>;
+   cooling-device = <&gpu THERMAL_NO_LIMIT 
THERMAL_NO_LIMIT>;
+   };
+   };
};
 
gpuss1-thermal {
-   polling-delay-passive = <0>;
+   polling-delay-passive = <100>;
polling-delay = <0>;
 
thermal-sensors = <&tsens0 14>;
 
trips {
gpuss1_alert0: trip-point0 {
-   temperature = <90000>;
+   temperature = <95000>;
hysteresis = <2000>;
-   type = "hot";
+   type = "passive";
};
 
gpuss1_crit: gpuss1_crit {
@@ -3864,6 +3873,13 @@
type = "critical";
};
};
+
+   cooling-maps {
+   map0 {
+   trip = <&gpuss1_alert0>;
+   cooling-device = <&gpu THERMAL_NO_LIMIT 
THERMAL_NO_LIMIT>;
+   };
+   };
};
 
aoss1-thermal {
-- 
2.7.4



[tip:master] BUILD SUCCESS 6f72faf4a32303c8bdc6491186b79391e9cf0c7e

2020-10-28 Thread kernel test robot
tree/branch: https://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git  master
branch HEAD: 6f72faf4a32303c8bdc6491186b79391e9cf0c7e  Merge branch 
'locking/urgent'

elapsed time: 721m

configs tested: 94
configs skipped: 2

The following configs have been built successfully.
More configs may be tested in the coming days.

gcc tested configs:
arm defconfig
arm64allyesconfig
arm64   defconfig
arm  allyesconfig
arm  allmodconfig
m68k apollo_defconfig
powerpc   ebony_defconfig
mipsjmr3927_defconfig
powerpc stx_gp3_defconfig
xtensa  nommu_kc705_defconfig
arm lpc18xx_defconfig
shecovec24-romimage_defconfig
arm   cns3420vb_defconfig
armmvebu_v7_defconfig
powerpc  ppc44x_defconfig
powerpc mpc8315_rdb_defconfig
powerpc   motionpro_defconfig
mips   gcw0_defconfig
powerpc mpc8272_ads_defconfig
sh  r7780mp_defconfig
mipsar7_defconfig
mips   ip22_defconfig
mips  maltasmvp_eva_defconfig
mips  bmips_stb_defconfig
powerpc canyonlands_defconfig
ia64 allmodconfig
ia64defconfig
ia64 allyesconfig
m68k allmodconfig
m68kdefconfig
m68k allyesconfig
nds32   defconfig
nios2allyesconfig
cskydefconfig
alpha   defconfig
alphaallyesconfig
nios2   defconfig
arc  allyesconfig
nds32 allnoconfig
c6x  allyesconfig
xtensa   allyesconfig
h8300allyesconfig
arc defconfig
sh   allmodconfig
parisc  defconfig
s390 allyesconfig
parisc   allyesconfig
s390defconfig
i386 allyesconfig
sparcallyesconfig
sparc   defconfig
i386defconfig
mips allyesconfig
mips allmodconfig
powerpc  allyesconfig
powerpc  allmodconfig
powerpc   allnoconfig
i386 randconfig-a002-20201026
i386 randconfig-a003-20201026
i386 randconfig-a005-20201026
i386 randconfig-a001-20201026
i386 randconfig-a006-20201026
i386 randconfig-a004-20201026
x86_64   randconfig-a011-20201028
x86_64   randconfig-a013-20201028
x86_64   randconfig-a016-20201028
x86_64   randconfig-a015-20201028
x86_64   randconfig-a012-20201028
x86_64   randconfig-a014-20201028
i386 randconfig-a016-20201028
i386 randconfig-a014-20201028
i386 randconfig-a015-20201028
i386 randconfig-a013-20201028
i386 randconfig-a012-20201028
i386 randconfig-a011-20201028
riscvnommu_k210_defconfig
riscvallyesconfig
riscvnommu_virt_defconfig
riscv allnoconfig
riscv   defconfig
riscv  rv32_defconfig
riscvallmodconfig
x86_64   rhel
x86_64   allyesconfig
x86_64rhel-7.6-kselftests
x86_64  defconfig
x86_64   rhel-8.3
x86_64  kexec

clang tested configs:
x86_64   randconfig-a001-20201028
x86_64   randconfig-a002-20201028
x86_64   randconfig-a003-20201028
x86_64   randconfig-a006-20201028
x86_64   randconfig-a005-20201028
x86_64   randconfig-a004-20201028

---
0-DAY CI Kernel Test Service, Intel Corporation
https://lists.01.org/hyperkitty/list/kbuild-...@lists.01.org


Re: [PATCH v2 net 5/5] net: ipa: avoid going past end of resource group array

2020-10-28 Thread Willem de Bruijn
On Wed, Oct 28, 2020 at 3:42 PM Alex Elder  wrote:
>
> The minimum and maximum limits for resources assigned to a given
> resource group are programmed in pairs, with the limits for two
> groups set in a single register.
>
> If the number of supported resource groups is odd, only half of the
> register that defines these limits is valid for the last group; that
> group has no second group in the pair.
>
> Currently we ignore this constraint, and it turns out to be harmless,
> but it is not guaranteed to be.  This patch addresses that, and adds
> support for programming the 5th resource group's limits.
>
> Rework how the resource group limit registers are programmed by
> having a single function program all group pairs rather than having
> one function program each pair.  Add the programming of the 4-5
> resource group pair limits to this function.  If a resource group is
> not supported, pass a null pointer to ipa_resource_config_common()
> for that group and have that function write zeroes in that case.
>
> Fixes: cdf2e9419dd91 ("soc: qcom: ipa: main code")
> Tested-by: Sujit Kautkar 
> Signed-off-by: Alex Elder 

Acked-by: Willem de Bruijn 


[PATCH v4 0/5] x86/sev-es: Mitigate some HV attack vectors

2020-10-28 Thread Joerg Roedel
From: Joerg Roedel 

Hi,

here are some enhancements to the SEV(-ES) code in the Linux kernel to
self-protect it against some newly detected hypervisor attacks. There are 3
attacks addressed here:

1) Hypervisor does not present the SEV-enabled bit via CPUID

2) The Hypervisor presents the wrong C-bit position via CPUID

3) An encrypted RAM page is mapped as MMIO in the nested
   page-table, causing #VC exceptions and possible leak of the
   data to the hypervisor or data/code injection from the
   Hypervisor.

The attacks are described in more detail in this paper:

https://arxiv.org/abs/2010.07094

Please review.

Thanks,

Joerg

Changes to v3:

- Addressed Boris' review comments

Changes to v2:

- Use %r8/%r9 to modify %cr4 in sev_verify_cbit()
  and return the new page-table pointer in that function.

Changes to v1:

- Disable CR4.PGE during C-bit test

- Do not save/restore caller-saved registers in
  set_sev_encryption_mask()

Joerg Roedel (5):
  x86/boot/compressed/64: Introduce sev_status
  x86/boot/compressed/64: Add CPUID sanity check to early #VC handler
  x86/boot/compressed/64: Check SEV encryption in 64-bit boot-path
  x86/head/64: Check SEV encryption before switching to kernel
page-table
  x86/sev-es: Do not support MMIO to/from encrypted memory

 arch/x86/boot/compressed/ident_map_64.c |  1 +
 arch/x86/boot/compressed/mem_encrypt.S  | 20 +-
 arch/x86/boot/compressed/misc.h |  2 +
 arch/x86/kernel/head_64.S   | 16 +
 arch/x86/kernel/sev-es-shared.c | 26 +++
 arch/x86/kernel/sev-es.c| 20 --
 arch/x86/kernel/sev_verify_cbit.S   | 90 +
 arch/x86/mm/mem_encrypt.c   |  1 +
 8 files changed, 168 insertions(+), 8 deletions(-)
 create mode 100644 arch/x86/kernel/sev_verify_cbit.S

-- 
2.28.0



[PATCH v4 5/5] x86/sev-es: Do not support MMIO to/from encrypted memory

2020-10-28 Thread Joerg Roedel
From: Joerg Roedel 

MMIO memory is usually not mapped encrypted, so there is no reason to
support emulated MMIO when it is mapped encrypted.

Prevent a possible hypervisor attack where a RAM page is mapped as
an MMIO page in the nested page-table, so that any guest access to it
will trigger a #VC exception and leak the data on that page to the
hypervisor via the GHCB (like with valid MMIO). On the read side this
attack would allow the HV to inject data into the guest.

Signed-off-by: Joerg Roedel 
---
 arch/x86/kernel/sev-es.c | 20 +---
 1 file changed, 13 insertions(+), 7 deletions(-)

diff --git a/arch/x86/kernel/sev-es.c b/arch/x86/kernel/sev-es.c
index 4a96726fbaf8..0bd1a0fc587e 100644
--- a/arch/x86/kernel/sev-es.c
+++ b/arch/x86/kernel/sev-es.c
@@ -374,8 +374,8 @@ static enum es_result vc_read_mem(struct es_em_ctxt *ctxt,
return ES_EXCEPTION;
 }
 
-static bool vc_slow_virt_to_phys(struct ghcb *ghcb, struct es_em_ctxt *ctxt,
-unsigned long vaddr, phys_addr_t *paddr)
+static enum es_result vc_slow_virt_to_phys(struct ghcb *ghcb, struct 
es_em_ctxt *ctxt,
+  unsigned long vaddr, phys_addr_t 
*paddr)
 {
unsigned long va = (unsigned long)vaddr;
unsigned int level;
@@ -394,15 +394,19 @@ static bool vc_slow_virt_to_phys(struct ghcb *ghcb, 
struct es_em_ctxt *ctxt,
if (user_mode(ctxt->regs))
ctxt->fi.error_code |= X86_PF_USER;
 
-   return false;
+   return ES_EXCEPTION;
}
 
+   if (WARN_ON_ONCE(pte_val(*pte) & _PAGE_ENC))
+   /* Emulated MMIO to/from encrypted memory not supported */
+   return ES_UNSUPPORTED;
+
pa = (phys_addr_t)pte_pfn(*pte) << PAGE_SHIFT;
pa |= va & ~page_level_mask(level);
 
*paddr = pa;
 
-   return true;
+   return ES_OK;
 }
 
 /* Include code shared with pre-decompression boot stage */
@@ -731,6 +735,7 @@ static enum es_result vc_do_mmio(struct ghcb *ghcb, struct 
es_em_ctxt *ctxt,
 {
u64 exit_code, exit_info_1, exit_info_2;
unsigned long ghcb_pa = __pa(ghcb);
+   enum es_result res;
phys_addr_t paddr;
void __user *ref;
 
@@ -740,11 +745,12 @@ static enum es_result vc_do_mmio(struct ghcb *ghcb, 
struct es_em_ctxt *ctxt,
 
exit_code = read ? SVM_VMGEXIT_MMIO_READ : SVM_VMGEXIT_MMIO_WRITE;
 
-   if (!vc_slow_virt_to_phys(ghcb, ctxt, (unsigned long)ref, &paddr)) {
-   if (!read)
+   res = vc_slow_virt_to_phys(ghcb, ctxt, (unsigned long)ref, &paddr);
+   if (res != ES_OK) {
+   if (res == ES_EXCEPTION && !read)
ctxt->fi.error_code |= X86_PF_WRITE;
 
-   return ES_EXCEPTION;
+   return res;
}
 
exit_info_1 = paddr;
-- 
2.28.0



Re: [PATCH v3 3/5] x86/boot/compressed/64: Check SEV encryption in 64-bit boot-path

2020-10-28 Thread Joerg Roedel
On Tue, Oct 27, 2020 at 12:08:12PM +0100, Borislav Petkov wrote:
> On Wed, Oct 21, 2020 at 02:39:36PM +0200, Joerg Roedel wrote:
> > diff --git a/arch/x86/kernel/sev_verify_cbit.S 
> > b/arch/x86/kernel/sev_verify_cbit.S
> > new file mode 100644
> > index 000000000000..5075458ecad0
> > --- /dev/null
> > +++ b/arch/x86/kernel/sev_verify_cbit.S
> 
> Why a separate file? You're using it just like verify_cpu.S and this is
> kinda verifying CPU so you could simply add the functionality there...

verify_cpu.S is also used on 32bit and this function is 64bit code. It
can be made working with some #ifdef'fery but I think it is cleaner to
just keep it in a separate file, also given that sev_verify_cbit() is
not needed at every place verify_cpu() is called.

> Yeah, can you please use the callee-clobbered registers in the order as
> they're used by the ABI, see arch/x86/entry/calling.h.
> 
> Because I'm looking at this and wondering are rsi, rdx and rcx somehow
> live here and you're avoiding them...

Makes sense, will update the function.

Regards,

Joerg



[PATCH v4 3/5] x86/boot/compressed/64: Check SEV encryption in 64-bit boot-path

2020-10-28 Thread Joerg Roedel
From: Joerg Roedel 

Check whether the hypervisor reported the correct C-bit when running as
an SEV guest. Using a wrong C-bit position could be used to leak
sensitive data from the guest to the hypervisor.

The check function is in arch/x86/kernel/sev_verify_cbit.S so that it
can be re-used in the running kernel image.

Signed-off-by: Joerg Roedel 
---
 arch/x86/boot/compressed/ident_map_64.c |  1 +
 arch/x86/boot/compressed/mem_encrypt.S  |  4 ++
 arch/x86/boot/compressed/misc.h |  2 +
 arch/x86/kernel/sev_verify_cbit.S   | 90 +
 4 files changed, 97 insertions(+)
 create mode 100644 arch/x86/kernel/sev_verify_cbit.S

diff --git a/arch/x86/boot/compressed/ident_map_64.c 
b/arch/x86/boot/compressed/ident_map_64.c
index a5e5db6ada3c..39b2eded7bc2 100644
--- a/arch/x86/boot/compressed/ident_map_64.c
+++ b/arch/x86/boot/compressed/ident_map_64.c
@@ -164,6 +164,7 @@ void initialize_identity_maps(void *rmode)
add_identity_map(cmdline, cmdline + COMMAND_LINE_SIZE);
 
/* Load the new page-table. */
+   sev_verify_cbit(top_level_pgt);
write_cr3(top_level_pgt);
 }
 
diff --git a/arch/x86/boot/compressed/mem_encrypt.S 
b/arch/x86/boot/compressed/mem_encrypt.S
index 0bae1ca658d9..3275dbab085d 100644
--- a/arch/x86/boot/compressed/mem_encrypt.S
+++ b/arch/x86/boot/compressed/mem_encrypt.S
@@ -68,6 +68,9 @@ SYM_FUNC_START(get_sev_encryption_bit)
 SYM_FUNC_END(get_sev_encryption_bit)
 
.code64
+
+#include "../../kernel/sev_verify_cbit.S"
+
 SYM_FUNC_START(set_sev_encryption_mask)
 #ifdef CONFIG_AMD_MEM_ENCRYPT
push    %rbp
@@ -111,4 +114,5 @@ SYM_FUNC_END(set_sev_encryption_mask)
.balign 8
 SYM_DATA(sme_me_mask,  .quad 0)
 SYM_DATA(sev_status,   .quad 0)
+SYM_DATA(sev_check_data,   .quad 0)
 #endif
diff --git a/arch/x86/boot/compressed/misc.h b/arch/x86/boot/compressed/misc.h
index 6d31f1b4c4d1..d9a631c5973c 100644
--- a/arch/x86/boot/compressed/misc.h
+++ b/arch/x86/boot/compressed/misc.h
@@ -159,4 +159,6 @@ void boot_page_fault(void);
 void boot_stage1_vc(void);
 void boot_stage2_vc(void);
 
+unsigned long sev_verify_cbit(unsigned long cr3);
+
 #endif /* BOOT_COMPRESSED_MISC_H */
diff --git a/arch/x86/kernel/sev_verify_cbit.S 
b/arch/x86/kernel/sev_verify_cbit.S
new file mode 100644
index 000000000000..b96f0573f8af
--- /dev/null
+++ b/arch/x86/kernel/sev_verify_cbit.S
@@ -0,0 +1,90 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * sev_verify_cbit.S - Code for verification of the C-bit position reported
+ * by the Hypervisor when running with SEV enabled.
+ *
+ * Copyright (c) 2020  Joerg Roedel (jroe...@suse.de)
+ *
+ * Implements sev_verify_cbit() which is called before switching to a new
+ * long-mode page-table at boot.
+ *
+ * It verifies that the C-bit position is correct by writing a random value to
+ * an encrypted memory location while on the current page-table. Then it
+ * switches to the new page-table to verify the memory content is still the
+ * same. After that it switches back to the current page-table and when the
+ * check succeeded it returns. If the check failed the code invalidates the
+ * stack pointer and goes into a hlt loop. The stack-pointer is invalidated to
+ * make sure no interrupt or exception can get the CPU out of the hlt loop.
+ *
+ * New page-table pointer is expected in %rdi (first parameter)
+ *
+ */
+SYM_FUNC_START(sev_verify_cbit)
+#ifdef CONFIG_AMD_MEM_ENCRYPT
+   /* First check if a C-bit was detected */
+   movqsme_me_mask(%rip), %rsi
+   testq   %rsi, %rsi
+   jz  3f
+
+   /* sme_me_mask != 0 could mean SME or SEV - Check also for SEV */
+   movqsev_status(%rip), %rsi
+   testq   %rsi, %rsi
+   jz  3f
+
+   /* Save CR4 in %rsi */
+   movq%cr4, %rsi
+
+   /* Disable Global Pages */
+   movq%rsi, %rdx
+   andq$(~X86_CR4_PGE), %rdx
+   movq%rdx, %cr4
+
+   /*
+* Verified that running under SEV - now get a random value using
+* RDRAND. This instruction is mandatory when running as an SEV guest.
+*
+* Don't bail out of the loop if RDRAND returns errors. It is better to
+* prevent forward progress than to work with a non-random value here.
+*/
+1: rdrand  %rdx
+   jnc 1b
+
+   /* Store value to memory and keep it in %rdx */
+   movq%rdx, sev_check_data(%rip)
+
+   /* Backup current %cr3 value to restore it later */
+   movq%cr3, %rcx
+
+   /* Switch to new %cr3 - This might unmap the stack */
+   movq%rdi, %cr3
+
+   /*
+* Compare value in %rdx with memory location - If C-Bit is incorrect
+* this would read the encrypted data and make the check fail.
+*/
+   cmpq%rdx, sev_check_data(%rip)
+
+   /* Restore old %cr3 */
+   movq%rcx, %cr3
+
+   /* Restore previous CR4 */
+   movq%rsi, %cr4
+
+   

RE: [PATCH v3] usb: cdns3: Variable 'length' set but not used

2020-10-28 Thread Pawel Laszczak


>
>> Peter,
>>
>> It looks like you missed the " [PATCH v3] usb: cdns3: Variable 'length' set 
>> but
>> not used"
>>
>> It's quite important because the compiler complains about this when I use W=1.
>>
>
>Pawel, it is the bug-fix, and located at branch: for-usb-fixes.

But I can't see it in this branch:

https://git.kernel.org/pub/scm/linux/kernel/git/peter.chen/usb.git/log/?h=for-usb-fixes=grep=cdns3

I can see there only: usb: cdns3: Rids of duplicate error message.

>
>> Thanks,
>> Pawel
>>
>> >> >
>> >> > A gentle ping.
>> >> >
>> >> > I assume that you should add this and the rest overdue cdsn3
>> >> > patches as first to you ci-for-usb-next branch.
>> >> > Am I right?
>> >> >
>> >>
>> >> Hi Pawel,
>> >>
>> >> I queued them locally, and I waited for v5.10-rc1 which was out
>> >> yesterday, then I will apply them, and add cdns3 patches to my
>> >> kernel.org branch. Will update you these two days.
>> >>
>> >> Peter
>> >
>> >Hi Pawel,
>> >
>> >The cdns3 -next patches pushed to: for-usb-next; cdns3 -fixes patches pushed
>> to: for-usb-fixes.
>> >The git is:
>> >git://git.kernel.org/pub/scm/linux/kernel/git/peter.chen/usb.git
>> >
>> >Currently, I only pushed three of your patches, would you please review my
>> patches, thanks.
>> >
>> >Peter


Re: [PATCH v6 13/29] arm64/build: Assert for unwanted sections

2020-10-28 Thread Alexei Starovoitov
On Tue, Oct 27, 2020 at 09:15:17PM +0100, Ard Biesheuvel wrote:
> On Tue, 27 Oct 2020 at 21:12, Nick Desaulniers  
> wrote:
> >
> > On Tue, Oct 27, 2020 at 12:25 PM Geert Uytterhoeven
> >  wrote:
> > >
> > > Hi Nick,
> > >
> > > CC Josh
> > >
> > > On Mon, Oct 26, 2020 at 6:49 PM Nick Desaulniers
> > >  wrote:
> > > > On Mon, Oct 26, 2020 at 10:44 AM Geert Uytterhoeven
> > > >  wrote:
> > > > > On Mon, Oct 26, 2020 at 6:39 PM Ard Biesheuvel  
> > > > > wrote:
> > > > > > On Mon, 26 Oct 2020 at 17:01, Geert Uytterhoeven 
> > > > > >  wrote:
> > > > > > > On Mon, Oct 26, 2020 at 2:29 PM Geert Uytterhoeven 
> > > > > > >  wrote:
> > > > > > > > On Mon, Oct 26, 2020 at 1:29 PM Geert Uytterhoeven 
> > > > > > > >  wrote:
> > > > > > > > > I.e. including the ".eh_frame" warning. I have tried 
> > > > > > > > > bisecting that
> > > > > > > > > warning (i.e. with be2881824ae9eb92 reverted), but that leads 
> > > > > > > > > me to
> > > > > > > > > commit b3e5d80d0c48c0cc ("arm64/build: Warn on orphan section
> > > > > > > > > placement"), which is another red herring.
> > > > > > > >
> > > > > > > > kernel/bpf/core.o is the only file containing an eh_frame 
> > > > > > > > section,
> > > > > > > > causing the warning.
> > > >
> > > > When I see .eh_frame, I think -fno-asynchronous-unwind-tables is
> > > > missing from someone's KBUILD_CFLAGS.
> > > > But I don't see anything curious in kernel/bpf/Makefile, unless
> > > > cc-disable-warning is somehow broken.
> > >
> > > I tracked it down to kernel/bpf/core.c:___bpf_prog_run() being tagged
> > > with __no_fgcse aka __attribute__((optimize("-fno-gcse"))).
> > >
> > > Even if the function is trivially empty ("return 0;"), a ".eh_frame" 
> > > section
> > > is generated.  Removing the __no_fgcse tag fixes that.
> >
> > That's weird.  I feel pretty strongly that unless we're working around
> > a well understood compiler bug with a comment that links to a
> > submitted bug report, turning off rando compiler optimizations is a
> > terrible hack for which one must proceed straight to jail; do not pass
> > go; do not collect $200.  But maybe I'd feel differently for this case
> > given the context of the change that added it.  (Ard mentions
> > retpolines+orc+objtool; can someone share the relevant SHA if you have
> > it handy so I don't have to go digging?)
> 
> commit 3193c0836f203a91bef96d88c64cccf0be090d9c
> Author: Josh Poimboeuf 
> Date:   Wed Jul 17 20:36:45 2019 -0500
> 
> bpf: Disable GCC -fgcse optimization for ___bpf_prog_run()
> 
> has
> 
> Fixes: e55a73251da3 ("bpf: Fix ORC unwinding in non-JIT BPF code")

That commit is broken.
I had this patch in my queue:
-#define __no_fgcse __attribute__((optimize("-fno-gcse")))
+#define __no_fgcse 
__attribute__((optimize("-fno-gcse,-fno-omit-frame-pointer")))

Sounds like you want to add -fno-asynchronous-unwind-tables to the above list?

> and mentions objtool and CONFIG_RETPOLINE.
> 
> >  (I feel the same about there
> > being an empty asm(); statement in the definition of asm_volatile_goto
> > for compiler-gcc.h).  Might be time to "fix the compiler."
> >
> > (It sounds like Arvind is both in agreement with my sentiment, and has
> > the root cause).
> >
> 
> I agree that the __no_fgcse hack is terrible. Does Clang support the
> following pragmas?
> 
> #pragma GCC push_options
> #pragma GCC optimize ("-fno-gcse")
> #pragma GCC pop_options

That will work too, but optimize("-fno...,-fno..,-fno..") is imo cleaner.


[PATCH v2] usb: gadget: configfs: Fix use-after-free issue with udc_name

2020-10-28 Thread Macpaul Lin
From: Eddie Hung 

There is a use-after-free issue if udc_name is accessed
in function gadget_dev_desc_UDC_show after another context
frees udc_name in function unregister_gadget.

Context 1:
gadget_dev_desc_UDC_store()->unregister_gadget()->
free udc_name->set udc_name to NULL

Context 2:
gadget_dev_desc_UDC_show()-> access udc_name

Call trace:
dump_backtrace+0x0/0x340
show_stack+0x14/0x1c
dump_stack+0xe4/0x134
print_address_description+0x78/0x478
__kasan_report+0x270/0x2ec
kasan_report+0x10/0x18
__asan_report_load1_noabort+0x18/0x20
string+0xf4/0x138
vsnprintf+0x428/0x14d0
sprintf+0xe4/0x12c
gadget_dev_desc_UDC_show+0x54/0x64
configfs_read_file+0x210/0x3a0
__vfs_read+0xf0/0x49c
vfs_read+0x130/0x2b4
SyS_read+0x114/0x208
el0_svc_naked+0x34/0x38

Add mutex_lock to protect this kind of scenario.

Signed-off-by: Eddie Hung 
Signed-off-by: Macpaul Lin 
Reviewed-by: Peter Chen 
Cc: sta...@vger.kernel.org
---
Changes for v2:
  - Fix typo %s/contex/context, Thanks Peter.

 drivers/usb/gadget/configfs.c |   11 +--
 1 file changed, 9 insertions(+), 2 deletions(-)

diff --git a/drivers/usb/gadget/configfs.c b/drivers/usb/gadget/configfs.c
index 56051bb..d9743f4 100644
--- a/drivers/usb/gadget/configfs.c
+++ b/drivers/usb/gadget/configfs.c
@@ -221,9 +221,16 @@ static ssize_t gadget_dev_desc_bcdUSB_store(struct 
config_item *item,
 
 static ssize_t gadget_dev_desc_UDC_show(struct config_item *item, char *page)
 {
-   char *udc_name = to_gadget_info(item)->composite.gadget_driver.udc_name;
+   struct gadget_info *gi = to_gadget_info(item);
+   char *udc_name;
+   int ret;
+
+   mutex_lock(&gi->lock);
+   udc_name = gi->composite.gadget_driver.udc_name;
+   ret = sprintf(page, "%s\n", udc_name ?: "");
+   mutex_unlock(&gi->lock);
 
-   return sprintf(page, "%s\n", udc_name ?: "");
+   return ret;
 }
 
 static int unregister_gadget(struct gadget_info *gi)
-- 
1.7.9.5


[PATCH v4 4/5] x86/head/64: Check SEV encryption before switching to kernel page-table

2020-10-28 Thread Joerg Roedel
From: Joerg Roedel 

When SEV is enabled the kernel requests the C-Bit position again from
the hypervisor to build its own page-table. Since the hypervisor is an
untrusted source the C-bit position needs to be verified before the
kernel page-table is used.

Call the sev_verify_cbit() function before writing the CR3.

Signed-off-by: Joerg Roedel 
---
 arch/x86/kernel/head_64.S | 16 
 arch/x86/mm/mem_encrypt.c |  1 +
 2 files changed, 17 insertions(+)

diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S
index 7eb2a1c87969..3c417734790f 100644
--- a/arch/x86/kernel/head_64.S
+++ b/arch/x86/kernel/head_64.S
@@ -161,6 +161,21 @@ SYM_INNER_LABEL(secondary_startup_64_no_verify, 
SYM_L_GLOBAL)
 
/* Setup early boot stage 4-/5-level pagetables. */
addqphys_base(%rip), %rax
+
+   /*
+* For SEV guests: Verify that the C-bit is correct. A malicious
+* hypervisor could lie about the C-bit position to perform a ROP
+* attack on the guest by writing to the unencrypted stack and wait for
+* the next RET instruction.
+* %rsi carries pointer to realmode data and is callee-clobbered. Save
+* and restore it.
+*/
+   pushq   %rsi
+   movq%rax, %rdi
+   callsev_verify_cbit
+   popq%rsi
+
+   /* Switch to new page-table */
movq%rax, %cr3
 
/* Ensure I am executing from virtual addresses */
@@ -279,6 +294,7 @@ SYM_INNER_LABEL(secondary_startup_64_no_verify, 
SYM_L_GLOBAL)
 SYM_CODE_END(secondary_startup_64)
 
 #include "verify_cpu.S"
+#include "sev_verify_cbit.S"
 
 #ifdef CONFIG_HOTPLUG_CPU
 /*
diff --git a/arch/x86/mm/mem_encrypt.c b/arch/x86/mm/mem_encrypt.c
index efbb3de472df..bc0833713be9 100644
--- a/arch/x86/mm/mem_encrypt.c
+++ b/arch/x86/mm/mem_encrypt.c
@@ -39,6 +39,7 @@
  */
 u64 sme_me_mask __section(".data") = 0;
 u64 sev_status __section(".data") = 0;
+u64 sev_check_data __section(".data") = 0;
 EXPORT_SYMBOL(sme_me_mask);
 DEFINE_STATIC_KEY_FALSE(sev_enable_key);
 EXPORT_SYMBOL_GPL(sev_enable_key);
-- 
2.28.0



Re: [PATCH] smp: Add bootcpus parameter to boot subset of CPUs

2020-10-28 Thread Sudeep Holla
On Fri, Oct 23, 2020 at 7:24 AM Elliot Berman  wrote:
>
> In a heterogeneous multiprocessor system, specifying the 'maxcpus'
> parameter on kernel command line does not provide sufficient control
> over which CPUs are brought online at kernel boot time, since CPUs may
> have nonuniform performance characteristics. Thus, add bootcpus kernel
> parameter to control which CPUs should be brought online during kernel
> boot. When both maxcpus and bootcpus are set, the more restrictive of the
> two applies.
>
> Signed-off-by: Elliot Berman 
> ---
>  Documentation/admin-guide/kernel-parameters.txt |  8 +++
>  include/linux/cpu.h |  2 +-
>  kernel/cpu.c|  4 ++--
>  kernel/smp.c| 28 
> +++--
>  4 files changed, 37 insertions(+), 5 deletions(-)
>
> diff --git a/Documentation/admin-guide/kernel-parameters.txt 
> b/Documentation/admin-guide/kernel-parameters.txt
> index 65d047f..ea31af3 100644
> --- a/Documentation/admin-guide/kernel-parameters.txt
> +++ b/Documentation/admin-guide/kernel-parameters.txt
> @@ -449,6 +449,14 @@
>
> See Documentation/admin-guide/bootconfig.rst
>
> +   bootcpus=   [SMP]  List of processors that an SMP kernel
> +   will bring up during bootup. Similar to maxcpus, 
> except
> +   as a cpu list as described above. The more restrictive
> +   of maxcpus and bootcpus applies. If bootcpus=1-3 and
> +   maxcpus=2, only processors 1 and 2 are booted. As with
> +   maxcpus, you can bring up other plugged cpu by 
> executing
> +   "echo 1 > /sys/devices/system/cpu/cpuX/online"
> +

There is a fundamental assumption here that the user of this cmdline
understands how the logical cpu numbers are allocated for the physical
cpus. Based on the discussion, I understand that you want to boot specific
physical cpus for whatever reasons, and here you want to specify the
logical cpu numbers for them. So NACK for that concept alone, irrespective
of whether this concept as a whole is acceptable or not.

-- 
Regards,
Sudeep


[PATCH 3/8] sched: Fix some style issues in test_coresched.c

2020-10-28 Thread John B. Wyatt IV
Line 825: open brace '{' following function definitions go
on the next line.
Line 459: that open brace { should be on the previous line
Line 459: space required before the open parenthesis '('

Issues reported by checkpatch.

There are other issues including over a hundred instances of using spaces
instead of tabs in this file.
I am currently fixing these specific issues in this patch.

Signed-off-by: John B. Wyatt IV 
---
 tools/testing/selftests/sched/test_coresched.c | 8 
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/tools/testing/selftests/sched/test_coresched.c 
b/tools/testing/selftests/sched/test_coresched.c
index 91cfb00f15b5..f11ed8370c07 100644
--- a/tools/testing/selftests/sched/test_coresched.c
+++ b/tools/testing/selftests/sched/test_coresched.c
@@ -459,9 +459,8 @@ char *get_task_core_cookie(char *pid)
 sprintf(proc_path, "/proc/%s/sched", pid);
 
 fp = fopen(proc_path, "r");
-while ((fgets(line, 1024, fp)) != NULL)
-{
-if(!strstr(line, "core_cookie"))
+while ((fgets(line, 1024, fp)) != NULL) {
+if (!strstr(line, "core_cookie"))
 continue;
 
 for (j = 0, i = 0; i < 1024 && line[i] != '\0'; i++)
@@ -826,7 +825,8 @@ static void test_prctl_in_group(char *root)
 print_pass();
 }
 
-int main(void) {
+int main(void)
+{
 char *root = make_group(NULL, NULL);
 
 test_cgroup_parent_tag_child_inherit(root);
-- 
2.28.0



[PATCH v4 1/5] x86/boot/compressed/64: Introduce sev_status

2020-10-28 Thread Joerg Roedel
From: Joerg Roedel 

Introduce sev_status and initialize it together with sme_me_mask to have
an indicator which SEV features are enabled.

Signed-off-by: Joerg Roedel 
---
 arch/x86/boot/compressed/mem_encrypt.S | 16 +++-
 1 file changed, 15 insertions(+), 1 deletion(-)

diff --git a/arch/x86/boot/compressed/mem_encrypt.S 
b/arch/x86/boot/compressed/mem_encrypt.S
index dd07e7b41b11..0bae1ca658d9 100644
--- a/arch/x86/boot/compressed/mem_encrypt.S
+++ b/arch/x86/boot/compressed/mem_encrypt.S
@@ -81,6 +81,19 @@ SYM_FUNC_START(set_sev_encryption_mask)
 
bts %rax, sme_me_mask(%rip) /* Create the encryption mask */
 
+   /*
+* Read MSR_AMD64_SEV again and store it to sev_status. Can't do this in
+* get_sev_encryption_bit() because this function is 32 bit code and
+* shared between 64 bit and 32 bit boot path.
+*/
+   movl$MSR_AMD64_SEV, %ecx/* Read the SEV MSR */
+   rdmsr
+
+   /* Store MSR value in sev_status */
+   shlq$32, %rdx
+   orq %rdx, %rax
+   movq%rax, sev_status(%rip)
+
 .Lno_sev_mask:
movq%rbp, %rsp  /* Restore original stack pointer */
 
@@ -96,5 +109,6 @@ SYM_FUNC_END(set_sev_encryption_mask)
 
 #ifdef CONFIG_AMD_MEM_ENCRYPT
.balign 8
-SYM_DATA(sme_me_mask, .quad 0)
+SYM_DATA(sme_me_mask,  .quad 0)
+SYM_DATA(sev_status,   .quad 0)
 #endif
-- 
2.28.0



[PATCH v3 1/3] drm/msm: Add support for GPU cooling

2020-10-28 Thread Akhil P Oommen
Register GPU as a devfreq cooling device so that it can be passively
cooled by the thermal framework.

Signed-off-by: Akhil P Oommen 
---
Changes in v3:
1. Minor fix in binding documentation (RobH)
Changes in v2:
1. Update the dt bindings documentation
 drivers/gpu/drm/msm/msm_gpu.c | 12 
 drivers/gpu/drm/msm/msm_gpu.h |  2 ++
 2 files changed, 14 insertions(+)

diff --git a/drivers/gpu/drm/msm/msm_gpu.c b/drivers/gpu/drm/msm/msm_gpu.c
index 55d1648..9f9db46 100644
--- a/drivers/gpu/drm/msm/msm_gpu.c
+++ b/drivers/gpu/drm/msm/msm_gpu.c
@@ -14,6 +14,7 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 #include 
 
@@ -107,9 +108,18 @@ static void msm_devfreq_init(struct msm_gpu *gpu)
if (IS_ERR(gpu->devfreq.devfreq)) {
DRM_DEV_ERROR(>pdev->dev, "Couldn't initialize GPU 
devfreq\n");
gpu->devfreq.devfreq = NULL;
+   return;
}
 
devfreq_suspend_device(gpu->devfreq.devfreq);
+
+   gpu->cooling = of_devfreq_cooling_register(gpu->pdev->dev.of_node,
+   gpu->devfreq.devfreq);
+   if (IS_ERR(gpu->cooling)) {
+   DRM_DEV_ERROR(>pdev->dev,
+   "Couldn't register GPU cooling device\n");
+   gpu->cooling = NULL;
+   }
 }
 
 static int enable_pwrrail(struct msm_gpu *gpu)
@@ -1005,4 +1015,6 @@ void msm_gpu_cleanup(struct msm_gpu *gpu)
gpu->aspace->mmu->funcs->detach(gpu->aspace->mmu);
msm_gem_address_space_put(gpu->aspace);
}
+
+   devfreq_cooling_unregister(gpu->cooling);
 }
diff --git a/drivers/gpu/drm/msm/msm_gpu.h b/drivers/gpu/drm/msm/msm_gpu.h
index 6c9e1fd..9a8f20d 100644
--- a/drivers/gpu/drm/msm/msm_gpu.h
+++ b/drivers/gpu/drm/msm/msm_gpu.h
@@ -147,6 +147,8 @@ struct msm_gpu {
struct msm_gpu_state *crashstate;
/* True if the hardware supports expanded apriv (a650 and newer) */
bool hw_apriv;
+
+   struct thermal_cooling_device *cooling;
 };
 
 static inline struct msm_gpu *dev_to_gpu(struct device *dev)
-- 
2.7.4



Re: [PATCH 1/2] ASoC: codecs: wcd934x: Set digital gain range correctly

2020-10-28 Thread Mark Brown
On Wed, 28 Oct 2020 15:43:39 +, Srinivas Kandagatla wrote:
> digital gain range is -84dB min to 40dB max, however this was not
> correctly specified in the range.
> 
> Fix this by specifying the correct range!

Applied to

   https://git.kernel.org/pub/scm/linux/kernel/git/broonie/sound.git for-next

Thanks!

[1/2] ASoC: codecs: wcd934x: Set digital gain range correctly
  commit: fc0522bbe02fa4beb95c0514ace66b585616f111
[2/2] ASoC: codecs: wcd9335: Set digital gain range correctly
  commit: 6d6bc54ab4f2404d46078abc04bf4dee4db01def

All being well this means that it will be integrated into the linux-next
tree (usually sometime in the next 24 hours) and sent to Linus during
the next merge window (or sooner if it is a bug fix), however if
problems are discovered then the patch may be dropped or reverted.

You may get further e-mails resulting from automated or manual testing
and review of the tree, please engage with people reporting problems and
send followup patches addressing any issues that are reported if needed.

If any updates are required or you are submitting further changes they
should be sent as incremental updates against current git, existing
patches will not be replaced.

Please add any relevant lists and maintainers to the CCs when replying
to this mail.

Thanks,
Mark


Re: [PATCH] dcookies: Make dcookies depend on CONFIG_OPROFILE

2020-10-28 Thread Arnd Bergmann
On Wed, Oct 28, 2020 at 5:34 PM William Cohen  wrote:
>
> On 10/27/20 12:54 PM, Linus Torvalds wrote:
> > On Tue, Oct 27, 2020 at 1:52 AM Christoph Hellwig  
> > wrote:
> >>
> >> Is it time to deprecate and eventually remove oprofile while we're at
> >> it?
> >
> > I think it's well past time.
> >
> > I think the user-space "oprofile" program doesn't actually use the
> > legacy kernel code any more, and hasn't for a long time.
> >
> > But I might be wrong. Adding William Cohen to the cc, since he seems
> > to still maintain it to make sure it builds etc.
>
> Yes, current OProfile code uses the existing linux perf infrastructure and
> doesn't use the old oprofile kernel code.  I have thought about removing
> that old oprofile driver code from kernel, but have not submitted patches
> for it. I would be fine with eliminating that code from the kernel.

I notice that arch/ia64/ supports oprofile but not perf. I suppose this just
means that ia64 people no longer care enough about profiling to
add perf support, but it wouldn't stop us from dropping it, right?

There is also a stub implementation of oprofile for microblaze
and no perf code, not sure if it would make any difference for them.

Everything else that has oprofile kernel code also supports perf.

   Arnd


Re: [PATCH] powerpc/smp: Move rcu_cpu_starting() earlier

2020-10-28 Thread Paul E. McKenney
On Wed, Oct 28, 2020 at 02:23:34PM -0400, Qian Cai wrote:
> The call to rcu_cpu_starting() in start_secondary() is not early enough
> in the CPU-hotplug onlining process, which results in lockdep splats as
> follows:
> 
>  WARNING: suspicious RCU usage
>  -
>  kernel/locking/lockdep.c:3497 RCU-list traversed in non-reader section!!
> 
>  other info that might help us debug this:
> 
>  RCU used illegally from offline CPU!
>  rcu_scheduler_active = 1, debug_locks = 1
>  no locks held by swapper/1/0.
> 
>  Call Trace:
>  dump_stack+0xec/0x144 (unreliable)
>  lockdep_rcu_suspicious+0x128/0x14c
>  __lock_acquire+0x1060/0x1c60
>  lock_acquire+0x140/0x5f0
>  _raw_spin_lock_irqsave+0x64/0xb0
>  clockevents_register_device+0x74/0x270
>  register_decrementer_clockevent+0x94/0x110
>  start_secondary+0x134/0x800
>  start_secondary_prolog+0x10/0x14
> 
> This is avoided by moving the call to rcu_cpu_starting up near the
> beginning of the start_secondary() function. Note that the
> raw_smp_processor_id() is required in order to avoid calling into
> lockdep before RCU has declared the CPU to be watched for readers.
> 
> Link: 
> https://lore.kernel.org/lkml/160223032121.7002.1269740091547117869.tip-bot2@tip-bot2/
> Signed-off-by: Qian Cai 

Acked-by: Paul E. McKenney 

> ---
>  arch/powerpc/kernel/smp.c | 3 ++-
>  1 file changed, 2 insertions(+), 1 deletion(-)
> 
> diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c
> index 3c6b9822f978..8c2857cbd960 100644
> --- a/arch/powerpc/kernel/smp.c
> +++ b/arch/powerpc/kernel/smp.c
> @@ -1393,13 +1393,14 @@ static void add_cpu_to_masks(int cpu)
>  /* Activate a secondary processor. */
>  void start_secondary(void *unused)
>  {
> - unsigned int cpu = smp_processor_id();
> + unsigned int cpu = raw_smp_processor_id();
>  
>   mmgrab(_mm);
>   current->active_mm = _mm;
>  
>   smp_store_cpu_info(cpu);
>   set_dec(tb_ticks_per_jiffy);
> + rcu_cpu_starting(cpu);
>   preempt_disable();
>   cpu_callin_map[cpu] = 1;
>  
> -- 
> 2.28.0
> 


Re: [PATCH] hugetlb_cgroup: fix reservation accounting

2020-10-28 Thread Mina Almasry
On Thu, Oct 22, 2020 at 5:21 AM Michael S. Tsirkin  wrote:
>
> On Wed, Oct 21, 2020 at 01:44:26PM -0700, Mike Kravetz wrote:
> > Michal Privoznik was using "free page reporting" in QEMU/virtio-balloon
> > with hugetlbfs and hit the warning below.  QEMU with free page hinting
> > uses fallocate(FALLOC_FL_PUNCH_HOLE) to discard pages that are reported
> > as free by a VM. The reporting granularity is in pageblock granularity.
> > So when the guest reports 2M chunks, we fallocate(FALLOC_FL_PUNCH_HOLE)
> > one huge page in QEMU.
> >
> > [  315.251417] [ cut here ]
> > [  315.251424] WARNING: CPU: 7 PID: 6636 at mm/page_counter.c:57 
> > page_counter_uncharge+0x4b/0x50
> > [  315.251425] Modules linked in: ...
> > [  315.251466] CPU: 7 PID: 6636 Comm: qemu-system-x86 Not tainted 5.9.0 #137
> > [  315.251467] Hardware name: Gigabyte Technology Co., Ltd. X570 AORUS 
> > PRO/X570 AORUS PRO, BIOS F21 07/31/2020
> > [  315.251469] RIP: 0010:page_counter_uncharge+0x4b/0x50
> > ...
> > [  315.251479] Call Trace:
> > [  315.251485]  hugetlb_cgroup_uncharge_file_region+0x4b/0x80
> > [  315.251487]  region_del+0x1d3/0x300
> > [  315.251489]  hugetlb_unreserve_pages+0x39/0xb0
> > [  315.251492]  remove_inode_hugepages+0x1a8/0x3d0
> > [  315.251495]  ? tlb_finish_mmu+0x7a/0x1d0
> > [  315.251497]  hugetlbfs_fallocate+0x3c4/0x5c0
> > [  315.251519]  ? kvm_arch_vcpu_ioctl_run+0x614/0x1700 [kvm]
> > [  315.251522]  ? file_has_perm+0xa2/0xb0
> > [  315.251524]  ? inode_security+0xc/0x60
> > [  315.251525]  ? selinux_file_permission+0x4e/0x120
> > [  315.251527]  vfs_fallocate+0x146/0x290
> > [  315.251529]  __x64_sys_fallocate+0x3e/0x70
> > [  315.251531]  do_syscall_64+0x33/0x40
> > [  315.251533]  entry_SYSCALL_64_after_hwframe+0x44/0xa9
> > ...
> > [  315.251542] ---[ end trace 4c88c62ccb1349c9 ]---
> >
> > Investigation of the issue uncovered bugs in hugetlb cgroup reservation
> > accounting.  This patch addresses the found issues.
> >
> > Fixes: 075a61d07a8e ("hugetlb_cgroup: add accounting for shared mappings")
> > Cc: 
> > Reported-by: Michal Privoznik 
> > Co-developed-by: David Hildenbrand 
> > Signed-off-by: David Hildenbrand 
> > Signed-off-by: Mike Kravetz 
>
> Acked-by: Michael S. Tsirkin 
>
> > ---
> >  mm/hugetlb.c | 20 +++-
> >  1 file changed, 11 insertions(+), 9 deletions(-)
> >
> > diff --git a/mm/hugetlb.c b/mm/hugetlb.c
> > index 67fc6383995b..b853a11de14f 100644
> > --- a/mm/hugetlb.c
> > +++ b/mm/hugetlb.c
> > @@ -655,6 +655,8 @@ static long region_del(struct resv_map *resv, long f, 
> > long t)
> >   }
> >
> >   del += t - f;
> > + hugetlb_cgroup_uncharge_file_region(
> > + resv, rg, t - f);
> >
> >   /* New entry for end of split region */
> >   nrg->from = t;
> > @@ -667,9 +669,6 @@ static long region_del(struct resv_map *resv, long f, 
> > long t)
> >   /* Original entry is trimmed */
> >   rg->to = f;
> >
> > - hugetlb_cgroup_uncharge_file_region(
> > - resv, rg, nrg->to - nrg->from);
> > -
> >   list_add(>link, >link);
> >   nrg = NULL;
> >   break;
> > @@ -685,17 +684,17 @@ static long region_del(struct resv_map *resv, long f, 
> > long t)
> >   }
> >
> >   if (f <= rg->from) {/* Trim beginning of region */
> > - del += t - rg->from;
> > - rg->from = t;
> > -
> >   hugetlb_cgroup_uncharge_file_region(resv, rg,
> >   t - rg->from);
> > - } else {/* Trim end of region */
> > - del += rg->to - f;
> > - rg->to = f;
> >
> > + del += t - rg->from;
> > + rg->from = t;
> > + } else {/* Trim end of region */
> >   hugetlb_cgroup_uncharge_file_region(resv, rg,
> >   rg->to - f);
> > +
> > + del += rg->to - f;
> > + rg->to = f;
> >   }
> >   }
> >
> > @@ -2454,6 +2453,9 @@ struct page *alloc_huge_page(struct vm_area_struct 
> > *vma,
> >
> >   rsv_adjust = hugepage_subpool_put_pages(spool, 1);
> >   hugetlb_acct_memory(h, -rsv_adjust);
> > + if (deferred_reserve)
> > + hugetlb_cgroup_uncharge_page_rsvd(hstate_index(h),
> > + pages_per_huge_page(h), page);
> >   }
> >   return page;
> >
> > --
> > 2.25.4
>

Sorry for the late review. Looks good to me.

Reviewed-by: Mina Almasry 


Re: [PATCH RFC v2 12/21] kasan: inline and rename kasan_unpoison_memory

2020-10-28 Thread Dmitry Vyukov
On Thu, Oct 22, 2020 at 3:19 PM Andrey Konovalov  wrote:
>
> Currently kasan_unpoison_memory() is used as both an external annotation
> and as internal memory poisoning helper. Rename external annotation to
> kasan_unpoison_data() and inline the internal helper for hardware
> tag-based mode to avoid unneeded function calls.
>
> There's the external annotation kasan_unpoison_slab() that is currently
> defined as static inline and uses kasan_unpoison_memory(). With this
> change it's turned into a function call. Overall, this results in the
> same number of calls for hardware tag-based mode as
> kasan_unpoison_memory() is now inlined.

Can't we leave kasan_unpoison_slab as is? Or are there other reasons
to uninline it?
It seems that uninlining it is orthogonal to the rest of this patch.

> Signed-off-by: Andrey Konovalov 
> Link: 
> https://linux-review.googlesource.com/id/Ia7c8b659f79209935cbaab3913bf7f082cc43a0e
> ---
>  include/linux/kasan.h | 16 ++--
>  kernel/fork.c |  2 +-
>  mm/kasan/common.c | 10 ++
>  mm/kasan/hw_tags.c|  6 --
>  mm/kasan/kasan.h  |  7 +++
>  mm/slab_common.c  |  2 +-
>  6 files changed, 25 insertions(+), 18 deletions(-)
>
> diff --git a/include/linux/kasan.h b/include/linux/kasan.h
> index 6377d7d3a951..2b9023224474 100644
> --- a/include/linux/kasan.h
> +++ b/include/linux/kasan.h
> @@ -66,14 +66,15 @@ static inline void kasan_disable_current(void) {}
>
>  #ifdef CONFIG_KASAN
>
> -void kasan_unpoison_memory(const void *address, size_t size);
> -
>  void kasan_alloc_pages(struct page *page, unsigned int order);
>  void kasan_free_pages(struct page *page, unsigned int order);
>
>  void kasan_cache_create(struct kmem_cache *cache, unsigned int *size,
> slab_flags_t *flags);
>
> +void kasan_unpoison_data(const void *address, size_t size);
> +void kasan_unpoison_slab(const void *ptr);
> +
>  void kasan_poison_slab(struct page *page);
>  void kasan_unpoison_object_data(struct kmem_cache *cache, void *object);
>  void kasan_poison_object_data(struct kmem_cache *cache, void *object);
> @@ -98,11 +99,6 @@ struct kasan_cache {
> int free_meta_offset;
>  };
>
> -size_t __ksize(const void *);
> -static inline void kasan_unpoison_slab(const void *ptr)
> -{
> -   kasan_unpoison_memory(ptr, __ksize(ptr));
> -}
>  size_t kasan_metadata_size(struct kmem_cache *cache);
>
>  bool kasan_save_enable_multi_shot(void);
> @@ -110,8 +106,6 @@ void kasan_restore_multi_shot(bool enabled);
>
>  #else /* CONFIG_KASAN */
>
> -static inline void kasan_unpoison_memory(const void *address, size_t size) {}
> -
>  static inline void kasan_alloc_pages(struct page *page, unsigned int order) 
> {}
>  static inline void kasan_free_pages(struct page *page, unsigned int order) {}
>
> @@ -119,6 +113,9 @@ static inline void kasan_cache_create(struct kmem_cache 
> *cache,
>   unsigned int *size,
>   slab_flags_t *flags) {}
>
> +static inline void kasan_unpoison_data(const void *address, size_t size) { }
> +static inline void kasan_unpoison_slab(const void *ptr) { }
> +
>  static inline void kasan_poison_slab(struct page *page) {}
>  static inline void kasan_unpoison_object_data(struct kmem_cache *cache,
> void *object) {}
> @@ -158,7 +155,6 @@ static inline bool kasan_slab_free(struct kmem_cache *s, 
> void *object,
> return false;
>  }
>
> -static inline void kasan_unpoison_slab(const void *ptr) { }
>  static inline size_t kasan_metadata_size(struct kmem_cache *cache) { return 
> 0; }
>
>  #endif /* CONFIG_KASAN */
> diff --git a/kernel/fork.c b/kernel/fork.c
> index b41fecca59d7..858d78eee6ec 100644
> --- a/kernel/fork.c
> +++ b/kernel/fork.c
> @@ -225,7 +225,7 @@ static unsigned long *alloc_thread_stack_node(struct 
> task_struct *tsk, int node)
> continue;
>
> /* Mark stack accessible for KASAN. */
> -   kasan_unpoison_memory(s->addr, THREAD_SIZE);
> +   kasan_unpoison_data(s->addr, THREAD_SIZE);
>
> /* Clear stale pointers from reused stack. */
> memset(s->addr, 0, THREAD_SIZE);
> diff --git a/mm/kasan/common.c b/mm/kasan/common.c
> index 9008fc6b0810..1a5e6c279a72 100644
> --- a/mm/kasan/common.c
> +++ b/mm/kasan/common.c
> @@ -184,6 +184,16 @@ struct kasan_free_meta *kasan_get_free_meta(struct 
> kmem_cache *cache,
> return (void *)reset_tag(object) + cache->kasan_info.free_meta_offset;
>  }
>
> +void kasan_unpoison_data(const void *address, size_t size)
> +{
> +   kasan_unpoison_memory(address, size);
> +}
> +
> +void kasan_unpoison_slab(const void *ptr)
> +{
> +   kasan_unpoison_memory(ptr, __ksize(ptr));
> +}
> +
>  void kasan_poison_slab(struct page *page)
>  {
> unsigned long i;
> diff --git a/mm/kasan/hw_tags.c b/mm/kasan/hw_tags.c
> index f03161f3da19..915142da6b57 100644
> --- 

[PATCH -next] drm/i915: Remove unused variable ret

2020-10-28 Thread Zou Wei
This patch fixes the warning below, reported by coccicheck:

./drivers/gpu/drm/i915/i915_debugfs.c:789:5-8: Unneeded variable: "ret". Return 
"0" on line 1012

Reported-by: Hulk Robot 
Signed-off-by: Zou Wei 
---
 drivers/gpu/drm/i915/i915_debugfs.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_debugfs.c 
b/drivers/gpu/drm/i915/i915_debugfs.c
index ea46916..200f6b8 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -786,7 +786,6 @@ static int i915_frequency_info(struct seq_file *m, void 
*unused)
struct intel_uncore *uncore = &dev_priv->uncore;
struct intel_rps *rps = &dev_priv->gt.rps;
intel_wakeref_t wakeref;
-   int ret = 0;
 
wakeref = intel_runtime_pm_get(&dev_priv->runtime_pm);
 
@@ -1009,7 +1008,7 @@ static int i915_frequency_info(struct seq_file *m, void 
*unused)
seq_printf(m, "Max pixel clock frequency: %d kHz\n", 
dev_priv->max_dotclk_freq);
 
intel_runtime_pm_put(&dev_priv->runtime_pm, wakeref);
-   return ret;
+   return 0;
 }
 
 static int i915_ring_freq_table(struct seq_file *m, void *unused)
-- 
2.6.2



Re: [PATCH] dcookies: Make dcookies depend on CONFIG_OPROFILE

2020-10-28 Thread William Cohen
On 10/27/20 12:54 PM, Linus Torvalds wrote:
> On Tue, Oct 27, 2020 at 1:52 AM Christoph Hellwig  wrote:
>>
>> Is it time to deprecate and eventually remove oprofile while we're at
>> it?
> 
> I think it's well past time.
> 
> I think the user-space "oprofile" program doesn't actually use the
> legacy kernel code any more, and hasn't for a long time.
> 
> But I might be wrong. Adding William Cohen to the cc, since he seems
> to still maintain it to make sure it builds etc.
> 
>  Linus
> 

Hi,

Yes, current OProfile code uses the existing linux perf infrastructure and 
doesn't use the old oprofile kernel code.  I have thought about removing that 
old oprofile driver code from the kernel, but have not submitted patches for it. I 
would be fine with eliminating that code from the kernel.

-Will



Re: [seccomp] Request for a "enable on execve" mode for Seccomp filters

2020-10-28 Thread Rich Felker
On Wed, Oct 28, 2020 at 06:34:56PM +0100, Jann Horn wrote:
> On Wed, Oct 28, 2020 at 5:49 PM Rich Felker  wrote:
> > On Wed, Oct 28, 2020 at 01:42:13PM +0100, Jann Horn wrote:
> > > On Wed, Oct 28, 2020 at 12:18 PM Camille Mougey  wrote:
> > > You're just focusing on execve() - I think it's important to keep in
> > > mind what happens after execve() for normal, dynamically-linked
> > > binaries: The next step is that the dynamic linker runs, and it will
> > > poke around in the file system with access() and openat() and fstat(),
> > > it will mmap() executable libraries into memory, it will mprotect()
> > > some memory regions, it will set up thread-local storage (e.g. using
> > > arch_prctl(); even if the process is single-threaded), and so on.
> > >
> > > The earlier you install the seccomp filter, the more of these steps
> > > you have to permit in the filter. And if you want the filter to take
> > > effect directly after execve(), the syscalls you'll be forced to
> > > permit are sufficient to cobble something together in userspace that
> > > effectively does almost the same thing as execve().
> >
> > I would assume you use SECCOMP_RET_USER_NOTIF to implement policy for
> > controlling these operations and allowing only the ones that are valid
> > during dynamic linking. This also allows you to defer application of
> > the filter until after execve. So unless I'm missing some reason why
> > this doesn't work, I think the requested functionality is already
> > available.
> 
> Ah, yeah, good point.
> 
> > If you really just want the "activate at exec" behavior, it might be
> > possible (depending on how SECCOMP_RET_USER_NOTIF behaves when there's
> > no notify fd open; I forget)
> 
> syscall returns -ENOSYS. Yeah, that'd probably do the job. (Even
> though it might be a bit nicer if userspace had control over the errno
> there, such that it could be EPERM instead... oh well.)

EPERM is a major bug in current sandbox implementations, so ENOSYS is
at least mildly better, but indeed it should be controllable, probably
by allowing a code path for the BPF to continue with a jump to a
different logic path if the notify listener is missing.

Rich


Re: [PATCH v2 06/15] perf session: load data directory into tool process memory

2020-10-28 Thread Jiri Olsa
On Tue, Oct 27, 2020 at 05:43:20PM +0300, Alexey Budankov wrote:
> 
> On 27.10.2020 15:21, Jiri Olsa wrote:
> > On Tue, Oct 27, 2020 at 10:37:58AM +0300, Alexey Budankov wrote:
> >>
> >> On 24.10.2020 18:43, Jiri Olsa wrote:
> >>> On Wed, Oct 21, 2020 at 07:01:19PM +0300, Alexey Budankov wrote:
> 
>  Read trace files located in data directory into tool process memory.
>  Basic analysis support of data directories is provided for report
>  mode. Raw dump (-D) and aggregated reports are available for data
>  directories, still with no memory consumption optimizations. However
>  data directories collected with --compression-level option enabled
>  can be analyzed with little less memory because trace files are
>  unmapped from tool process memory after loading collected data.
>  The implementation is based on the prototype [1], [2].
> 
>  [1] git clone 
>  https://git.kernel.org/pub/scm/linux/kernel/git/jolsa/perf.git -b 
>  perf/record_threads
>  [2] https://lore.kernel.org/lkml/20180913125450.21342-1-jo...@kernel.org/
> 
>  Suggested-by: Jiri Olsa 
> >>>
> >>> very loosely ;-) so there was a reason for all that reader refactoring,
> >>> so we could have __perf_session__process_dir_events function:
> >>>
> >>>   
> >>> https://git.kernel.org/pub/scm/linux/kernel/git/jolsa/perf.git/commit/?h=perf/record_threads&id=308aa7cff1fed335401cfc02c7bac1a4644af68e
> >>
> >> Nonetheless. All that are necessary parts to make threaded data streaming
> >> and analysis eventually merged into the mainline as joint Perf developers
> >> community effort.
> >>
> >>>
> >>> when reporting the threaded record data on really big servers,
> >>> you will run out of memory, so you need to read and flush all
> >>> the files together by smaller pieces
> >>
> >> Yes, handling all that _big_ data after collection to make it
> >> helpful for analysis of performance issues is the other part
> >> of this story so that possible OOM should be somehow avoided.
> >>
> >>>
> >>> IMO we need to have this change before we allow threaded record
> >>
> >> There are use cases of perf tool as a data provider, btw VTune is not
> >> the only one of them, and for those use cases threaded trace streaming
> >> lets its users get to their data that the users were just losing before.
> >> This is huge difference and whole new level of support for such users.
> >> Post-process scripting around perf (e.g. Python based) will benefit
> >> from threaded trace streaming. Pipe mode can be extended to stream into
> >> open and passed fds using threads (e.g. perf record -o -fd:13,14,15,16).
> >> VTune-like tools can get performance data, load it into a (relational)
> >> DB files and provide analysis. And all that uses perf tool at its core.
> >>
> >> I agree perf report OOM issue can exist on really-big servers but data 
> >> directories support for report mode for not-so-big servers and desktops
> >> is already enabled with this smaller change. Also really-big-servers
> >> come with really-big amount of memory and collection could possibly be
> >> limited to only interesting phases of execution so the issue could likely
> >> be avoided. At the same time threaded trace streaming could clarify on
> >> real use cases that are blocked by perf report OOM issue and that would
> >> clarify on exact required solution. So perf report OOM issue shouldn't
> >> be the showstopper for upstream of threaded trace streaming.
> > 
> > so the short answer is no, right? ;-) 
> 
> Answer to what question? Resolve OOM in perf report for data directories?
> I don't see a simple solution for that. The next issue after OOM is resolved
> is a very long processing of data directories. And again there is no simple
> solution for that as well. But it still needs progress in order to be resolved
> eventually.

it's right here:
  
https://git.kernel.org/pub/scm/linux/kernel/git/jolsa/perf.git/commit/?h=perf/record_threads&id=308aa7cff1fed335401cfc02c7bac1a4644af68e

jirka

> 
> > 
> > I understand all the excuses, but from my point of view we are
> > adding another pain point (and there are already a few ;-) ) that
> > will make perf (even more) not user friendly
> 
> I would not name it a pain point but instead a growth opportunity. 
> Now --threads can't be and is not enabled by default. When a user
> asks --threads the tool can print warning in advance about lots of
> data and possible perf report OOM limitation so confusion and
> disappointment for users of perf report can be avoided in advance.
> 
> > 
> > if we allow really friendly way to create huge data, we should
> > do our best to be able to process it as best as we can
> 
> It is little to no more friendly than it already is now.
> Everyone can grab patches apply and get threaded streaming.
> Inclusion into mainline will standardize solution to build
> and evolve upon and this is necessary step towards complete
> support of data directories in perf tool suite.
> 
> Alexei
> 
> 

[PATCH v3 3/3] dt-bindings: drm/msm/gpu: Add cooling device support

2020-10-28 Thread Akhil P Oommen
Add cooling device support to gpu. A cooling device is bound to a
thermal zone to allow thermal mitigation.

Signed-off-by: Akhil P Oommen 
---
 Documentation/devicetree/bindings/display/msm/gpu.txt | 7 +++
 1 file changed, 7 insertions(+)

diff --git a/Documentation/devicetree/bindings/display/msm/gpu.txt 
b/Documentation/devicetree/bindings/display/msm/gpu.txt
index 1af0ff1..090dcb3 100644
--- a/Documentation/devicetree/bindings/display/msm/gpu.txt
+++ b/Documentation/devicetree/bindings/display/msm/gpu.txt
@@ -39,6 +39,10 @@ Required properties:
 a4xx Snapdragon SoCs. See
 Documentation/devicetree/bindings/sram/qcom,ocmem.yaml.
 
+Optional properties:
+- #cooling-cells: The value must be 2. For details, please refer to
+   Documentation/devicetree/bindings/thermal/thermal-cooling-devices.yaml.
+
 Example 3xx/4xx:
 
 / {
@@ -61,6 +65,7 @@ Example 3xx/4xx:
power-domains = < OXILICX_GDSC>;
operating-points-v2 = <_opp_table>;
iommus = <_iommu 0>;
+   #cooling-cells = <2>;
};
 
gpu_sram: ocmem@fdd0 {
@@ -98,6 +103,8 @@ Example a6xx (with GMU):
reg = <0x500 0x4>, <0x509e000 0x10>;
reg-names = "kgsl_3d0_reg_memory", "cx_mem";
 
+   #cooling-cells = <2>;
+
/*
 * Look ma, no clocks! The GPU clocks and power are
 * controlled entirely by the GMU
-- 
2.7.4



[PATCH] dt-bindings: phy: Add Cadence Sierra PHY bindings in YAML format

2020-10-28 Thread Swapnil Jakhade
Add Cadence Sierra PHY bindings in YAML format.

Signed-off-by: Swapnil Jakhade 
---
 .../bindings/phy/phy-cadence-sierra.txt   |  70 
 .../bindings/phy/phy-cadence-sierra.yaml  | 152 ++
 2 files changed, 152 insertions(+), 70 deletions(-)
 delete mode 100644 Documentation/devicetree/bindings/phy/phy-cadence-sierra.txt
 create mode 100644 
Documentation/devicetree/bindings/phy/phy-cadence-sierra.yaml

diff --git a/Documentation/devicetree/bindings/phy/phy-cadence-sierra.txt 
b/Documentation/devicetree/bindings/phy/phy-cadence-sierra.txt
deleted file mode 100644
index 03f5939d3d19..
--- a/Documentation/devicetree/bindings/phy/phy-cadence-sierra.txt
+++ /dev/null
@@ -1,70 +0,0 @@
-Cadence Sierra PHY

-
-Required properties:
-- compatible:  Must be "cdns,sierra-phy-t0" for Sierra in Cadence platform
-   Must be "ti,sierra-phy-t0" for Sierra in TI's J721E SoC.
-- resets:  Must contain an entry for each in reset-names.
-   See ../reset/reset.txt for details.
-- reset-names: Must include "sierra_reset" and "sierra_apb".
-   "sierra_reset" must control the reset line to the PHY.
-   "sierra_apb" must control the reset line to the APB PHY
-   interface ("sierra_apb" is optional).
-- reg: register range for the PHY.
-- #address-cells: Must be 1
-- #size-cells: Must be 0
-
-Optional properties:
-- clocks:  Must contain an entry in clock-names.
-   See ../clocks/clock-bindings.txt for details.
-- clock-names: Must contain "cmn_refclk_dig_div" and
-   "cmn_refclk1_dig_div" for configuring the frequency of
-   the clock to the lanes. "phy_clk" is deprecated.
-- cdns,autoconf:   A boolean property whose presence indicates that the
-   PHY registers will be configured by hardware. If not
-   present, all sub-node optional properties must be
-   provided.
-
-Sub-nodes:
-  Each group of PHY lanes with a single master lane should be represented as
-  a sub-node. Note that the actual configuration of each lane is determined by
-  hardware strapping, and must match the configuration specified here.
-
-Sub-node required properties:
-- #phy-cells:  Generic PHY binding; must be 0.
-- reg: The master lane number.  This is the lowest numbered lane
-   in the lane group.
-- resets:  Must contain one entry which controls the reset line for the
-   master lane of the sub-node.
-   See ../reset/reset.txt for details.
-
-Sub-node optional properties:
-- cdns,num-lanes:  Number of lanes in this group.  From 1 to 4.  The
-   group is made up of consecutive lanes.
-- cdns,phy-type:   Can be PHY_TYPE_PCIE or PHY_TYPE_USB3, depending on
-   configuration of lanes.
-
-Example:
-   pcie_phy4: pcie-phy@fd24 {
-   compatible = "cdns,sierra-phy-t0";
-   reg = <0x0 0xfd24 0x0 0x4>;
-   resets = < 0>, < 1>;
-   reset-names = "sierra_reset", "sierra_apb";
-   clocks = <>;
-   clock-names = "phy_clk";
-   #address-cells = <1>;
-   #size-cells = <0>;
-   pcie0_phy0: pcie-phy@0 {
-   reg = <0>;
-   resets = < 2>;
-   cdns,num-lanes = <2>;
-   #phy-cells = <0>;
-   cdns,phy-type = ;
-   };
-   pcie0_phy1: pcie-phy@2 {
-   reg = <2>;
-   resets = < 4>;
-   cdns,num-lanes = <1>;
-   #phy-cells = <0>;
-   cdns,phy-type = ;
-   };
diff --git a/Documentation/devicetree/bindings/phy/phy-cadence-sierra.yaml 
b/Documentation/devicetree/bindings/phy/phy-cadence-sierra.yaml
new file mode 100644
index ..d210843863df
--- /dev/null
+++ b/Documentation/devicetree/bindings/phy/phy-cadence-sierra.yaml
@@ -0,0 +1,152 @@
+# SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: "http://devicetree.org/schemas/phy/phy-cadence-sierra.yaml#"
+$schema: "http://devicetree.org/meta-schemas/core.yaml#"
+
+title: Cadence Sierra PHY binding
+
+description:
+  This binding describes the Cadence Sierra PHY. Sierra PHY supports multilink
+  multiprotocol combinations including protocols such as PCIe, USB etc.
+
+maintainers:
+  - Swapnil Jakhade 
+  - Yuti Amonkar 
+
+properties:
+  compatible:
+enum:
+  - cdns,sierra-phy-t0
+  - ti,sierra-phy-t0
+
+  '#address-cells':
+const: 1
+
+  '#size-cells':
+const: 0
+
+  resets:
+minItems: 1
+maxItems: 2
+items:
+  - description: Sierra PHY reset.
+  - description: 

  1   2   3   4   5   6   7   8   9   10   >