[PATCH v4 37/39] kasan, mm: reset tags when accessing metadata

2020-10-01 Thread Andrey Konovalov
Kernel allocator code accesses metadata for slab objects, that may lie
out-of-bounds of the object itself, or be accessed when an object is freed.
Such accesses trigger tag faults and lead to false-positive reports with
hardware tag-based KASAN.

Software KASAN modes disable instrumentation for allocator code via
KASAN_SANITIZE Makefile macro, and rely on kasan_enable/disable_current()
annotations which are used to ignore KASAN reports.

With hardware tag-based KASAN neither of those options is available, as
it doesn't use compiler instrumentation, no tag faults are ignored, and MTE
is disabled after the first one.

Instead, reset tags when accessing metadata (currently only for SLUB).

Signed-off-by: Andrey Konovalov 
Signed-off-by: Vincenzo Frascino 
Acked-by: Marco Elver 
---
Change-Id: I39f3c4d4f29299d4fbbda039bedf230db1c746fb
---
 mm/page_alloc.c  |  4 +++-
 mm/page_poison.c |  2 +-
 mm/slub.c| 29 -
 3 files changed, 20 insertions(+), 15 deletions(-)

diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index fab5e97dc9ca..e2195602fb38 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -1159,8 +1159,10 @@ static void kernel_init_free_pages(struct page *page, 
int numpages)
 
/* s390's use of memset() could override KASAN redzones. */
kasan_disable_current();
-   for (i = 0; i < numpages; i++)
+   for (i = 0; i < numpages; i++) {
+   page_kasan_tag_reset(page + i);
clear_highpage(page + i);
+   }
kasan_enable_current();
 }
 
diff --git a/mm/page_poison.c b/mm/page_poison.c
index 34b9181ee5d1..d90d342a391f 100644
--- a/mm/page_poison.c
+++ b/mm/page_poison.c
@@ -43,7 +43,7 @@ static void poison_page(struct page *page)
 
/* KASAN still think the page is in-use, so skip it. */
kasan_disable_current();
-   memset(addr, PAGE_POISON, PAGE_SIZE);
+   memset(kasan_reset_tag(addr), PAGE_POISON, PAGE_SIZE);
kasan_enable_current();
kunmap_atomic(addr);
 }
diff --git a/mm/slub.c b/mm/slub.c
index 68c02b2eecd9..1d3f2355df3b 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -249,7 +249,7 @@ static inline void *freelist_ptr(const struct kmem_cache 
*s, void *ptr,
 {
 #ifdef CONFIG_SLAB_FREELIST_HARDENED
/*
-* When CONFIG_KASAN_SW_TAGS is enabled, ptr_addr might be tagged.
+* When CONFIG_KASAN_SW/HW_TAGS is enabled, ptr_addr might be tagged.
 * Normally, this doesn't cause any issues, as both set_freepointer()
 * and get_freepointer() are called with a pointer with the same tag.
 * However, there are some issues with CONFIG_SLUB_DEBUG code. For
@@ -275,6 +275,7 @@ static inline void *freelist_dereference(const struct 
kmem_cache *s,
 
 static inline void *get_freepointer(struct kmem_cache *s, void *object)
 {
+   object = kasan_reset_tag(object);
return freelist_dereference(s, object + s->offset);
 }
 
@@ -304,6 +305,7 @@ static inline void set_freepointer(struct kmem_cache *s, 
void *object, void *fp)
BUG_ON(object == fp); /* naive detection of double free or corruption */
 #endif
 
+   freeptr_addr = (unsigned long)kasan_reset_tag((void *)freeptr_addr);
*(void **)freeptr_addr = freelist_ptr(s, fp, freeptr_addr);
 }
 
@@ -538,8 +540,8 @@ static void print_section(char *level, char *text, u8 *addr,
  unsigned int length)
 {
metadata_access_enable();
-   print_hex_dump(level, text, DUMP_PREFIX_ADDRESS, 16, 1, addr,
-   length, 1);
+   print_hex_dump(level, kasan_reset_tag(text), DUMP_PREFIX_ADDRESS,
+   16, 1, addr, length, 1);
metadata_access_disable();
 }
 
@@ -570,7 +572,7 @@ static struct track *get_track(struct kmem_cache *s, void 
*object,
 
p = object + get_info_end(s);
 
-   return p + alloc;
+   return kasan_reset_tag(p + alloc);
 }
 
 static void set_track(struct kmem_cache *s, void *object,
@@ -583,7 +585,8 @@ static void set_track(struct kmem_cache *s, void *object,
unsigned int nr_entries;
 
metadata_access_enable();
-   nr_entries = stack_trace_save(p->addrs, TRACK_ADDRS_COUNT, 3);
+   nr_entries = stack_trace_save(kasan_reset_tag(p->addrs),
+ TRACK_ADDRS_COUNT, 3);
metadata_access_disable();
 
if (nr_entries < TRACK_ADDRS_COUNT)
@@ -747,7 +750,7 @@ static __printf(3, 4) void slab_err(struct kmem_cache *s, 
struct page *page,
 
 static void init_object(struct kmem_cache *s, void *object, u8 val)
 {
-   u8 *p = object;
+   u8 *p = kasan_reset_tag(object);
 
if (s->flags & SLAB_RED_ZONE)
memset(p - s->red_left_pad, val, s->red_left_pad);
@@ -777,7 +780,7 @@ static int check_bytes_and_report(struct kmem_cache *s, 
struct page *page,
u8 *addr = page_address(page);
 
metadata_access_enable();
-   fault = memchr_inv(start, 

[PATCH v4 32/39] kasan: define KASAN_GRANULE_SIZE for HW_TAGS

2020-10-01 Thread Andrey Konovalov
Hardware tag-based KASAN has granules of MTE_GRANULE_SIZE. Define
KASAN_GRANULE_SIZE to MTE_GRANULE_SIZE for CONFIG_KASAN_HW_TAGS.

Signed-off-by: Andrey Konovalov 
Signed-off-by: Vincenzo Frascino 
Reviewed-by: Marco Elver 
---
Change-Id: I5d1117e6a991cbca00d2cfb4ba66e8ae2d8f513a
---
 mm/kasan/kasan.h | 6 ++
 1 file changed, 6 insertions(+)

diff --git a/mm/kasan/kasan.h b/mm/kasan/kasan.h
index 9c73f324e3ce..cf03640c8874 100644
--- a/mm/kasan/kasan.h
+++ b/mm/kasan/kasan.h
@@ -5,7 +5,13 @@
 #include 
 #include 
 
+#if defined(CONFIG_KASAN_GENERIC) || defined(CONFIG_KASAN_SW_TAGS)
 #define KASAN_GRANULE_SIZE (1UL << KASAN_SHADOW_SCALE_SHIFT)
+#else
+#include 
+#define KASAN_GRANULE_SIZE MTE_GRANULE_SIZE
+#endif
+
 #define KASAN_GRANULE_MASK (KASAN_GRANULE_SIZE - 1)
 #define KASAN_GRANULE_PAGE (KASAN_GRANULE_SIZE << PAGE_SHIFT)
 
-- 
2.28.0.709.gb0816b6eb0-goog



[PATCH v4 35/39] kasan, arm64: implement HW_TAGS runtime

2020-10-01 Thread Andrey Konovalov
Provide implementation of KASAN functions required for the hardware
tag-based mode. Those include core functions for memory and pointer
tagging (tags_hw.c) and bug reporting (report_tags_hw.c). Also adapt
common KASAN code to support the new mode.

Signed-off-by: Andrey Konovalov 
Signed-off-by: Vincenzo Frascino 
Acked-by: Catalin Marinas 
Reviewed-by: Marco Elver 
---
Change-Id: I8a8689ba098174a4d0ef3f1d008178387c80ee1c
---
 arch/arm64/include/asm/memory.h   |  4 +-
 arch/arm64/kernel/setup.c |  5 ++-
 include/linux/kasan.h |  6 +--
 include/linux/mm.h|  2 +-
 include/linux/page-flags-layout.h |  2 +-
 mm/kasan/Makefile |  5 +++
 mm/kasan/common.c | 15 ---
 mm/kasan/hw_tags.c| 70 +++
 mm/kasan/kasan.h  | 17 ++--
 mm/kasan/report_hw_tags.c | 42 +++
 mm/kasan/report_sw_tags.c |  2 +-
 mm/kasan/shadow.c |  2 +-
 mm/kasan/sw_tags.c|  2 +-
 13 files changed, 152 insertions(+), 22 deletions(-)
 create mode 100644 mm/kasan/hw_tags.c
 create mode 100644 mm/kasan/report_hw_tags.c

diff --git a/arch/arm64/include/asm/memory.h b/arch/arm64/include/asm/memory.h
index de9af7bea90d..b5d6b824c21c 100644
--- a/arch/arm64/include/asm/memory.h
+++ b/arch/arm64/include/asm/memory.h
@@ -215,7 +215,7 @@ static inline unsigned long kaslr_offset(void)
(__force __typeof__(addr))__addr;   \
 })
 
-#ifdef CONFIG_KASAN_SW_TAGS
+#if defined(CONFIG_KASAN_SW_TAGS) || defined(CONFIG_KASAN_HW_TAGS)
 #define __tag_shifted(tag) ((u64)(tag) << 56)
 #define __tag_reset(addr)  __untagged_addr(addr)
 #define __tag_get(addr)(__u8)((u64)(addr) >> 56)
@@ -223,7 +223,7 @@ static inline unsigned long kaslr_offset(void)
 #define __tag_shifted(tag) 0UL
 #define __tag_reset(addr)  (addr)
 #define __tag_get(addr)0
-#endif /* CONFIG_KASAN_SW_TAGS */
+#endif /* CONFIG_KASAN_SW_TAGS || CONFIG_KASAN_HW_TAGS */
 
 static inline const void *__tag_set(const void *addr, u8 tag)
 {
diff --git a/arch/arm64/kernel/setup.c b/arch/arm64/kernel/setup.c
index 77c4c9bad1b8..b07d9fbfa8b6 100644
--- a/arch/arm64/kernel/setup.c
+++ b/arch/arm64/kernel/setup.c
@@ -358,7 +358,10 @@ void __init __no_sanitize_address setup_arch(char 
**cmdline_p)
smp_init_cpus();
smp_build_mpidr_hash();
 
-   /* Init percpu seeds for random tags after cpus are set up. */
+   /*
+* For CONFIG_KASAN_SW_TAGS this initializes percpu seeds and must
+* come after cpus are set up.
+*/
kasan_init_tags();
 
 #ifdef CONFIG_ARM64_SW_TTBR0_PAN
diff --git a/include/linux/kasan.h b/include/linux/kasan.h
index 894eddf42168..3f3f541e5d5f 100644
--- a/include/linux/kasan.h
+++ b/include/linux/kasan.h
@@ -181,7 +181,7 @@ static inline void kasan_record_aux_stack(void *ptr) {}
 
 #endif /* CONFIG_KASAN_GENERIC */
 
-#ifdef CONFIG_KASAN_SW_TAGS
+#if defined(CONFIG_KASAN_SW_TAGS) || defined(CONFIG_KASAN_HW_TAGS)
 
 void kasan_init_tags(void);
 
@@ -190,7 +190,7 @@ void *kasan_reset_tag(const void *addr);
 bool kasan_report(unsigned long addr, size_t size,
bool is_write, unsigned long ip);
 
-#else /* CONFIG_KASAN_SW_TAGS */
+#else /* CONFIG_KASAN_SW_TAGS || CONFIG_KASAN_HW_TAGS */
 
 static inline void kasan_init_tags(void) { }
 
@@ -199,7 +199,7 @@ static inline void *kasan_reset_tag(const void *addr)
return (void *)addr;
 }
 
-#endif /* CONFIG_KASAN_SW_TAGS */
+#endif /* CONFIG_KASAN_SW_TAGS || CONFIG_KASAN_HW_TAGS*/
 
 #ifdef CONFIG_KASAN_VMALLOC
 
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 4312c6c808e9..a3cac68c737c 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1411,7 +1411,7 @@ static inline bool cpupid_match_pid(struct task_struct 
*task, int cpupid)
 }
 #endif /* CONFIG_NUMA_BALANCING */
 
-#ifdef CONFIG_KASAN_SW_TAGS
+#if defined(CONFIG_KASAN_SW_TAGS) || defined(CONFIG_KASAN_HW_TAGS)
 static inline u8 page_kasan_tag(const struct page *page)
 {
return (page->flags >> KASAN_TAG_PGSHIFT) & KASAN_TAG_MASK;
diff --git a/include/linux/page-flags-layout.h 
b/include/linux/page-flags-layout.h
index e200eef6a7fd..7d4ec26d8a3e 100644
--- a/include/linux/page-flags-layout.h
+++ b/include/linux/page-flags-layout.h
@@ -77,7 +77,7 @@
 #define LAST_CPUPID_SHIFT 0
 #endif
 
-#ifdef CONFIG_KASAN_SW_TAGS
+#if defined(CONFIG_KASAN_SW_TAGS) || defined(CONFIG_KASAN_HW_TAGS)
 #define KASAN_TAG_WIDTH 8
 #else
 #define KASAN_TAG_WIDTH 0
diff --git a/mm/kasan/Makefile b/mm/kasan/Makefile
index f1d68a34f3c9..9fe39a66388a 100644
--- a/mm/kasan/Makefile
+++ b/mm/kasan/Makefile
@@ -10,8 +10,10 @@ CFLAGS_REMOVE_init.o = $(CC_FLAGS_FTRACE)
 CFLAGS_REMOVE_quarantine.o = $(CC_FLAGS_FTRACE)
 CFLAGS_REMOVE_report.o = $(CC_FLAGS_FTRACE)
 CFLAGS_REMOVE_report_generic.o = $(CC_FLAGS_FTRACE)
+CFLAGS_REMOVE_report_hw_tags.o = $(CC_FLAGS_FTRACE)
 

[PATCH v4 33/39] kasan, x86, s390: update undef CONFIG_KASAN

2020-10-01 Thread Andrey Konovalov
With the introduction of hardware tag-based KASAN some kernel checks of
this kind:

  ifdef CONFIG_KASAN

will be updated to:

  if defined(CONFIG_KASAN_GENERIC) || defined(CONFIG_KASAN_SW_TAGS)

x86 and s390 use a trick to #undef CONFIG_KASAN for some of the code
that isn't linked with KASAN runtime and shouldn't have any KASAN
annotations.

Also #undef CONFIG_KASAN_GENERIC with CONFIG_KASAN.

Signed-off-by: Andrey Konovalov 
Signed-off-by: Vincenzo Frascino 
Reviewed-by: Marco Elver 
---
Change-Id: I2a622db0cb86a8feb60c30d8cb09190075be2a90
---
 arch/s390/boot/string.c | 1 +
 arch/x86/boot/compressed/misc.h | 1 +
 2 files changed, 2 insertions(+)

diff --git a/arch/s390/boot/string.c b/arch/s390/boot/string.c
index b11e8108773a..faccb33b462c 100644
--- a/arch/s390/boot/string.c
+++ b/arch/s390/boot/string.c
@@ -3,6 +3,7 @@
 #include 
 #include 
 #undef CONFIG_KASAN
+#undef CONFIG_KASAN_GENERIC
 #include "../lib/string.c"
 
 int strncmp(const char *cs, const char *ct, size_t count)
diff --git a/arch/x86/boot/compressed/misc.h b/arch/x86/boot/compressed/misc.h
index 726e264410ff..2ac973983a8e 100644
--- a/arch/x86/boot/compressed/misc.h
+++ b/arch/x86/boot/compressed/misc.h
@@ -12,6 +12,7 @@
 #undef CONFIG_PARAVIRT_XXL
 #undef CONFIG_PARAVIRT_SPINLOCKS
 #undef CONFIG_KASAN
+#undef CONFIG_KASAN_GENERIC
 
 /* cpu_feature_enabled() cannot be used this early */
 #define USE_EARLY_PGTABLE_L5
-- 
2.28.0.709.gb0816b6eb0-goog



[PATCH v4 38/39] kasan, arm64: enable CONFIG_KASAN_HW_TAGS

2020-10-01 Thread Andrey Konovalov
Hardware tag-based KASAN is now ready, enable the configuration option.

Signed-off-by: Andrey Konovalov 
Signed-off-by: Vincenzo Frascino 
Acked-by: Catalin Marinas 
---
Change-Id: I6eb1eea770e6b61ad71c701231b8d815a7ccc853
---
 arch/arm64/Kconfig | 1 +
 1 file changed, 1 insertion(+)

diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index e28d49cc1400..8d139c68343e 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -132,6 +132,7 @@ config ARM64
select HAVE_ARCH_JUMP_LABEL_RELATIVE
select HAVE_ARCH_KASAN if !(ARM64_16K_PAGES && ARM64_VA_BITS_48)
select HAVE_ARCH_KASAN_SW_TAGS if (HAVE_ARCH_KASAN && !ARM64_MTE)
+   select HAVE_ARCH_KASAN_HW_TAGS if (HAVE_ARCH_KASAN && ARM64_MTE)
select HAVE_ARCH_KGDB
select HAVE_ARCH_MMAP_RND_BITS
select HAVE_ARCH_MMAP_RND_COMPAT_BITS if COMPAT
-- 
2.28.0.709.gb0816b6eb0-goog



Re: [PATCH v7 07/13] PCI/AER: Extend AER error handling to RCECs

2020-10-01 Thread Bjorn Helgaas
On Wed, Sep 30, 2020 at 02:58:14PM -0700, Sean V Kelley wrote:
> From: Jonathan Cameron 
> 
> Currently the kernel does not handle AER errors for Root Complex
> integrated End Points (RCiEPs)[0]. These devices sit on a root bus within
> the Root Complex (RC). AER handling is performed by a Root Complex Event
> Collector (RCEC) [1] which is effectively a type of RCiEP on the same
> root bus.
> 
> For an RCEC (technically not a Bridge), error messages "received" from
> associated RCiEPs must be enabled for "transmission" in order to cause a
> System Error via the Root Control register or (when the Advanced Error
> Reporting Capability is present) reporting via the Root Error Command
> register and logging in the Root Error Status register and Error Source
> Identification register.
> 
> In addition to the defined OS level handling of the reset flow for the
> associated RCiEPs of an RCEC, it is possible to also have non-native
> handling. In that case there is no need to take any actions on the RCEC
> because the firmware is responsible for them. This is true where APEI [2]
> is used to report the AER errors via a GHES[v2] HEST entry [3] and
> relevant AER CPER record [4] and non-native handling is in use.
> 
> We effectively end up with two different types of discovery for
> purposes of handling AER errors:
> 
> 1) Normal bus walk - we pass the downstream port above a bus to which
> the device is attached and it walks everything below that point.
> 
> 2) An RCiEP with no visible association with an RCEC as there is no need
> to walk devices. In that case, the flow is to just call the callbacks for
> the actual device, which in turn references its associated RCEC.
> 
> A new walk function pci_walk_bridge(), similar to pci_walk_bus(),
> is provided that takes a pci_dev instead of a bus. If that bridge
> corresponds to a downstream port it will walk the subordinate bus of
> that bridge. If the device does not then it will call the function on
> that device alone.
> 
> [0] ACPI PCI Express Base Specification 5.0-1 1.3.2.3 Root Complex
> Integrated Endpoint Rules.
> [1] ACPI PCI Express Base Specification 5.0-1 6.2 Error Signalling and
> Logging
> [2] ACPI Specification 6.3 Chapter 18 ACPI Platform Error Interface (APEI)
> [3] ACPI Specification 6.3 18.2.3.7 Generic Hardware Error Source
> [4] UEFI Specification 2.8, N.2.7 PCI Express Error Section
> 
> Signed-off-by: Jonathan Cameron 
> Signed-off-by: Sean V Kelley 
> ---
>  drivers/pci/pcie/err.c | 52 +-
>  1 file changed, 41 insertions(+), 11 deletions(-)
> 
> diff --git a/drivers/pci/pcie/err.c b/drivers/pci/pcie/err.c
> index 9e552330155b..c4ceca42a3bf 100644
> --- a/drivers/pci/pcie/err.c
> +++ b/drivers/pci/pcie/err.c
> @@ -146,44 +146,73 @@ static int report_resume(struct pci_dev *dev, void 
> *data)
>   return 0;
>  }
>  
> +/**
> + * pci_walk_bridge - walk bridges potentially AER affected
> + * @bridge   bridge which may be an RCEC with associated RCiEPs,
> + *   an RCiEP associated with an RCEC, or a Port.
> + * @cb   callback to be called for each device found
> + * @userdata arbitrary pointer to be passed to callback.
> + *
> + * If the device provided is a bridge, walk the subordinate bus,
> + * including any bridged devices on buses under this bus.
> + * Call the provided callback on each device found.
> + *
> + * If the device provided has no subordinate bus, call the provided
> + * callback on the device itself.
> + */
> +static void pci_walk_bridge(struct pci_dev *bridge, int (*cb)(struct pci_dev 
> *, void *),
> + void *userdata)
> +{
> + if (bridge->subordinate)
> + pci_walk_bus(bridge->subordinate, cb, userdata);
> + else
> + cb(bridge, userdata);
> +}
> +
>  pci_ers_result_t pcie_do_recovery(struct pci_dev *dev,
>   pci_channel_state_t state,
>   pci_ers_result_t (*reset_subordinate_devices)(struct 
> pci_dev *pdev))
>  {
>   pci_ers_result_t status = PCI_ERS_RESULT_CAN_RECOVER;
> - struct pci_bus *bus;
>   struct pci_dev *bridge;
>   int type;
>  
>   /*
>* Error recovery runs on all subordinates of the first downstream
>* bridge. If the downstream bridge detected the error, it is
> -  * cleared at the end.
> +  * cleared at the end. For RCiEPs we should reset just the RCiEP itself.
>*/
>   type = pci_pcie_type(dev);
>   if (type == PCI_EXP_TYPE_ROOT_PORT ||
> - type == PCI_EXP_TYPE_DOWNSTREAM)
> + type == PCI_EXP_TYPE_DOWNSTREAM ||
> + type == PCI_EXP_TYPE_RC_EC ||
> + type == PCI_EXP_TYPE_RC_END)
>   bridge = dev;
>   else
>   bridge = pci_upstream_bridge(dev);
>  
> - bus = bridge->subordinate;
>   pci_dbg(dev, "broadcast error_detected message\n");
>   if (state == pci_channel_io_frozen) {
> - pci_walk_bus(bus, report_frozen_detected, &status);
> -   

Re: [PATCH v6 0/5] DVFS support for Venus

2020-10-01 Thread Stanimir Varbanov
Hi,

On 10/1/20 11:40 PM, Doug Anderson wrote:
> Hi,
> 
> On Wed, Sep 16, 2020 at 12:26 AM Stanimir Varbanov
>  wrote:
>>
>> Hi,
>>
>> On 9/16/20 8:33 AM, Rajendra Nayak wrote:
>>>
>>> On 9/1/2020 7:50 PM, Rajendra Nayak wrote:
 Rob, can you pick PATCH 1 since its already reviewed by you.
 Stan, Patch 2 and 3 will need to be picked by you and they both have
 your ACKs
>>>
>>> Rob/Stan, any plans to get the patches merged for 5.10?
>>
>> 2/5 and 3/5 are queued up for v5.10 through media tree.
> 
> Normally I'd expect device tree bindings (patch #1) to go through the
> same tree as the driver changes.  Does the media tree work
> differently?  If you're expecting Rob Herring to land the device tree
> binding change, is he aware?

I sent a pull request to Mauro with 1/5 included.
Thanks for spotting.

> 
> 
> -Doug
> 

-- 
regards,
Stan


Re: [PATCH v1] of: platform: Batch fwnode parsing in the init_machine() path

2020-10-01 Thread Laurent Pinchart
Hi Saravana,

Thank you for the patch.

On Thu, Oct 01, 2020 at 03:59:51PM -0700, Saravana Kannan wrote:
> When commit 93d2e4322aa7 ("of: platform: Batch fwnode parsing when
> adding all top level devices") optimized the fwnode parsing when all top
> level devices are added, it missed out optimizing this for platforms
> where the top level devices are added through the init_machine() path.
> 
> This commit does the optimization for all paths by simply moving the
> fw_devlink_pause/resume() inside of_platform_default_populate().

Based on v5.9-rc5, before the patch:

[0.652887] cpuidle: using governor menu
[   12.349476] No ATAGs?

After the patch:

[0.650460] cpuidle: using governor menu
[   12.262101] No ATAGs?

:-(

> Reported-by: Tomi Valkeinen 
> Signed-off-by: Saravana Kannan 
> ---
>  drivers/of/platform.c | 19 +++
>  1 file changed, 15 insertions(+), 4 deletions(-)
> 
> diff --git a/drivers/of/platform.c b/drivers/of/platform.c
> index 071f04da32c8..79972e49b539 100644
> --- a/drivers/of/platform.c
> +++ b/drivers/of/platform.c
> @@ -501,8 +501,21 @@ int of_platform_default_populate(struct device_node 
> *root,
>const struct of_dev_auxdata *lookup,
>struct device *parent)
>  {
> - return of_platform_populate(root, of_default_bus_match_table, lookup,
> - parent);
> + int ret;
> +
> + /*
> +  * fw_devlink_pause/resume() are only safe to be called around top
> +  * level device addition due to locking constraints.
> +  */
> + if (!root)
> + fw_devlink_pause();
> +
> + ret = of_platform_populate(root, of_default_bus_match_table, lookup,
> +parent);
> +
> + if (!root)
> + fw_devlink_resume();
> + return ret;
>  }
>  EXPORT_SYMBOL_GPL(of_platform_default_populate);
>  
> @@ -538,9 +551,7 @@ static int __init of_platform_default_populate_init(void)
>   }
>  
>   /* Populate everything else. */
> - fw_devlink_pause();
>   of_platform_default_populate(NULL, NULL, NULL);
> - fw_devlink_resume();
>  
>   return 0;
>  }

-- 
Regards,

Laurent Pinchart


Re: For review: seccomp_user_notif(2) manual page

2020-10-01 Thread Tycho Andersen
On Thu, Oct 01, 2020 at 02:06:10PM -0700, Sargun Dhillon wrote:
> On Wed, Sep 30, 2020 at 4:07 AM Michael Kerrisk (man-pages)
>  wrote:
> >
> > Hi Tycho, Sargun (and all),
> >
> > I knew it would be a big ask, but below is kind of the manual page
> > I was hoping you might write [1] for the seccomp user-space notification
> > mechanism. Since you didn't (and because 5.9 adds various new pieces
> > such as SECCOMP_ADDFD_FLAG_SETFD and SECCOMP_IOCTL_NOTIF_ADDFD
> > that also will need documenting [2]), I did :-). But of course I may
> > have made mistakes...
> >
> > I've shown the rendered version of the page below, and would love
> > to receive review comments from you and others, and acks, etc.
> >
> > There are a few FIXMEs sprinkled into the page, including one
> > that relates to what appears to me to be a misdesign (possibly
> > fixable) in the operation of the SECCOMP_IOCTL_NOTIF_RECV
> > operation. I would be especially interested in feedback on that
> > FIXME, and also of course the other FIXMEs.
> >
> > The page includes an extensive (albeit slightly contrived)
> > example program, and I would be happy also to receive comments
> > on that program.
> >
> > The page source currently sits in a branch (along with the text
> > that you sent me for the seccomp(2) page) at
> > https://git.kernel.org/pub/scm/docs/man-pages/man-pages.git/log/?h=seccomp_user_notif
> >
> > Thanks,
> >
> > Michael
> >
> > [1] 
> > https://lore.kernel.org/linux-man/2cea5fec-e73e-5749-18af-15c35a4bd...@gmail.com/#t
> > [2] Sargun, can you prepare something on SECCOMP_ADDFD_FLAG_SETFD
> > and SECCOMP_IOCTL_NOTIF_ADDFD to be added to this page?
> >
> > 
> >
> > --
> > Michael Kerrisk
> > Linux man-pages maintainer; http://www.kernel.org/doc/man-pages/
> > Linux/UNIX System Programming Training: http://man7.org/training/
> 
> Should we consider the SECCOMP_GET_NOTIF_SIZES dance to be "deprecated" at
> this point, given that the extensible ioctl mechanism works? If we add
> new fields to the
> seccomp datastructures, we would move them from fixed-size ioctls, to
> variable sized
> ioctls that encode the datastructure size / length?
> 
> -- This is mostly a question for Kees and Tycho.

It will tell you how big struct seccomp_data in the currently running
kernel is, so it still seems useful/necessary to me, unless there's
another way to figure that out.

But I agree, I don't think the intent is to add anything else to
struct seccomp_notif. (I don't know that it ever was.)

Tycho


Re: [PATCH net-next v2] net: dsa: Support bridge 802.1Q while untagging

2020-10-01 Thread Vladimir Oltean
On Wed, Sep 30, 2020 at 08:06:23PM -0700, Florian Fainelli wrote:
> The intent of 412a1526d067 ("net: dsa: untag the bridge pvid from rx
> skbs") is to transparently untag the bridge's default_pvid when the
> Ethernet switch can only support egress tagged of that default_pvid
> towards the CPU port.
> 
> Prior to this commit, users would have to configure an 802.1Q upper on
> the bridge master device when the bridge is configured with
> vlan_filtering=0 in order to pop the VLAN tag:
> 
> ip link add name br0 type bridge vlan_filtering 0
> ip link add link br0 name br0.1 type vlan id 1
> 
> After this commit we added support for managing a switch port 802.1Q
> upper but those are not usually added as bridge members, and if they do,
> they do not actually require any special management, the data path would
> pop the desired VLAN tag accordingly.
> 
> What we want to preserve is that use case and to manage when the user
> creates that 802.1Q upper for the bridge port.
> 
> While we are at it, call __vlan_find_dev_deep_rcu() which makes use of the
> VLAN group array which is faster.
> 
> As soon as we return the VLAN tagged SKB though it will be used by the
> following call path:
> 
> netif_receive_skb_list_internal
>   -> __netif_receive_skb_list_core
> -> __netif_receive_skb_core
>   -> vlan_do_receive()
> 
> which uses skb->vlan_proto, if we do not set it to the appropriate VLAN
> protocol, we will leave it set to what the DSA master has set
> (ETH_P_XDSA).
> 

The explanation is super confusing, although I think the placement of
the "skb->vlan_proto = vlan_dev_vlan_proto(upper_dev)" is correct.
Here's what I think is going on. It has to do with what's upwards of the
code you're changing:

/* Move VLAN tag from data to hwaccel */
if (!skb_vlan_tag_present(skb) && hdr->h_vlan_proto == htons(proto)) {
skb = skb_vlan_untag(skb);
if (!skb)
return NULL;
}

So skb->vlan_proto should already be equal to the protocol of the 8021q
upper, see the call path below.

   this is the problem
   |
skb_vlan_untag()   v
  -> __vlan_hwaccel_put_tag(skb, skb->protocol, vlan_tci);
-> skb->vlan_proto = vlan_proto;

But the problem is that skb_vlan_untag() calls __vlan_hwaccel_put_tag
with the wrong vlan_proto, it calls it with the skb->protocol which is
still ETH_P_XDSA because we haven't re-run eth_type_trans() yet.
It looks like this function wants pretty badly to be called after
eth_type_trans(), and it's getting pretty messy because of that, but we
don't have any other driver-specific hook afterwards..

I don't have a lot of experience, the alternatives are either to:
- move dsa_untag_bridge_pvid() after eth_type_trans(), similar to what
  you did in your initial patch - maybe this is the cleanest
- make dsa_untag_bridge_pvid() call eth_type_trans() and this gets rid
  of the extra step you need to do in tag_brcm.c
- document this very well

> Fixes: 412a1526d067 ("net: dsa: untag the bridge pvid from rx skbs")
> Signed-off-by: Florian Fainelli 
> ---
> Changes in v2:
> 
> - removed unused list_head iter argument
> 
>  net/dsa/dsa_priv.h | 11 ---
>  1 file changed, 4 insertions(+), 7 deletions(-)
> 
> diff --git a/net/dsa/dsa_priv.h b/net/dsa/dsa_priv.h
> index 0348dbab4131..b4aafb2e90fa 100644
> --- a/net/dsa/dsa_priv.h
> +++ b/net/dsa/dsa_priv.h
> @@ -205,7 +205,6 @@ static inline struct sk_buff 
> *dsa_untag_bridge_pvid(struct sk_buff *skb)
>   struct net_device *br = dp->bridge_dev;
>   struct net_device *dev = skb->dev;
>   struct net_device *upper_dev;
> - struct list_head *iter;
>   u16 vid, pvid, proto;
>   int err;
>  
> @@ -247,12 +246,10 @@ static inline struct sk_buff 
> *dsa_untag_bridge_pvid(struct sk_buff *skb)
>* supports because vlan_filtering is 0. In that case, we should
>* definitely keep the tag, to make sure it keeps working.
>*/
> - netdev_for_each_upper_dev_rcu(dev, upper_dev, iter) {
> - if (!is_vlan_dev(upper_dev))
> - continue;
> -
> - if (vid == vlan_dev_vlan_id(upper_dev))
> - return skb;
> + upper_dev = __vlan_find_dev_deep_rcu(br, htons(proto), vid);
> + if (upper_dev) {
> + skb->vlan_proto = vlan_dev_vlan_proto(upper_dev);
> + return skb;
>   }
>  
>   __vlan_hwaccel_clear_tag(skb);
> -- 
> 2.25.1
> 

Re: [PATCH v3 02/18] iommu/vt-d: Add DEV-MSI support

2020-10-01 Thread Dey, Megha

Hi Thomas,

On 9/30/2020 11:32 AM, Thomas Gleixner wrote:

On Tue, Sep 15 2020 at 16:27, Dave Jiang wrote:

@@ -1303,9 +1303,10 @@ static void intel_irq_remapping_prepare_irte(struct 
intel_ir_data *data,
case X86_IRQ_ALLOC_TYPE_HPET:
case X86_IRQ_ALLOC_TYPE_PCI_MSI:
case X86_IRQ_ALLOC_TYPE_PCI_MSIX:
+   case X86_IRQ_ALLOC_TYPE_DEV_MSI:
if (info->type == X86_IRQ_ALLOC_TYPE_HPET)
set_hpet_sid(irte, info->devid);
-   else
+   else if (info->type != X86_IRQ_ALLOC_TYPE_DEV_MSI)
set_msi_sid(irte,
msi_desc_to_pci_dev(info->desc));

Gah. this starts to become unreadable.

hmm ok will change it.


diff --git a/drivers/iommu/intel/irq_remapping.c 
b/drivers/iommu/intel/irq_remapping.c
index 8f4ce72570ce..0c1ea8ceec31 100644
--- a/drivers/iommu/intel/irq_remapping.c
+++ b/drivers/iommu/intel/irq_remapping.c
@@ -1271,6 +1271,16 @@ static struct irq_chip intel_ir_chip = {
.irq_set_vcpu_affinity  = intel_ir_set_vcpu_affinity,
  };
  
+static void irte_prepare_msg(struct msi_msg *msg, int index, int subhandle)

+{
+   msg->address_hi = MSI_ADDR_BASE_HI;
+   msg->data = sub_handle;
+   msg->address_lo = MSI_ADDR_BASE_LO | MSI_ADDR_IR_EXT_INT |
+ MSI_ADDR_IR_SHV |
+ MSI_ADDR_IR_INDEX1(index) |
+ MSI_ADDR_IR_INDEX2(index);
+}
+
  static void intel_irq_remapping_prepare_irte(struct intel_ir_data *data,
 struct irq_cfg *irq_cfg,
 struct irq_alloc_info *info,
@@ -1312,19 +1322,18 @@ static void intel_irq_remapping_prepare_irte(struct 
intel_ir_data *data,
break;
  
  	case X86_IRQ_ALLOC_TYPE_HPET:

+   set_hpet_sid(irte, info->hpet_id);
+   irte_prepare_msg(msg, index, sub_handle);
+   break;
+
case X86_IRQ_ALLOC_TYPE_MSI:
case X86_IRQ_ALLOC_TYPE_MSIX:
-   if (info->type == X86_IRQ_ALLOC_TYPE_HPET)
-   set_hpet_sid(irte, info->hpet_id);
-   else
-   set_msi_sid(irte, info->msi_dev);
-
-   msg->address_hi = MSI_ADDR_BASE_HI;
-   msg->data = sub_handle;
-   msg->address_lo = MSI_ADDR_BASE_LO | MSI_ADDR_IR_EXT_INT |
- MSI_ADDR_IR_SHV |
- MSI_ADDR_IR_INDEX1(index) |
- MSI_ADDR_IR_INDEX2(index);
+   set_msi_sid(irte, info->msi_dev);
+   irte_prepare_msg(msg, index, sub_handle);
+   break;
+
+   case X86_IRQ_ALLOC_TYPE_DEV_MSI:
+   irte_prepare_msg(msg, index, sub_handle);
break;
  
  	default:


Hmm?


ok so I have no clue what happened here. This was the patch that was 
sent out:


https://lore.kernel.org/lkml/160021246905.67751.1674517279122764758.st...@djiang5-desk3.ch.intel.com/

and this does not have the above change. Not sure what happened here.

Anyways, this should not be there.



Thanks,

 tglx


Re: [Freedreno] [PATCH 2/3] drm/msm: add DRM_MSM_GEM_SYNC_CACHE for non-coherent cache maintenance

2020-10-01 Thread Jordan Crouse
On Wed, Sep 30, 2020 at 08:27:05PM -0400, Jonathan Marek wrote:
> This makes it possible to use the non-coherent cached MSM_BO_CACHED mode,
> which otherwise doesn't provide any method for cleaning/invalidating the
> cache to sync with the device.
> 
> Signed-off-by: Jonathan Marek 
> ---
>  drivers/gpu/drm/msm/msm_drv.c | 21 +
>  drivers/gpu/drm/msm/msm_drv.h |  2 ++
>  drivers/gpu/drm/msm/msm_gem.c | 15 +++
>  include/uapi/drm/msm_drm.h| 20 
>  4 files changed, 58 insertions(+)
> 
> diff --git a/drivers/gpu/drm/msm/msm_drv.c b/drivers/gpu/drm/msm/msm_drv.c
> index 9716210495fc..305db1db1064 100644
> --- a/drivers/gpu/drm/msm/msm_drv.c
> +++ b/drivers/gpu/drm/msm/msm_drv.c
> @@ -964,6 +964,26 @@ static int msm_ioctl_submitqueue_close(struct drm_device 
> *dev, void *data,
>   return msm_submitqueue_remove(file->driver_priv, id);
>  }
>  
> +static int msm_ioctl_gem_sync_cache(struct drm_device *dev, void *data,
> + struct drm_file *file)
> +{
> + struct drm_msm_gem_sync_cache *args = data;
> + struct drm_gem_object *obj;
> +
> + if (args->flags & ~MSM_GEM_SYNC_CACHE_FLAGS)
> + return -EINVAL;
> +
> + obj = drm_gem_object_lookup(file, args->handle);
> + if (!obj)
> + return -ENOENT;
> +
> + msm_gem_sync_cache(obj, args->flags, args->offset, args->end);
> +
> + drm_gem_object_put(obj);
> +
> + return 0;
> +}
> +
>  static const struct drm_ioctl_desc msm_ioctls[] = {
>   DRM_IOCTL_DEF_DRV(MSM_GET_PARAM,msm_ioctl_get_param,
> DRM_RENDER_ALLOW),
>   DRM_IOCTL_DEF_DRV(MSM_GEM_NEW,  msm_ioctl_gem_new,  
> DRM_RENDER_ALLOW),
> @@ -976,6 +996,7 @@ static const struct drm_ioctl_desc msm_ioctls[] = {
>   DRM_IOCTL_DEF_DRV(MSM_SUBMITQUEUE_NEW,   msm_ioctl_submitqueue_new,   
> DRM_RENDER_ALLOW),
>   DRM_IOCTL_DEF_DRV(MSM_SUBMITQUEUE_CLOSE, msm_ioctl_submitqueue_close, 
> DRM_RENDER_ALLOW),
>   DRM_IOCTL_DEF_DRV(MSM_SUBMITQUEUE_QUERY, msm_ioctl_submitqueue_query, 
> DRM_RENDER_ALLOW),
> + DRM_IOCTL_DEF_DRV(MSM_GEM_SYNC_CACHE,msm_ioctl_gem_sync_cache,
> DRM_RENDER_ALLOW),
>  };
>  
>  static const struct vm_operations_struct vm_ops = {
> diff --git a/drivers/gpu/drm/msm/msm_drv.h b/drivers/gpu/drm/msm/msm_drv.h
> index 6384844b1696..5e932dae453f 100644
> --- a/drivers/gpu/drm/msm/msm_drv.h
> +++ b/drivers/gpu/drm/msm/msm_drv.h
> @@ -314,6 +314,8 @@ void msm_gem_move_to_active(struct drm_gem_object *obj,
>  void msm_gem_move_to_inactive(struct drm_gem_object *obj);
>  int msm_gem_cpu_prep(struct drm_gem_object *obj, uint32_t op, ktime_t 
> *timeout);
>  int msm_gem_cpu_fini(struct drm_gem_object *obj);
> +void msm_gem_sync_cache(struct drm_gem_object *obj, uint32_t flags,
> + size_t range_start, size_t range_end);
>  void msm_gem_free_object(struct drm_gem_object *obj);
>  int msm_gem_new_handle(struct drm_device *dev, struct drm_file *file,
>   uint32_t size, uint32_t flags, uint32_t *handle, char *name);
> diff --git a/drivers/gpu/drm/msm/msm_gem.c b/drivers/gpu/drm/msm/msm_gem.c
> index ad9a627493ae..93da88b3fc50 100644
> --- a/drivers/gpu/drm/msm/msm_gem.c
> +++ b/drivers/gpu/drm/msm/msm_gem.c
> @@ -8,6 +8,7 @@
>  #include 
>  #include 
>  #include 
> +#include 
>  
>  #include 
>  
> @@ -808,6 +809,20 @@ int msm_gem_cpu_fini(struct drm_gem_object *obj)
>   return 0;
>  }
>  
> +void msm_gem_sync_cache(struct drm_gem_object *obj, uint32_t flags,
> + size_t range_start, size_t range_end)
> +{
> + struct msm_gem_object *msm_obj = to_msm_bo(obj);
> +
> + /* TODO: sync only the required range, and don't invalidate on clean */
> +
> + if (flags & MSM_GEM_SYNC_CACHE_CLEAN)

Curious why you would rename these - I feel like the to_device / to_cpu model is
pretty well baked into our thought process. I know from personal experience that
I have to stop and think to remember which direction is which.

Jordan

> + sync_for_device(msm_obj);
> +
> + if (flags & MSM_GEM_SYNC_CACHE_INVALIDATE)
> + sync_for_cpu(msm_obj);
> +}
> +
>  #ifdef CONFIG_DEBUG_FS
>  static void describe_fence(struct dma_fence *fence, const char *type,
>   struct seq_file *m)
> diff --git a/include/uapi/drm/msm_drm.h b/include/uapi/drm/msm_drm.h
> index 474497e8743a..1dfafa71fc94 100644
> --- a/include/uapi/drm/msm_drm.h
> +++ b/include/uapi/drm/msm_drm.h
> @@ -319,6 +319,24 @@ struct drm_msm_submitqueue_query {
>   __u32 pad;
>  };
>  
> +/*
> + * Host cache maintenance (relevant for MSM_BO_CACHED)
> + * driver may both clean/invalidate (flush) for clean
> + */
> +
> +#define MSM_GEM_SYNC_CACHE_CLEAN 0x1
> +#define MSM_GEM_SYNC_CACHE_INVALIDATE0x2
> +
> +#define MSM_GEM_SYNC_CACHE_FLAGS (MSM_GEM_SYNC_CACHE_CLEAN | \
> +  MSM_GEM_SYNC_CACHE_INVALIDATE)
> +
> +struct drm_msm_gem_sync_cache {
> + __u32 handle;
> + __u32 flags;
> + 

Re: [PATCH 3/7] Functions to fetch POSIX dynamic clock object

2020-10-01 Thread Thomas Gleixner
On Thu, Oct 01 2020 at 22:51, Erez Geva wrote:
> Add kernel functions to fetch a pointer to a POSIX dynamic clock
> using a user file description dynamic clock ID.

And how is that supposed to work? What are the lifetime rules?
  
> +struct posix_clock *posix_clock_get_clock(clockid_t id)
> +{
> + int err;
> + struct posix_clock_desc cd;

The core code uses reverse fir tree ordering of variable declaration
based on the length:

struct posix_clock_desc cd;
int err;

> + /* Verify we use posix clock ID */
> + if (!is_clockid_fd_clock(id))
> + return ERR_PTR(-EINVAL);
> +
> + err = get_clock_desc(id, &cd);

So this is a kernel interface and get_clock_desc() does:

struct file *fp = fget(clockid_to_fd(id));

How is that file descriptor valid in random kernel context?

> + if (err)
> + return ERR_PTR(err);
> +
> + get_device(cd.clk->dev);

The purpose of this is? Comments are overrated...

> + put_clock_desc(&cd);
> +
> + return cd.clk;
> +}
> +EXPORT_SYMBOL_GPL(posix_clock_get_clock);
> +
> +int posix_clock_put_clock(struct posix_clock *clk)
> +{
> + if (IS_ERR_OR_NULL(clk))
> + return -EINVAL;
> + put_device(clk->dev);
> + return 0;
> +}
> +EXPORT_SYMBOL_GPL(posix_clock_put_clock);
> +
> +int posix_clock_gettime(struct posix_clock *clk, struct timespec64 *ts)
> +{
> + int err;
> +
> + if (IS_ERR_OR_NULL(clk))
> + return -EINVAL;
> +
> + down_read(&clk->rwsem);

Open coding the logic of get_posix_clock() and having a copy here and
in the next function is really useful.

Thanks,

tglx


Re: [PATCH net-next v2] net: dsa: Support bridge 802.1Q while untagging

2020-10-01 Thread Vladimir Oltean
On Fri, Oct 02, 2020 at 02:24:02AM +0300, Vladimir Oltean wrote:
> The explanation is super confusing, although I think the placement of
> the "skb->vlan_proto = vlan_dev_vlan_proto(upper_dev)" is correct.

No, I think it _is_ wrong, after all, I think you're repairing
skb->vlan_proto only for that particular 8021q upper, but not for the
rest. I think the correct approach would be to say "skb->protocol =
hdr->h_vlan_proto" right before calling skb_vlan_untag().

Re: [RFC PATCH 13/22] x86/fpu/xstate: Expand dynamic user state area on first use

2020-10-01 Thread Andy Lutomirski
On Thu, Oct 1, 2020 at 1:43 PM Chang S. Bae  wrote:
>
> Intel's Extended Feature Disable (XFD) feature is an extension of the XSAVE
> architecture. XFD allows the kernel to enable a feature state in XCR0 and
> to receive a #NM trap when a task uses instructions accessing that state.
> In this way, Linux can allocate the large task->fpu buffer only for tasks
> that use it.
>
> XFD introduces two MSRs: IA32_XFD to enable/disable the feature and
> IA32_XFD_ERR to assist the #NM trap handler. Both use the same
> state-component bitmap format, used by XCR0.
>
> Use this hardware capability to find the right time to expand xstate area.
> Introduce two sets of helper functions for that:
>
> 1. The first set is primarily for interacting with the XFD hardware
>feature. Helpers for configuring disablement, e.g. in context switching,
>are:
> xdisable_setbits()
> xdisable_getbits()
> xdisable_switch()
>
> 2. The second set is for managing the first-use status and handling #NM
>trap:
> xfirstuse_enabled()
> xfirstuse_not_detected()
> xfirstuse_event_handler()
>
> The #NM handler induces the xstate area expansion to save the first-used
> states.
>
> No functional change until the kernel enables dynamic user states and XFD.
>
> Signed-off-by: Chang S. Bae 
> Reviewed-by: Len Brown 
> Cc: x...@kernel.org
> Cc: linux-kernel@vger.kernel.org
> ---
>  arch/x86/include/asm/cpufeatures.h  |  1 +
>  arch/x86/include/asm/fpu/internal.h | 53 -
>  arch/x86/include/asm/msr-index.h|  2 ++
>  arch/x86/kernel/fpu/core.c  | 37 
>  arch/x86/kernel/fpu/xstate.c| 34 --
>  arch/x86/kernel/process.c   |  5 +++
>  arch/x86/kernel/process_32.c|  2 +-
>  arch/x86/kernel/process_64.c|  2 +-
>  arch/x86/kernel/traps.c |  3 ++
>  9 files changed, 133 insertions(+), 6 deletions(-)
>
> diff --git a/arch/x86/include/asm/cpufeatures.h 
> b/arch/x86/include/asm/cpufeatures.h
> index 2901d5df4366..7d7fe1d82966 100644
> --- a/arch/x86/include/asm/cpufeatures.h
> +++ b/arch/x86/include/asm/cpufeatures.h
> @@ -274,6 +274,7 @@
>  #define X86_FEATURE_XSAVEC (10*32+ 1) /* XSAVEC instruction */
>  #define X86_FEATURE_XGETBV1(10*32+ 2) /* XGETBV with ECX = 1 
> instruction */
>  #define X86_FEATURE_XSAVES (10*32+ 3) /* XSAVES/XRSTORS 
> instructions */
> +#define X86_FEATURE_XFD(10*32+ 4) /* eXtended 
> Feature Disabling */
>
>  /*
>   * Extended auxiliary flags: Linux defined - for features scattered in 
> various
> diff --git a/arch/x86/include/asm/fpu/internal.h 
> b/arch/x86/include/asm/fpu/internal.h
> index 3b03ead87a46..f5dbbaa060fb 100644
> --- a/arch/x86/include/asm/fpu/internal.h
> +++ b/arch/x86/include/asm/fpu/internal.h
> @@ -572,11 +572,60 @@ static inline void switch_fpu_prepare(struct fpu 
> *old_fpu, int cpu)
>   * Misc helper functions:
>   */
>
> +/* The first-use detection helpers: */
> +
> +static inline void xdisable_setbits(u64 value)
> +{
> +   wrmsrl_safe(MSR_IA32_XFD, value);
> +}
> +
> +static inline u64 xdisable_getbits(void)
> +{
> +   u64 value;
> +
> +   rdmsrl_safe(MSR_IA32_XFD, &value);
> +   return value;
> +}
> +
> +static inline u64 xfirstuse_enabled(void)
> +{
> +   /* All the dynamic user components are first-use enabled. */
> +   return xfeatures_mask_user_dynamic;
> +}
> +
> +/*
> + * Convert fpu->firstuse_bv to xdisable configuration in MSR IA32_XFD.
> + * xdisable_setbits() only uses this.
> + */
> +static inline u64 xfirstuse_not_detected(struct fpu *fpu)
> +{
> +   u64 firstuse_bv = (fpu->state_mask & xfirstuse_enabled());
> +
> +   /*
> +* If first-use is not detected, set the bit. If the detection is
> +* not enabled, the bit is always zero in firstuse_bv. So, make
> +* following conversion:
> +*/
> +   return  (xfirstuse_enabled() ^ firstuse_bv);
> +}
> +
> +/* Update MSR IA32_XFD based on fpu->firstuse_bv */
> +static inline void xdisable_switch(struct fpu *prev, struct fpu *next)
> +{
> +   if (!static_cpu_has(X86_FEATURE_XFD) || !xfirstuse_enabled())
> +   return;
> +
> +   if (unlikely(prev->state_mask != next->state_mask))
> +   xdisable_setbits(xfirstuse_not_detected(next));
> +}
> +
> +bool xfirstuse_event_handler(struct fpu *fpu);
> +
>  /*
>   * Load PKRU from the FPU context if available. Delay loading of the
>   * complete FPU state until the return to userland.
>   */
> -static inline void switch_fpu_finish(struct fpu *new_fpu)
> +static inline void switch_fpu_finish(struct fpu *old_fpu, struct fpu 
> *new_fpu)
>  {
> u32 pkru_val = init_pkru_value;
> struct pkru_state *pk;
> @@ -586,6 +635,8 @@ static inline void switch_fpu_finish(struct fpu *new_fpu)
>
> set_thread_flag(TIF_NEED_FPU_LOAD);
>
> +   xdisable_switch(old_fpu, new_fpu);
> +
> 

Re: [PATCH 3/4] mmap locking API: Don't check locking if the mm isn't live yet

2020-10-01 Thread Jason Gunthorpe
On Thu, Oct 01, 2020 at 10:16:35PM +0200, Jann Horn wrote:

> > A subclass isn't right, it has to be a _nested annotation.
> >
> > nested locking is a pretty good reason to not be able to do this, this
> > is something lockdep does struggle to model.
> 
> Did I get the terminology wrong? I thought they were the same. The
> down_*_nested() APIs take an argument "subclass", with the default
> subclass for the functions without "_nested" being 0.

AFAIK a subclass at init time sticks with the lock forever, the
_nested ones are temporary overrides.

I think what you kind of want is to start out with
lockdep_set_novalidate_class() then switch to a real class once things
are finished. Not sure exactly how :)

Jason


Re: [RFC PATCH 07/22] x86/fpu/xstate: Introduce helpers to manage an xstate area dynamically

2020-10-01 Thread Andy Lutomirski
On Thu, Oct 1, 2020 at 1:42 PM Chang S. Bae  wrote:
>
> task->fpu has a buffer to keep the extended register states, but it is not
> expandable at runtime. Introduce runtime methods and new fpu struct fields
> to support the expansion.
>
> fpu->state_mask indicates the saved states per task and fpu->state_ptr
> points the dynamically allocated area.
>
> alloc_xstate_area() uses vmalloc() for its scalability. However, set a
> threshold (64KB) to watch out a potential need for an alternative
> mechanism.
>
> Also, introduce a new helper -- get_xstate_size() to calculate the area
> size.
>
> No functional change until the kernel supports dynamic user states.
>
> Signed-off-by: Chang S. Bae 
> Reviewed-by: Len Brown 
> Cc: x...@kernel.org
> Cc: linux-kernel@vger.kernel.org
> ---
>  arch/x86/include/asm/fpu/types.h  |  29 +--
>  arch/x86/include/asm/fpu/xstate.h |   3 +
>  arch/x86/kernel/fpu/core.c|   3 +
>  arch/x86/kernel/fpu/xstate.c  | 124 ++
>  4 files changed, 154 insertions(+), 5 deletions(-)
>
> diff --git a/arch/x86/include/asm/fpu/types.h 
> b/arch/x86/include/asm/fpu/types.h
> index c87364ea6446..4b7756644824 100644
> --- a/arch/x86/include/asm/fpu/types.h
> +++ b/arch/x86/include/asm/fpu/types.h
> @@ -327,14 +327,33 @@ struct fpu {
>  */
> unsigned long   avx512_timestamp;
>
> +   /*
> +* @state_mask:
> +*
> +* The state component bitmap. It indicates the saved xstate in
> +* either @state or @state_ptr. The map value starts to be aligned
> +* with @state and then with @state_ptr once it is in use.
> +*/
> +   u64 state_mask;
> +
> +   /*
> +* @state_ptr:
> +*
> +* Copy of all extended register states, in a dynamically-allocated
> +* area, we save and restore over context switches. When a task is
> +* using extended features, the register state is always the most
> +* current. This state copy is more recent than @state. If the task
> +* context-switches away, they get saved here, representing the 
> xstate.
> +*/
> +   union fpregs_state  *state_ptr;
> +
> /*
>  * @state:
>  *
> -* In-memory copy of all FPU registers that we save/restore
> -* over context switches. If the task is using the FPU then
> -* the registers in the FPU are more recent than this state
> -* copy. If the task context-switches away then they get
> -* saved here and represent the FPU state.
> +* Copy of some extended register state that we save and restore
> +* over context switches. If a task uses a dynamically-allocated
> +* area, @state_ptr, then it has a more recent state copy than this.
> +* This copy follows the same attributes as described for @state_ptr.
>  */
> union fpregs_state  state;
> /*
> diff --git a/arch/x86/include/asm/fpu/xstate.h 
> b/arch/x86/include/asm/fpu/xstate.h
> index 9aad91c0725b..37728bfcb71e 100644
> --- a/arch/x86/include/asm/fpu/xstate.h
> +++ b/arch/x86/include/asm/fpu/xstate.h
> @@ -103,6 +103,9 @@ extern void __init update_regset_xstate_info(unsigned int 
> size,
>  u64 xstate_mask);
>
>  void *get_xsave_addr(struct fpu *fpu, int xfeature_nr);
> +int alloc_xstate_area(struct fpu *fpu, u64 mask, unsigned int *alloc_size);
> +void free_xstate_area(struct fpu *fpu);
> +
>  const void *get_xsave_field_ptr(int xfeature_nr);
>  int using_compacted_format(void);
>  int xfeature_size(int xfeature_nr);
> diff --git a/arch/x86/kernel/fpu/core.c b/arch/x86/kernel/fpu/core.c
> index 875620fdfe61..e25f7866800e 100644
> --- a/arch/x86/kernel/fpu/core.c
> +++ b/arch/x86/kernel/fpu/core.c
> @@ -235,6 +235,9 @@ int fpu__copy(struct task_struct *dst, struct task_struct 
> *src)
>  */
> memset(&dst_fpu->state.xsave, 0, fpu_kernel_xstate_default_size);
>
> +   dst_fpu->state_mask = xfeatures_mask_all & 
> ~xfeatures_mask_user_dynamic;
> +   dst_fpu->state_ptr = NULL;
> +
> /*
>  * If the FPU registers are not current just memcpy() the state.
>  * Otherwise save current FPU registers directly into the child's FPU
> diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c
> index 6e0d8a9699ed..af60332aafef 100644
> --- a/arch/x86/kernel/fpu/xstate.c
> +++ b/arch/x86/kernel/fpu/xstate.c
> @@ -10,6 +10,7 @@
>  #include 
>  #include 
>  #include 
> +#include 
>
>  #include 
>  #include 
> @@ -69,6 +70,7 @@ static unsigned int xstate_offsets[XFEATURE_MAX] = { [ 0 
> ... XFEATURE_MAX - 1] =
>  static unsigned int xstate_sizes[XFEATURE_MAX]   = { [ 0 ... XFEATURE_MAX - 
> 1] = -1};
>  static unsigned int xstate_comp_offsets[XFEATURE_MAX] = { [ 0 ... 
> XFEATURE_MAX - 1] = -1};
>  static unsigned int xstate_supervisor_only_offsets[XFEATURE_MAX] = { 

Re: [PATCH rdma-next 0/2] RDMA: Constify static struct attribute_group

2020-10-01 Thread Jason Gunthorpe
On Thu, Oct 01, 2020 at 12:40:02AM +0200, Rikard Falkeborn wrote:
> Constify a couple of static struct attribute_group that are never
> modified to allow the compiler to put them in read-only memory.
> 
> Rikard Falkeborn (2):
>   RDMA/core: Constify struct attribute_group
>   RDMA/rtrs: Constify static struct attribute_group

Applied to for-next, thanks

Jason


Re: [PATCH 1/3] drm/msm: add MSM_BO_CACHED_COHERENT

2020-10-01 Thread Jordan Crouse
On Wed, Sep 30, 2020 at 08:27:04PM -0400, Jonathan Marek wrote:
> Add a new cache mode for creating coherent host-cached BOs.

Reviewed-by: Jordan Crouse 

> Signed-off-by: Jonathan Marek 
> ---
>  drivers/gpu/drm/msm/adreno/adreno_device.c | 1 +
>  drivers/gpu/drm/msm/msm_drv.h  | 1 +
>  drivers/gpu/drm/msm/msm_gem.c  | 8 
>  include/uapi/drm/msm_drm.h | 5 ++---
>  4 files changed, 12 insertions(+), 3 deletions(-)
> 
> diff --git a/drivers/gpu/drm/msm/adreno/adreno_device.c 
> b/drivers/gpu/drm/msm/adreno/adreno_device.c
> index 9eeb46bf2a5d..2aa707546254 100644
> --- a/drivers/gpu/drm/msm/adreno/adreno_device.c
> +++ b/drivers/gpu/drm/msm/adreno/adreno_device.c
> @@ -410,6 +410,7 @@ static int adreno_bind(struct device *dev, struct device 
> *master, void *data)
>   config.rev.minor, config.rev.patchid);
>  
>   priv->is_a2xx = config.rev.core == 2;
> + priv->has_cached_coherent = config.rev.core >= 6;
>  
>   gpu = info->init(drm);
>   if (IS_ERR(gpu)) {
> diff --git a/drivers/gpu/drm/msm/msm_drv.h b/drivers/gpu/drm/msm/msm_drv.h
> index 2c3225bc1794..6384844b1696 100644
> --- a/drivers/gpu/drm/msm/msm_drv.h
> +++ b/drivers/gpu/drm/msm/msm_drv.h
> @@ -167,6 +167,7 @@ struct msm_drm_private {
>   struct msm_file_private *lastctx;
>   /* gpu is only set on open(), but we need this info earlier */
>   bool is_a2xx;
> + bool has_cached_coherent;
>  
>   struct drm_fb_helper *fbdev;
>  
> diff --git a/drivers/gpu/drm/msm/msm_gem.c b/drivers/gpu/drm/msm/msm_gem.c
> index b2f49152b4d4..ad9a627493ae 100644
> --- a/drivers/gpu/drm/msm/msm_gem.c
> +++ b/drivers/gpu/drm/msm/msm_gem.c
> @@ -431,6 +431,9 @@ static int msm_gem_pin_iova(struct drm_gem_object *obj,
>   if (msm_obj->flags & MSM_BO_MAP_PRIV)
>   prot |= IOMMU_PRIV;
>  
> + if (msm_obj->flags & MSM_BO_CACHED_COHERENT)
> + prot |= IOMMU_CACHE;
> +
>   WARN_ON(!mutex_is_locked(&msm_obj->lock));
>  
>   if (WARN_ON(msm_obj->madv != MSM_MADV_WILLNEED))
> @@ -998,6 +1001,7 @@ static int msm_gem_new_impl(struct drm_device *dev,
>   uint32_t size, uint32_t flags,
>   struct drm_gem_object **obj)
>  {
> + struct msm_drm_private *priv = dev->dev_private;
>   struct msm_gem_object *msm_obj;
>  
>   switch (flags & MSM_BO_CACHE_MASK) {
> @@ -1005,6 +1009,10 @@ static int msm_gem_new_impl(struct drm_device *dev,
>   case MSM_BO_CACHED:
>   case MSM_BO_WC:
>   break;
> + case MSM_BO_CACHED_COHERENT:
> + if (priv->has_cached_coherent)
> + break;
> + /* fallthrough */

It confused me that this kind of implicitly fell into the else clause in
msm_gem_mmap_obj, but I'm on board. This is a good solution since it only allows
I/O coherence with caching.

>   default:
>   DRM_DEV_ERROR(dev->dev, "invalid cache flag: %x\n",
>   (flags & MSM_BO_CACHE_MASK));
> diff --git a/include/uapi/drm/msm_drm.h b/include/uapi/drm/msm_drm.h
> index a6c1f3eb2623..474497e8743a 100644
> --- a/include/uapi/drm/msm_drm.h
> +++ b/include/uapi/drm/msm_drm.h
> @@ -94,12 +94,11 @@ struct drm_msm_param {
>  #define MSM_BO_CACHED0x0001
>  #define MSM_BO_WC0x0002
>  #define MSM_BO_UNCACHED  0x0004
> +#define MSM_BO_CACHED_COHERENT 0x08
>  
>  #define MSM_BO_FLAGS (MSM_BO_SCANOUT | \
>MSM_BO_GPU_READONLY | \
> -  MSM_BO_CACHED | \
> -  MSM_BO_WC | \
> -  MSM_BO_UNCACHED)
> +  MSM_BO_CACHE_MASK)
>  
>  struct drm_msm_gem_new {
>   __u64 size;   /* in */
> -- 
> 2.26.1
> 

-- 
The Qualcomm Innovation Center, Inc. is a member of Code Aurora Forum,
a Linux Foundation Collaborative Project


Re: [PATCH net-next v2] net: dsa: Support bridge 802.1Q while untagging

2020-10-01 Thread Florian Fainelli




On 10/1/2020 4:24 PM, Vladimir Oltean wrote:

On Wed, Sep 30, 2020 at 08:06:23PM -0700, Florian Fainelli wrote:

The intent of 412a1526d067 ("net: dsa: untag the bridge pvid from rx
skbs") is to transparently untag the bridge's default_pvid when the
Ethernet switch can only support egress tagged of that default_pvid
towards the CPU port.

Prior to this commit, users would have to configure an 802.1Q upper on
the bridge master device when the bridge is configured with
vlan_filtering=0 in order to pop the VLAN tag:

ip link add name br0 type bridge vlan_filtering 0
ip link add link br0 name br0.1 type vlan id 1

After this commit we added support for managing a switch port 802.1Q
upper but those are not usually added as bridge members, and if they do,
they do not actually require any special management, the data path would
pop the desired VLAN tag accordingly.

What we want to preserve is that use case and to manage when the user
creates that 802.1Q upper for the bridge port.

While we are at it, call __vlan_find_dev_deep_rcu() which makes use of the
VLAN group array, which is faster.

As soon as we return the VLAN tagged SKB though it will be used by the
following call path:

netif_receive_skb_list_internal
   -> __netif_receive_skb_list_core
 -> __netif_receive_skb_core
   -> vlan_do_receive()

which uses skb->vlan_proto, if we do not set it to the appropriate VLAN
protocol, we will leave it set to what the DSA master has set
(ETH_P_XDSA).



The explanation is super confusing, although I think the placement of
the "skb->vlan_proto = vlan_dev_vlan_proto(upper_dev)" is correct.
Here's what I think is going on. It has to do with what's upwards of the
code you're changing:

/* Move VLAN tag from data to hwaccel */
if (!skb_vlan_tag_present(skb) && hdr->h_vlan_proto == htons(proto)) {
skb = skb_vlan_untag(skb);
if (!skb)
return NULL;
}

So skb->vlan_proto should already be equal to the protocol of the 8021q
upper, see the call path below.

this is the problem
|
skb_vlan_untag()   v
   -> __vlan_hwaccel_put_tag(skb, skb->protocol, vlan_tci);
 -> skb->vlan_proto = vlan_proto;


Ah, indeed!



But the problem is that skb_vlan_untag() calls __vlan_hwaccel_put_tag
with the wrong vlan_proto, it calls it with the skb->protocol which is
still ETH_P_XDSA because we haven't re-run eth_type_trans() yet.
It looks like this function wants pretty badly to be called after
eth_type_trans(), and it's getting pretty messy because of that, but we
don't have any other driver-specific hook afterwards..

I don't have a lot of experience, the alternatives are either to:
- move dsa_untag_bridge_pvid() after eth_type_trans(), similar to what
   you did in your initial patch - maybe this is the cleanest


This would be my preference and it would not be hurting the fast-path 
that much.



- make dsa_untag_bridge_pvid() call eth_type_trans() and this gets rid
   of the extra step you need to do in tag_brcm.c


Sure, however this requires that we remove the call to eth_type_trans() 
in dsa_switch_rcv() or that we push/pull by an appropriate amount, not 
very effective.



- document this very well


I doubt this would survive the test of time unfortunately.




Fixes: 412a1526d067 ("net: dsa: untag the bridge pvid from rx skbs")
Signed-off-by: Florian Fainelli 
---
Changes in v2:

- removed unused list_head iter argument

  net/dsa/dsa_priv.h | 11 ---
  1 file changed, 4 insertions(+), 7 deletions(-)

diff --git a/net/dsa/dsa_priv.h b/net/dsa/dsa_priv.h
index 0348dbab4131..b4aafb2e90fa 100644
--- a/net/dsa/dsa_priv.h
+++ b/net/dsa/dsa_priv.h
@@ -205,7 +205,6 @@ static inline struct sk_buff *dsa_untag_bridge_pvid(struct 
sk_buff *skb)
struct net_device *br = dp->bridge_dev;
struct net_device *dev = skb->dev;
struct net_device *upper_dev;
-   struct list_head *iter;
u16 vid, pvid, proto;
int err;
  
@@ -247,12 +246,10 @@ static inline struct sk_buff *dsa_untag_bridge_pvid(struct sk_buff *skb)

 * supports because vlan_filtering is 0. In that case, we should
 * definitely keep the tag, to make sure it keeps working.
 */
-   netdev_for_each_upper_dev_rcu(dev, upper_dev, iter) {
-   if (!is_vlan_dev(upper_dev))
-   continue;
-
-   if (vid == vlan_dev_vlan_id(upper_dev))
-   return skb;
+   upper_dev = __vlan_find_dev_deep_rcu(br, htons(proto), vid);
+   if (upper_dev) {
+   skb->vlan_proto = vlan_dev_vlan_proto(upper_dev);
+   return skb;
}
  
  	__vlan_hwaccel_clear_tag(skb);

--
2.25.1


--
Florian


Re: [PATCH net-next v2] net: dsa: Support bridge 802.1Q while untagging

2020-10-01 Thread Vladimir Oltean
On Thu, Oct 01, 2020 at 04:48:43PM -0700, Florian Fainelli wrote:
> > - move dsa_untag_bridge_pvid() after eth_type_trans(), similar to what
> >you did in your initial patch - maybe this is the cleanest
>
> This would be my preference and it would not be hurting the fast-path that
> much.

Ok, let's do that. You can also replace the hdr->h_vlan_proto with
skb->protocol in that case, and remove this:

struct vlan_ethhdr *hdr = vlan_eth_hdr(skb);

Thanks!
-Vladimir

Re: [PATCH v2 1/5] media: mt9p031: Add support for 8 bit and 10 bit formats

2020-10-01 Thread Laurent Pinchart
Hi Stefan,

On Thu, Oct 01, 2020 at 11:07:00AM +0200, Stefan Riedmüller wrote:
> On 30.09.20 13:42, Laurent Pinchart wrote:
> > On Wed, Sep 30, 2020 at 12:51:29PM +0200, Stefan Riedmueller wrote:
> >> From: Christian Hemp 
> >>
> >> Aside from 12 bit monochrome or color format the sensor implicitly
> >> supports 10 and 8 bit formats as well by simply dropping the
> >> corresponding LSBs.
> > 
> > That's not how it should work though. If you set the format on
> > MEDIA_BUS_FMT_SGRBG8_1X8 through the pipeline for instance, you will end
> > up capturing the 8 LSB, not the 8 MSB.
> > 
> > What's your use case for this ?
> 
> I use this sensor in combination with an i.MX 6 and i.MX 6UL. When the 
> sensor is connected with 12 bit (or 10 bit on the i.MX 6UL) and I set 
> MEDIA_BUS_FMT_SGRBG8_1X8 through the pipeline the CSI interface drops the 
> unused 4 LSB (or 2 LSB on the i.MX 6UL) so I get the 8 MSB from my 12 bit 
> sensor.

Is that the PIXEL_BIT bit in CSI_CSICR1 for the i.MX6UL ? If so I think
this should be handled in the imx7-media-csi driver. You could set the
format to MEDIA_BUS_FMT_SGRBG10_1X10 on the sink pad of the CSI and to
MEDIA_BUS_FMT_SGRBG8_1X8 on the source pad to configure this. I don't
think the sensor driver should be involved, otherwise we'd have to patch
all sensor drivers. From a sensor point of view, it outputs 12-bit
Bayer, not 8-bit.

Now there's a caveat. When used with the i.MX6UL, I assume you connected
D[11:2] of the sensor to D[9:0] of the i.MX6UL, right ? The i.MX6UL
doesn't support 12-bit inputs, so it should accept
MEDIA_BUS_FMT_SGRBG12_1X12 on its sink pad. In this case, as D[1:0] of
the sensor are left unconnected, I think you should set data-shift to 2
and bus-width to 10 in DT on the sensor side. The MT9P031 driver should
parse that, and output MEDIA_BUS_FMT_SGRBG10_1X10 instead of
MEDIA_BUS_FMT_SGRBG12_1X12 in that case.

> Does this clarify things? Maybe the description in the commit message is not 
> accurate enough or did I get something wrong?
> 
> >> Signed-off-by: Christian Hemp 
> >> [j...@pengutronix.de: simplified by dropping v4l2_colorspace handling]
> >> Signed-off-by: Jan Luebbe 
> >> Signed-off-by: Stefan Riedmueller 
> >> ---
> >> Changes in v2:
> >>   - Use unsigned int for num_fmts and loop variable in find_datafmt
> >>   - Remove superfluous const qualifier from find_datafmt
> >> ---
> >>   drivers/media/i2c/mt9p031.c | 50 +
> >>   1 file changed, 40 insertions(+), 10 deletions(-)
> >>
> >> diff --git a/drivers/media/i2c/mt9p031.c b/drivers/media/i2c/mt9p031.c
> >> index dc23b9ed510a..2e6671ef877c 100644
> >> --- a/drivers/media/i2c/mt9p031.c
> >> +++ b/drivers/media/i2c/mt9p031.c
> >> @@ -116,6 +116,18 @@ enum mt9p031_model {
> >>MT9P031_MODEL_MONOCHROME,
> >>   };
> >>   
> >> +static const u32 mt9p031_color_fmts[] = {
> >> +  MEDIA_BUS_FMT_SGRBG8_1X8,
> >> +  MEDIA_BUS_FMT_SGRBG10_1X10,
> >> +  MEDIA_BUS_FMT_SGRBG12_1X12,
> >> +};
> >> +
> >> +static const u32 mt9p031_monochrome_fmts[] = {
> >> +  MEDIA_BUS_FMT_Y8_1X8,
> >> +  MEDIA_BUS_FMT_Y10_1X10,
> >> +  MEDIA_BUS_FMT_Y12_1X12,
> >> +};
> >> +
> >>   struct mt9p031 {
> >>struct v4l2_subdev subdev;
> >>struct media_pad pad;
> >> @@ -138,6 +150,9 @@ struct mt9p031 {
> >>struct v4l2_ctrl *blc_auto;
> >>struct v4l2_ctrl *blc_offset;
> >>   
> >> +  const u32 *fmts;
> >> +  unsigned int num_fmts;
> >> +
> >>/* Registers cache */
> >>u16 output_control;
> >>u16 mode2;
> >> @@ -148,6 +163,17 @@ static struct mt9p031 *to_mt9p031(struct v4l2_subdev 
> >> *sd)
> >>return container_of(sd, struct mt9p031, subdev);
> >>   }
> >>   
> >> +static u32 mt9p031_find_datafmt(struct mt9p031 *mt9p031, u32 code)
> >> +{
> >> +  unsigned int i;
> >> +
> >> +  for (i = 0; i < mt9p031->num_fmts; i++)
> >> +  if (mt9p031->fmts[i] == code)
> >> +  return mt9p031->fmts[i];
> >> +
> >> +  return mt9p031->fmts[mt9p031->num_fmts-1];
> >> +}
> >> +
> >>   static int mt9p031_read(struct i2c_client *client, u8 reg)
> >>   {
> >>return i2c_smbus_read_word_swapped(client, reg);
> >> @@ -476,10 +502,11 @@ static int mt9p031_enum_mbus_code(struct v4l2_subdev 
> >> *subdev,
> >>   {
> >>struct mt9p031 *mt9p031 = to_mt9p031(subdev);
> >>   
> >> -  if (code->pad || code->index)
> >> +  if (code->pad || code->index >= mt9p031->num_fmts)
> >>return -EINVAL;
> >>   
> >> -  code->code = mt9p031->format.code;
> >> +  code->code = mt9p031->fmts[code->index];
> >> +
> >>return 0;
> >>   }
> >>   
> >> @@ -573,6 +600,8 @@ static int mt9p031_set_format(struct v4l2_subdev 
> >> *subdev,
> >>__format->width = __crop->width / hratio;
> >>__format->height = __crop->height / vratio;
> >>   
> >> +  __format->code = mt9p031_find_datafmt(mt9p031, format->format.code);
> >> +
> >>format->format = *__format;
> >>   
> >>return 0;
> >> @@ -951,10 +980,7 @@ static int mt9p031_open(struct v4l2_subdev *subdev, 
> >> 

Re: [PATCH 3/4] mmap locking API: Don't check locking if the mm isn't live yet

2020-10-01 Thread Jann Horn
On Fri, Oct 2, 2020 at 1:41 AM Jason Gunthorpe  wrote:
> On Thu, Oct 01, 2020 at 10:16:35PM +0200, Jann Horn wrote:
> > > A subclass isn't right, it has to be a _nested annotation.
> > >
> > > nested locking is a pretty good reason to not be able to do this, this
> > > is something lockdep does struggle to model.
> >
> > Did I get the terminology wrong? I thought they were the same. The
> > down_*_nested() APIs take an argument "subclass", with the default
> > subclass for the functions without "_nested" being 0.
>
> AFAIK a subclass at init time sticks with the lock forever, the
> _nested ones are temporary overrides.
>
> I think what you kind of want is to start out with
> lockdep_set_novalidate_class() then switch to a real class once things
> are finished. Not sure exactly how :)

Huh, is there an API that sets a *subclass* (not a class) at init
time? I don't think there is.

Anyway, I'm pretty sure I just need to use the normal _nested()
locking API. I'm still cleaning up and testing a little bit, but I'll
send it out in a short while, unless I run into unexpected trouble.
Let's continue this if necessary once there's a concrete patch to talk
about. :)


Re: [PATCH v2 2/5] media: mt9p031: Read back the real clock rate

2020-10-01 Thread Laurent Pinchart
Hi Stefan,

Thank you for the patch.

On Wed, Sep 30, 2020 at 12:51:30PM +0200, Stefan Riedmueller wrote:
> From: Enrico Scholz 
> 
> The real and requested clock can differ and because it is used to
> calculate PLL values, the real clock rate should be read.
> 
> Signed-off-by: Enrico Scholz 
> Signed-off-by: Stefan Riedmueller 

Reviewed-by: Laurent Pinchart 

> ---
> No changes in v2
> ---
>  drivers/media/i2c/mt9p031.c | 9 ++---
>  1 file changed, 6 insertions(+), 3 deletions(-)
> 
> diff --git a/drivers/media/i2c/mt9p031.c b/drivers/media/i2c/mt9p031.c
> index 2e6671ef877c..b4c042f418c1 100644
> --- a/drivers/media/i2c/mt9p031.c
> +++ b/drivers/media/i2c/mt9p031.c
> @@ -255,6 +255,7 @@ static int mt9p031_clk_setup(struct mt9p031 *mt9p031)
>  
>   struct i2c_client *client = v4l2_get_subdevdata(&mt9p031->subdev);
>   struct mt9p031_platform_data *pdata = mt9p031->pdata;
> + unsigned long ext_freq;
>   int ret;
>  
>   mt9p031->clk = devm_clk_get(&client->dev, NULL);
> @@ -265,13 +266,15 @@ static int mt9p031_clk_setup(struct mt9p031 *mt9p031)
>   if (ret < 0)
>   return ret;
>  
> + ext_freq = clk_get_rate(mt9p031->clk);
> +
>   /* If the external clock frequency is out of bounds for the PLL use the
>* pixel clock divider only and disable the PLL.
>*/
> - if (pdata->ext_freq > limits.ext_clock_max) {
> + if (ext_freq > limits.ext_clock_max) {
>   unsigned int div;
>  
> - div = DIV_ROUND_UP(pdata->ext_freq, pdata->target_freq);
> + div = DIV_ROUND_UP(ext_freq, pdata->target_freq);
>   div = roundup_pow_of_two(div) / 2;
>  
>   mt9p031->clk_div = min_t(unsigned int, div, 64);
> @@ -280,7 +283,7 @@ static int mt9p031_clk_setup(struct mt9p031 *mt9p031)
>   return 0;
>   }
>  
> - mt9p031->pll.ext_clock = pdata->ext_freq;
> + mt9p031->pll.ext_clock = ext_freq;
>   mt9p031->pll.pix_clock = pdata->target_freq;
>   mt9p031->use_pll = true;
>  

-- 
Regards,

Laurent Pinchart


Re: [PATCH v2 3/5] media: mt9p031: Implement [gs]_register debug calls

2020-10-01 Thread Laurent Pinchart
Hi Stefan,

On Thu, Oct 01, 2020 at 10:56:24AM +0200, Stefan Riedmüller wrote:
> On 30.09.20 13:38, Laurent Pinchart wrote:
> > On Wed, Sep 30, 2020 at 12:51:31PM +0200, Stefan Riedmueller wrote:
> >> From: Enrico Scholz 
> >>
> >> Implement g_register and s_register v4l2_subdev_core_ops to access
> >> camera register directly from userspace for debug purposes.
> > 
> > As the names of the operations imply, this is meant for debug purpose
> > only. They are however prone to be abused to configure the sensor from
> > userspace in production, which isn't a direction we want to take.
> > What's your use case for this ?  I'd rather drop this patch and see the
> > driver extended with support for more controls if needed
> 
> thanks for your feedback.
> 
> I get your point. I myself solely use these operations for debugging 
> purposes but I'm aware that others like to abuse them.
> 
> I thought I'd send it anyway since for me the DEBUG config is enough to 
> signal that these operations are not to be used with a production system. 
> But I'm OK with dropping this patch if you think it might send the wrong 
> signal.

I'd rather avoid this patch due to the risk of abuse if it's OK with
you.

> >> Signed-off-by: Enrico Scholz 
> >> Signed-off-by: Stefan Riedmueller 
> >> ---
> >> No changes in v2
> >> ---
> >>   drivers/media/i2c/mt9p031.c | 28 
> >>   1 file changed, 28 insertions(+)
> >>
> >> diff --git a/drivers/media/i2c/mt9p031.c b/drivers/media/i2c/mt9p031.c
> >> index b4c042f418c1..de36025260a8 100644
> >> --- a/drivers/media/i2c/mt9p031.c
> >> +++ b/drivers/media/i2c/mt9p031.c
> >> @@ -703,6 +703,30 @@ static int mt9p031_restore_blc(struct mt9p031 
> >> *mt9p031)
> >>return 0;
> >>   }
> >>   
> >> +#ifdef CONFIG_VIDEO_ADV_DEBUG
> >> +static int mt9p031_g_register(struct v4l2_subdev *sd,
> >> +struct v4l2_dbg_register *reg)
> >> +{
> >> +  struct i2c_client *client = v4l2_get_subdevdata(sd);
> >> +  int ret;
> >> +
> >> +  ret = mt9p031_read(client, reg->reg);
> >> +  if (ret < 0)
> >> +  return ret;
> >> +
> >> +  reg->val = ret;
> >> +  return 0;
> >> +}
> >> +
> >> +static int mt9p031_s_register(struct v4l2_subdev *sd,
> >> +struct v4l2_dbg_register const *reg)
> >> +{
> >> +  struct i2c_client *client = v4l2_get_subdevdata(sd);
> >> +
> >> +  return mt9p031_write(client, reg->reg, reg->val);
> >> +}
> >> +#endif
> >> +
> >>   static int mt9p031_s_ctrl(struct v4l2_ctrl *ctrl)
> >>   {
> >>struct mt9p031 *mt9p031 =
> >> @@ -1000,6 +1024,10 @@ static int mt9p031_close(struct v4l2_subdev 
> >> *subdev, struct v4l2_subdev_fh *fh)
> >>   
> >>   static const struct v4l2_subdev_core_ops mt9p031_subdev_core_ops = {
> >>.s_power= mt9p031_set_power,
> >> +#ifdef CONFIG_VIDEO_ADV_DEBUG
> >> +  .s_register = mt9p031_s_register,
> >> +  .g_register = mt9p031_g_register,
> >> +#endif
> >>   };
> >>   
> >>   static const struct v4l2_subdev_video_ops mt9p031_subdev_video_ops = {

-- 
Regards,

Laurent Pinchart


Re: [PATCH v2 5/5] media: mt9p031: Fix corrupted frame after restarting stream

2020-10-01 Thread Laurent Pinchart
Hi Stefan,

Thank you for the patch.

On Wed, Sep 30, 2020 at 12:51:33PM +0200, Stefan Riedmueller wrote:
> From: Dirk Bender 
> 
> To prevent corrupted frames after starting and stopping the sensor it's

s/it's/its/

> datasheet specifies a specific pause sequence to follow:
> 
> Stopping:
>   Set Pause_Restart Bit -> Set Restart Bit -> Set Chip_Enable Off
> 
> Restarting:
>   Set Chip_Enable On -> Clear Pause_Restart Bit
> 
> The Restart Bit is cleared automatically and must not be cleared
> manually as this would cause undefined behavior.
> 
> Signed-off-by: Dirk Bender 
> Signed-off-by: Stefan Riedmueller 
> ---
> No changes in v2
> ---
>  drivers/media/i2c/mt9p031.c | 25 +
>  1 file changed, 25 insertions(+)
> 
> diff --git a/drivers/media/i2c/mt9p031.c b/drivers/media/i2c/mt9p031.c
> index d10457361e6c..d59f66e3dcf3 100644
> --- a/drivers/media/i2c/mt9p031.c
> +++ b/drivers/media/i2c/mt9p031.c
> @@ -80,6 +80,8 @@
>  #define  MT9P031_PIXEL_CLOCK_SHIFT(n)((n) << 8)
>  #define  MT9P031_PIXEL_CLOCK_DIVIDE(n)   ((n) << 0)
>  #define MT9P031_FRAME_RESTART0x0b
> +#define  MT9P031_FRAME_RESTART_SET   (1 << 0)
> +#define  MT9P031_FRAME_PAUSE_RESTART_SET (1 << 1)

The fields are named Restart and Pause_Restart, I would drop _SET. Could
you also sort them from MSB to LSB as for the other registers ? Using
BIT() would be good too, although this could be done as an additional
patch to convert all the existing macros.

>  #define MT9P031_SHUTTER_DELAY0x0c
>  #define MT9P031_RST  0x0d
>  #define  MT9P031_RST_ENABLE  1
> @@ -483,9 +485,25 @@ static int mt9p031_set_params(struct mt9p031 *mt9p031)
>  static int mt9p031_s_stream(struct v4l2_subdev *subdev, int enable)
>  {
>   struct mt9p031 *mt9p031 = to_mt9p031(subdev);
> + struct i2c_client *client = v4l2_get_subdevdata(subdev);
> + int val;
>   int ret;
>  
>   if (!enable) {
> + val = mt9p031_read(client, MT9P031_FRAME_RESTART);

Do you need to read the register ? Can't you write
MT9P031_FRAME_PAUSE_RESTART_SET and then MT9P031_FRAME_PAUSE_RESTART_SET
| MT9P031_FRAME_RESTART_SET ? And actually, can't we just write both
bits in one go, do we need two writes ?

> +
> + /* enable pause restart */
> + val |= MT9P031_FRAME_PAUSE_RESTART_SET;
> + ret = mt9p031_write(client, MT9P031_FRAME_RESTART, val);
> + if (ret < 0)
> + return ret;
> +
> + /* enable restart + keep pause restart set */
> + val |= MT9P031_FRAME_RESTART_SET;
> + ret = mt9p031_write(client, MT9P031_FRAME_RESTART, val);
> + if (ret < 0)
> + return ret;
> +
>   /* Stop sensor readout */
>   ret = mt9p031_set_output_control(mt9p031,
>MT9P031_OUTPUT_CONTROL_CEN, 0);
> @@ -505,6 +523,13 @@ static int mt9p031_s_stream(struct v4l2_subdev *subdev, 
> int enable)
>   if (ret < 0)
>   return ret;
>  
> + val = mt9p031_read(client, MT9P031_FRAME_RESTART);
> + /* disable reset + pause restart */
> + val &= ~MT9P031_FRAME_PAUSE_RESTART_SET;

Same here, I think you can simply write MT9P031_FRAME_PAUSE_RESTART_SET.

> + ret = mt9p031_write(client, MT9P031_FRAME_RESTART, val);
> + if (ret < 0)
> + return ret;
> +
>   return mt9p031_pll_enable(mt9p031);
>  }
>  

-- 
Regards,

Laurent Pinchart


Re: linux-next: manual merge of the net-next tree with the net tree

2020-10-01 Thread Mat Martineau



On Thu, 1 Oct 2020, Stephen Rothwell wrote:


Hi all,

Today's linux-next merge of the net-next tree got a conflict in:

 net/mptcp/protocol.c

between commit:

 917944da3bfc ("mptcp: Consistently use READ_ONCE/WRITE_ONCE with msk->ack_seq")

from the net tree and commit:

 8268ed4c9d19 ("mptcp: introduce and use mptcp_try_coalesce()")
 ab174ad8ef76 ("mptcp: move ooo skbs into msk out of order queue.")

from the net-next tree.

I fixed it up (I think - see below) and can carry the fix as
necessary. This is now fixed as far as linux-next is concerned, but any
non trivial conflicts should be mentioned to your upstream maintainer
when your tree is submitted for merging.  You may also want to consider
cooperating with the maintainer of the conflicting tree to minimise any
particularly complex conflicts.



Hi Stephen,

I am fine with introducing the WRITE_ONCE() in __mptcp_move_skb() as your 
conflict resolution does, or I can submit a patch later to add the 
WRITE_ONCE() in that location. The latter is what I suggested to David 
when submitting the patch to the net tree.


Thanks,

Mat




diff --cc net/mptcp/protocol.c
index 5d747c6a610e,34c037731f35..
--- a/net/mptcp/protocol.c
+++ b/net/mptcp/protocol.c
@@@ -112,64 -112,205 +112,205 @@@ static int __mptcp_socket_create(struc
return 0;
 }

- static void __mptcp_move_skb(struct mptcp_sock *msk, struct sock *ssk,
-struct sk_buff *skb,
-unsigned int offset, size_t copy_len)
+ static void mptcp_drop(struct sock *sk, struct sk_buff *skb)
+ {
+   sk_drops_add(sk, skb);
+   __kfree_skb(skb);
+ }
+
+ static bool mptcp_try_coalesce(struct sock *sk, struct sk_buff *to,
+  struct sk_buff *from)
+ {
+   bool fragstolen;
+   int delta;
+
+   if (MPTCP_SKB_CB(from)->offset ||
+   !skb_try_coalesce(to, from, &fragstolen, &delta))
+   return false;
+
+   pr_debug("colesced seq %llx into %llx new len %d new end seq %llx",
+MPTCP_SKB_CB(from)->map_seq, MPTCP_SKB_CB(to)->map_seq,
+to->len, MPTCP_SKB_CB(from)->end_seq);
+   MPTCP_SKB_CB(to)->end_seq = MPTCP_SKB_CB(from)->end_seq;
+   kfree_skb_partial(from, fragstolen);
+   atomic_add(delta, &sk->sk_rmem_alloc);
+   sk_mem_charge(sk, delta);
+   return true;
+ }
+
+ static bool mptcp_ooo_try_coalesce(struct mptcp_sock *msk, struct sk_buff *to,
+  struct sk_buff *from)
+ {
+   if (MPTCP_SKB_CB(from)->map_seq != MPTCP_SKB_CB(to)->end_seq)
+   return false;
+
+   return mptcp_try_coalesce((struct sock *)msk, to, from);
+ }
+
+ /* "inspired" by tcp_data_queue_ofo(), main differences:
+  * - use mptcp seqs
+  * - don't cope with sacks
+  */
+ static void mptcp_data_queue_ofo(struct mptcp_sock *msk, struct sk_buff *skb)
 {
struct sock *sk = (struct sock *)msk;
-   struct sk_buff *tail;
+   struct rb_node **p, *parent;
+   u64 seq, end_seq, max_seq;
+   struct sk_buff *skb1;
+   int space;
+
+   seq = MPTCP_SKB_CB(skb)->map_seq;
+   end_seq = MPTCP_SKB_CB(skb)->end_seq;
+   space = tcp_space(sk);
+   max_seq = space > 0 ? space + msk->ack_seq : msk->ack_seq;
+
+   pr_debug("msk=%p seq=%llx limit=%llx empty=%d", msk, seq, max_seq,
+RB_EMPTY_ROOT(>out_of_order_queue));
+   if (after64(seq, max_seq)) {
+   /* out of window */
+   mptcp_drop(sk, skb);
+   MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_NODSSWINDOW);
+   return;
+   }

-   __skb_unlink(skb, >sk_receive_queue);
+   p = >out_of_order_queue.rb_node;
+   MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_OFOQUEUE);
+   if (RB_EMPTY_ROOT(>out_of_order_queue)) {
+   rb_link_node(>rbnode, NULL, p);
+   rb_insert_color(>rbnode, >out_of_order_queue);
+   msk->ooo_last_skb = skb;
+   goto end;
+   }

-   skb_ext_reset(skb);
-   skb_orphan(skb);
-   WRITE_ONCE(msk->ack_seq, msk->ack_seq + copy_len);
+   /* with 2 subflows, adding at end of ooo queue is quite likely
+* Use of ooo_last_skb avoids the O(Log(N)) rbtree lookup.
+*/
+   if (mptcp_ooo_try_coalesce(msk, msk->ooo_last_skb, skb)) {
+   MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_OFOMERGE);
+   MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_OFOQUEUETAIL);
+   return;
+   }

-   tail = skb_peek_tail(>sk_receive_queue);
-   if (offset == 0 && tail) {
-   bool fragstolen;
-   int delta;
+   /* Can avoid an rbtree lookup if we are adding skb after ooo_last_skb */
+   if (!before64(seq, MPTCP_SKB_CB(msk->ooo_last_skb)->end_seq)) {
+   MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_OFOQUEUETAIL);
+   parent = >ooo_last_skb->rbnode;
+   p = >rb_right;
+   goto insert;
+   }

-   

Re: [PATCH] mm: memcg/slab: fix slab statistics in !SMP configuration

2020-10-01 Thread kernel test robot
Hi Roman,

Thank you for the patch! Yet something to improve:

[auto build test ERROR on mmotm/master]

url:
https://github.com/0day-ci/linux/commits/Roman-Gushchin/mm-memcg-slab-fix-slab-statistics-in-SMP-configuration/20201002-044114
base:   git://git.cmpxchg.org/linux-mmotm.git master
config: i386-randconfig-s002-20200930 (attached as .config)
compiler: gcc-9 (Debian 9.3.0-15) 9.3.0
reproduce:
# apt-get install sparse
# sparse version: v0.6.2-201-g24bdaac6-dirty
# 
https://github.com/0day-ci/linux/commit/3e4248734433fea1624e4971258042af2f231e02
git remote add linux-review https://github.com/0day-ci/linux
git fetch --no-tags linux-review 
Roman-Gushchin/mm-memcg-slab-fix-slab-statistics-in-SMP-configuration/20201002-044114
git checkout 3e4248734433fea1624e4971258042af2f231e02
# save the attached .config to linux build tree
make W=1 C=1 CF='-fdiagnostic-prefix -D__CHECK_ENDIAN__' ARCH=i386 

If you fix the issue, kindly add following tag as appropriate
Reported-by: kernel test robot 

All errors (new ones prefixed by >>):

   In file included from include/linux/mm.h:1317,
from include/linux/memcontrol.h:20,
from include/linux/swap.h:9,
from include/linux/suspend.h:5,
from arch/x86/kernel/asm-offsets.c:13:
   include/linux/vmstat.h: In function '__mod_node_page_state':
>> include/linux/vmstat.h:295:6: error: implicit declaration of function 
>> 'vmstat_item_in_bytes' [-Werror=implicit-function-declaration]
 295 |  if (vmstat_item_in_bytes(item)) {
 |  ^~~~
   cc1: some warnings being treated as errors
   make[2]: *** [scripts/Makefile.build:99: arch/x86/kernel/asm-offsets.s] 
Error 1
   make[2]: Target '__build' not remade because of errors.
   make[1]: *** [Makefile:1139: prepare0] Error 2
   make[1]: Target 'prepare' not remade because of errors.
   make: *** [Makefile:179: sub-make] Error 2
   make: Target 'prepare' not remade because of errors.

vim +/vmstat_item_in_bytes +295 include/linux/vmstat.h

   291  
   292  static inline void __mod_node_page_state(struct pglist_data *pgdat,
   293  enum node_stat_item item, int delta)
   294  {
 > 295  if (vmstat_item_in_bytes(item)) {
   296  VM_WARN_ON_ONCE(delta & (PAGE_SIZE - 1));
   297  delta >>= PAGE_SHIFT;
   298  }
   299  
   300  node_page_state_add(delta, pgdat, item);
   301  }
   302  

---
0-DAY CI Kernel Test Service, Intel Corporation
https://lists.01.org/hyperkitty/list/kbuild-...@lists.01.org


.config.gz
Description: application/gzip


Re: [PATCH 0/5] Speed up mremap on large regions

2020-10-01 Thread Lokesh Gidra
On Thu, Oct 1, 2020 at 9:00 AM Kalesh Singh  wrote:
>
> On Thu, Oct 1, 2020 at 8:27 AM Kirill A. Shutemov
>  wrote:
> >
> > On Wed, Sep 30, 2020 at 03:42:17PM -0700, Lokesh Gidra wrote:
> > > On Wed, Sep 30, 2020 at 3:32 PM Kirill A. Shutemov
> > >  wrote:
> > > >
> > > > On Wed, Sep 30, 2020 at 10:21:17PM +, Kalesh Singh wrote:
> > > > > mremap time can be optimized by moving entries at the PMD/PUD level if
> > > > > the source and destination addresses are PMD/PUD-aligned and
> > > > > PMD/PUD-sized. Enable moving at the PMD and PUD levels on arm64 and
> > > > > x86. Other architectures where this type of move is supported and 
> > > > > known to
> > > > > be safe can also opt-in to these optimizations by enabling 
> > > > > HAVE_MOVE_PMD
> > > > > and HAVE_MOVE_PUD.
> > > > >
> > > > > Observed Performance Improvements for remapping a PUD-aligned 
> > > > > 1GB-sized
> > > > > region on x86 and arm64:
> > > > >
> > > > > - HAVE_MOVE_PMD is already enabled on x86 : N/A
> > > > > - Enabling HAVE_MOVE_PUD on x86   : ~13x speed up
> > > > >
> > > > > - Enabling HAVE_MOVE_PMD on arm64 : ~ 8x speed up
> > > > > - Enabling HAVE_MOVE_PUD on arm64 : ~19x speed up
> > > > >
> > > > >   Altogether, HAVE_MOVE_PMD and HAVE_MOVE_PUD
> > > > >   give a total of ~150x speed up on arm64.
> > > >
> > > > Is there a *real* workload that benefit from HAVE_MOVE_PUD?
> > > >
> > > We have a Java garbage collector under development which requires
> > > moving physical pages of multi-gigabyte heap using mremap. During this
> > > move, the application threads have to be paused for correctness. It is
> > > critical to keep this pause as short as possible to avoid jitters
> > > during user interaction. This is where HAVE_MOVE_PUD will greatly
> > > help.
> >
> > Any chance to quantify the effect of mremap() with and without
> > HAVE_MOVE_PUD?
> >
> > I doubt it's a major contributor to the GC pause. I expect you need to
> > move tens of gigs to get sizable effect. And if your GC routinely moves
> > tens of gigs, maybe problem somewhere else?
> >
> > I'm asking for numbers, because increase in complexity comes with cost.
> > If it doesn't provide a substantial benefit to a real workload
> > maintaining the code forever doesn't make sense.
>
mremap is indeed the biggest contributor to the GC pause. It has to
take place in what is typically known as a 'stop-the-world' pause,
wherein all application threads are paused. During this pause the GC
thread flips the GC roots (threads' stacks, globals etc.), and then
resumes threads along with concurrent compaction of the heap. This
GC-root flip differs depending on which compaction algorithm is being
used.

In our case it involves updating object references in threads' stacks
and remapping java heap to a different location. The threads' stacks
can be handled in parallel with the mremap. Therefore, the dominant
factor is indeed the cost of mremap. From patches 2 and 4, it is clear
that remapping 1GB without this optimization will take ~9ms on arm64.

Although this mremap has to happen only once every GC cycle, and the
typical size is also not going to be more than a GB or 2, pausing
application threads for ~9ms is guaranteed to cause jitters. OTOH,
with this optimization, mremap is reduced to ~60us, which is a totally
acceptable pause time.

Unfortunately, implementation of the new GC algorithm hasn't yet
reached the point where I can quantify the effect of this
optimization. But I can confirm that without this optimization the new
GC will not be approved.


> Lokesh on this thread would be better able to answer this. I'll let
> him weigh in here.
> Thanks, Kalesh
> >
> > --
> >  Kirill A. Shutemov
> >
> > --
> > To unsubscribe from this group and stop receiving emails from it, send an 
> > email to kernel-team+unsubscr...@android.com.
> >


Re: linux-next: manual merge of the net-next tree with the net tree

2020-10-01 Thread Mat Martineau



On Thu, 1 Oct 2020, Stephen Rothwell wrote:


Hi all,

Today's linux-next merge of the net-next tree got a conflict in:

 net/mptcp/protocol.h

between commit:

 1a49b2c2a501 ("mptcp: Handle incoming 32-bit DATA_FIN values")

from the net tree and commit:

 5c8c1640956e ("mptcp: add mptcp_destroy_common helper")

from the net-next tree.

I fixed it up (see below) and can carry the fix as necessary. This
is now fixed as far as linux-next is concerned, but any non trivial
conflicts should be mentioned to your upstream maintainer when your tree
is submitted for merging.  You may also want to consider cooperating
with the maintainer of the conflicting tree to minimise any particularly
complex conflicts.

--
Cheers,
Stephen Rothwell

diff --cc net/mptcp/protocol.h
index 20f04ac85409,7cfe52aeb2b8..
--- a/net/mptcp/protocol.h
+++ b/net/mptcp/protocol.h
@@@ -387,7 -407,8 +407,8 @@@ void mptcp_data_ready(struct sock *sk,
 bool mptcp_finish_join(struct sock *sk);
 void mptcp_data_acked(struct sock *sk);
 void mptcp_subflow_eof(struct sock *sk);
-bool mptcp_update_rcv_data_fin(struct mptcp_sock *msk, u64 data_fin_seq);
+bool mptcp_update_rcv_data_fin(struct mptcp_sock *msk, u64 data_fin_seq, bool 
use_64bit);
+ void mptcp_destroy_common(struct mptcp_sock *msk);



Yes, this is the appropriate conflict resolution. Thanks!


--
Mat Martineau
Intel


Re: [PATCH 2/7] Function to retrieve main clock state

2020-10-01 Thread Thomas Gleixner
On Fri, Oct 02 2020 at 00:05, Thomas Gleixner wrote:
> On Thu, Oct 01 2020 at 22:51, Erez Geva wrote:
>
> same comments as for patch 1 apply.
>
>> Add kernel function to retrieve main clock oscillator state.
>
> The function you are adding is named adjtimex(). adjtimex(2) is a well
> known user space interface and naming a read only kernel interface the
> same way is misleading.

Aside from that, there is no user for this function in this series. We're
not adding interfaces just because we can.

Thanks,

tglx


[PATCH v3 01/13] x86/platform/uv: Remove UV BAU TLB Shootdown Handler

2020-10-01 Thread Mike Travis
The Broadcast Assist Unit (BAU) TLB shootdown handler is being rewritten
to become the UV BAU APIC driver.  It is designed to speed up sending
IPIs to selective CPUs within the system.  Remove the current TLB
shootdown handler (tlb_uv.c) file and a couple of kernel hooks in the
interim.

Signed-off-by: Mike Travis 
Reviewed-by: Dimitri Sivanich 
---
 arch/x86/include/asm/idtentry.h  |4 -
 arch/x86/include/asm/uv/uv.h |4 +-
 arch/x86/include/asm/uv/uv_bau.h |  755 ---
 arch/x86/kernel/idt.c|3 -
 arch/x86/mm/tlb.c|   24 -
 arch/x86/platform/uv/Makefile|2 +-
 arch/x86/platform/uv/tlb_uv.c| 2097 --
 7 files changed, 2 insertions(+), 2887 deletions(-)
 delete mode 100644 arch/x86/include/asm/uv/uv_bau.h
 delete mode 100644 arch/x86/platform/uv/tlb_uv.c

diff --git a/arch/x86/include/asm/idtentry.h b/arch/x86/include/asm/idtentry.h
index a43366191212..27485af16008 100644
--- a/arch/x86/include/asm/idtentry.h
+++ b/arch/x86/include/asm/idtentry.h
@@ -591,10 +591,6 @@ DECLARE_IDTENTRY_SYSVEC(CALL_FUNCTION_VECTOR,  
sysvec_call_function);
 #endif
 
 #ifdef CONFIG_X86_LOCAL_APIC
-# ifdef CONFIG_X86_UV
-DECLARE_IDTENTRY_SYSVEC(UV_BAU_MESSAGE,
sysvec_uv_bau_message);
-# endif
-
 # ifdef CONFIG_X86_MCE_THRESHOLD
 DECLARE_IDTENTRY_SYSVEC(THRESHOLD_APIC_VECTOR, sysvec_threshold);
 # endif
diff --git a/arch/x86/include/asm/uv/uv.h b/arch/x86/include/asm/uv/uv.h
index e48aea9ba47d..172d3e4a9e4b 100644
--- a/arch/x86/include/asm/uv/uv.h
+++ b/arch/x86/include/asm/uv/uv.h
@@ -35,10 +35,8 @@ extern int is_uv_hubbed(int uvtype);
 extern void uv_cpu_init(void);
 extern void uv_nmi_init(void);
 extern void uv_system_init(void);
-extern const struct cpumask *uv_flush_tlb_others(const struct cpumask *cpumask,
-const struct flush_tlb_info 
*info);
 
-#else  /* X86_UV */
+#else  /* !X86_UV */
 
 static inline enum uv_system_type get_uv_system_type(void) { return UV_NONE; }
 static inline bool is_early_uv_system(void){ return 0; }
diff --git a/arch/x86/include/asm/uv/uv_bau.h b/arch/x86/include/asm/uv/uv_bau.h
deleted file mode 100644
index cd24804955d7..
--- a/arch/x86/include/asm/uv/uv_bau.h
+++ /dev/null
@@ -1,755 +0,0 @@
-/*
- * This file is subject to the terms and conditions of the GNU General Public
- * License.  See the file "COPYING" in the main directory of this archive
- * for more details.
- *
- * SGI UV Broadcast Assist Unit definitions
- *
- * Copyright (C) 2008-2011 Silicon Graphics, Inc. All rights reserved.
- */
-
-#ifndef _ASM_X86_UV_UV_BAU_H
-#define _ASM_X86_UV_UV_BAU_H
-
-#include 
-#include 
-
-#define BITSPERBYTE 8
-
-/*
- * Broadcast Assist Unit messaging structures
- *
- * Selective Broadcast activations are induced by software action
- * specifying a particular 8-descriptor "set" via a 6-bit index written
- * to an MMR.
- * Thus there are 64 unique 512-byte sets of SB descriptors - one set for
- * each 6-bit index value. These descriptor sets are mapped in sequence
- * starting with set 0 located at the address specified in the
- * BAU_SB_DESCRIPTOR_BASE register, set 1 is located at BASE + 512,
- * set 2 is at BASE + 2*512, set 3 at BASE + 3*512, and so on.
- *
- * We will use one set for sending BAU messages from each of the
- * cpu's on the uvhub.
- *
- * TLB shootdown will use the first of the 8 descriptors of each set.
- * Each of the descriptors is 64 bytes in size (8*64 = 512 bytes in a set).
- */
-
-#define MAX_CPUS_PER_UVHUB 128
-#define MAX_CPUS_PER_SOCKET64
-#define ADP_SZ 64 /* hardware-provided max. */
-#define UV_CPUS_PER_AS 32 /* hardware-provided max. */
-#define ITEMS_PER_DESC 8
-/* the 'throttle' to prevent the hardware stay-busy bug */
-#define MAX_BAU_CONCURRENT 3
-#define UV_ACT_STATUS_MASK 0x3
-#define UV_ACT_STATUS_SIZE 2
-#define UV_DISTRIBUTION_SIZE   256
-#define UV_SW_ACK_NPENDING 8
-#define UV_NET_ENDPOINT_INTD   0x28
-#define UV_PAYLOADQ_GNODE_SHIFT49
-#define UV_PTC_BASENAME"sgi_uv/ptc_statistics"
-#define UV_BAU_BASENAME"sgi_uv/bau_tunables"
-#define UV_BAU_TUNABLES_DIR"sgi_uv"
-#define UV_BAU_TUNABLES_FILE   "bau_tunables"
-#define WHITESPACE " \t\n"
-#define cpubit_isset(cpu, bau_local_cpumask) \
-   test_bit((cpu), (bau_local_cpumask).bits)
-
-/* [19:16] SOFT_ACK timeout period  19: 1 is urgency 7  17:16 1 is multiplier 
*/
-/*
- * UV2: Bit 19 selects between
- *  (0): 10 microsecond timebase and
- *  (1): 80 microseconds
- *  we're using 560us
- */
-#define UV_INTD_SOFT_ACK_TIMEOUT_PERIOD(15UL)
-/* assuming UV3 is the same */
-
-#define BAU_MISC_CONTROL_MULT_MASK 3
-
-#define UVH_AGING_PRESCALE_SEL 

[PATCH v3 05/13] x86/platform/uv: Add UV5 direct references

2020-10-01 Thread Mike Travis
Add new references to UV5 (and UVY class) system MMR addresses and
fields primarily caused by the expansion from 46 to 52 bits of
physical memory address.

Signed-off-by: Mike Travis 
Reviewed-by: Dimitri Sivanich 
Reviewed-by: Steve Wahl 
---
 arch/x86/include/asm/uv/uv_hub.h   |  49 +-
 arch/x86/kernel/apic/x2apic_uv_x.c | 100 +
 2 files changed, 105 insertions(+), 44 deletions(-)

diff --git a/arch/x86/include/asm/uv/uv_hub.h b/arch/x86/include/asm/uv/uv_hub.h
index 76969be09660..ecf5c93e7ae8 100644
--- a/arch/x86/include/asm/uv/uv_hub.h
+++ b/arch/x86/include/asm/uv/uv_hub.h
@@ -160,6 +160,7 @@ struct uv_hub_info_s {
unsigned char   gr_table_len;
unsigned char   apic_pnode_shift;
unsigned char   gpa_shift;
+   unsigned char   nasid_shift;
unsigned char   m_shift;
unsigned char   n_lshift;
unsigned intgnode_extra;
@@ -226,6 +227,7 @@ static inline __init void uv_hub_type_set(int uvmask)
 #define UV3_HUB_REVISION_BASE  5
 #define UV4_HUB_REVISION_BASE  7
 #define UV4A_HUB_REVISION_BASE 8   /* UV4 (fixed) rev 2 */
+#define UV5_HUB_REVISION_BASE  9
 
 static inline int is_uv(int uvmask) { return uv_hub_type() & uvmask; }
 static inline int is_uv1_hub(void) { return 0; }
@@ -233,7 +235,7 @@ static inline int is_uv2_hub(void) { return is_uv(UV2); }
 static inline int is_uv3_hub(void) { return is_uv(UV3); }
 static inline int is_uv4a_hub(void) { return is_uv(UV4A); }
 static inline int is_uv4_hub(void) { return is_uv(UV4); }
-static inline int is_uv5_hub(void) { return 0; }
+static inline int is_uv5_hub(void) { return is_uv(UV5); }
 
 /*
  * UV4A is a revision of UV4.  So on UV4A, both is_uv4_hub() and
@@ -246,7 +248,7 @@ static inline int is_uv5_hub(void) { return 0; }
 static inline int is_uvx_hub(void) { return is_uv(UVX); }
 
 /* UVY class: UV5,..? */
-static inline int is_uvy_hub(void) { return 0; }
+static inline int is_uvy_hub(void) { return is_uv(UVY); }
 
 /* Any UV Hubbed System */
 static inline int is_uv_hub(void) { return is_uv(UV_ANY); }
@@ -271,9 +273,11 @@ union uvh_apicid {
  * g -  GNODE (full 15-bit global nasid, right shifted 1)
  * p -  PNODE (local part of nsids, right shifted 1)
  */
-#define UV_NASID_TO_PNODE(n)   (((n) >> 1) & uv_hub_info->pnode_mask)
+#define UV_NASID_TO_PNODE(n)   \
+   (((n) >> uv_hub_info->nasid_shift) & uv_hub_info->pnode_mask)
 #define UV_PNODE_TO_GNODE(p)   ((p) |uv_hub_info->gnode_extra)
-#define UV_PNODE_TO_NASID(p)   (UV_PNODE_TO_GNODE(p) << 1)
+#define UV_PNODE_TO_NASID(p)   \
+   (UV_PNODE_TO_GNODE(p) << uv_hub_info->nasid_shift)
 
 #define UV2_LOCAL_MMR_BASE 0xfa00UL
 #define UV2_GLOBAL_MMR32_BASE  0xfc00UL
@@ -290,25 +294,38 @@ union uvh_apicid {
 #define UV4_LOCAL_MMR_SIZE (32UL * 1024 * 1024)
 #define UV4_GLOBAL_MMR32_SIZE  0
 
+#define UV5_LOCAL_MMR_BASE 0xfa00UL
+#define UV5_GLOBAL_MMR32_BASE  0
+#define UV5_LOCAL_MMR_SIZE (32UL * 1024 * 1024)
+#define UV5_GLOBAL_MMR32_SIZE  0
+
 #define UV_LOCAL_MMR_BASE  (   \
-   is_uv2_hub() ? UV2_LOCAL_MMR_BASE : \
-   is_uv3_hub() ? UV3_LOCAL_MMR_BASE : \
-   /*is_uv4_hub*/ UV4_LOCAL_MMR_BASE)
+   is_uv(UV2) ? UV2_LOCAL_MMR_BASE : \
+   is_uv(UV3) ? UV3_LOCAL_MMR_BASE : \
+   is_uv(UV4) ? UV4_LOCAL_MMR_BASE : \
+   is_uv(UV5) ? UV5_LOCAL_MMR_BASE : \
+   0)
 
 #define UV_GLOBAL_MMR32_BASE   (   \
-   is_uv2_hub() ? UV2_GLOBAL_MMR32_BASE : \
-   is_uv3_hub() ? UV3_GLOBAL_MMR32_BASE : \
-   /*is_uv4_hub*/ UV4_GLOBAL_MMR32_BASE)
+   is_uv(UV2) ? UV2_GLOBAL_MMR32_BASE : \
+   is_uv(UV3) ? UV3_GLOBAL_MMR32_BASE : \
+   is_uv(UV4) ? UV4_GLOBAL_MMR32_BASE : \
+   is_uv(UV5) ? UV5_GLOBAL_MMR32_BASE : \
+   0)
 
 #define UV_LOCAL_MMR_SIZE  (   \
-   is_uv2_hub() ? UV2_LOCAL_MMR_SIZE : \
-   is_uv3_hub() ? UV3_LOCAL_MMR_SIZE : \
-   /*is_uv4_hub*/ UV4_LOCAL_MMR_SIZE)
+   is_uv(UV2) ? UV2_LOCAL_MMR_SIZE : \
+ 

[PATCH v3 02/13] x86/platform/uv: Remove SCIR MMR references for UVY systems.

2020-10-01 Thread Mike Travis
UV class systems no longer use System Controller for monitoring of CPU
activity provided by this driver.  Other methods have been developed
for BIOS and the management controller (BMC).  This patch removes that
supporting code.

Signed-off-by: Mike Travis 
Reviewed-by: Dimitri Sivanich 
---
 arch/x86/include/asm/uv/uv_hub.h   | 43 ++--
 arch/x86/kernel/apic/x2apic_uv_x.c | 82 --
 2 files changed, 3 insertions(+), 122 deletions(-)

diff --git a/arch/x86/include/asm/uv/uv_hub.h b/arch/x86/include/asm/uv/uv_hub.h
index 100d66806503..b21228db75bf 100644
--- a/arch/x86/include/asm/uv/uv_hub.h
+++ b/arch/x86/include/asm/uv/uv_hub.h
@@ -129,17 +129,6 @@
  */
 #define UV_MAX_NASID_VALUE (UV_MAX_NUMALINK_BLADES * 2)
 
-/* System Controller Interface Reg info */
-struct uv_scir_s {
-   struct timer_list timer;
-   unsigned long   offset;
-   unsigned long   last;
-   unsigned long   idle_on;
-   unsigned long   idle_off;
-   unsigned char   state;
-   unsigned char   enabled;
-};
-
 /* GAM (globally addressed memory) range table */
 struct uv_gam_range_s {
u32 limit;  /* PA bits 56:26 (GAM_RANGE_SHFT) */
@@ -191,16 +180,13 @@ struct uv_hub_info_s {
 struct uv_cpu_info_s {
void*p_uv_hub_info;
unsigned char   blade_cpu_id;
-   struct uv_scir_sscir;
+   void*reserved;
 };
 DECLARE_PER_CPU(struct uv_cpu_info_s, __uv_cpu_info);
 
 #define uv_cpu_infothis_cpu_ptr(&__uv_cpu_info)
 #define uv_cpu_info_per(cpu)   (_cpu(__uv_cpu_info, cpu))
 
-#defineuv_scir_info(_cpu_info->scir)
-#defineuv_cpu_scir_info(cpu)   (_cpu_info_per(cpu)->scir)
-
 /* Node specific hub common info struct */
 extern void **__uv_hub_info_list;
 static inline struct uv_hub_info_s *uv_hub_info_list(int node)
@@ -297,9 +283,9 @@ union uvh_apicid {
 #define UV3_GLOBAL_MMR32_SIZE  (32UL * 1024 * 1024)
 
 #define UV4_LOCAL_MMR_BASE 0xfa00UL
-#define UV4_GLOBAL_MMR32_BASE  0xfc00UL
+#define UV4_GLOBAL_MMR32_BASE  0
 #define UV4_LOCAL_MMR_SIZE (32UL * 1024 * 1024)
-#define UV4_GLOBAL_MMR32_SIZE  (16UL * 1024 * 1024)
+#define UV4_GLOBAL_MMR32_SIZE  0
 
 #define UV_LOCAL_MMR_BASE  (   \
is_uv2_hub() ? UV2_LOCAL_MMR_BASE : \
@@ -772,29 +758,6 @@ DECLARE_PER_CPU(struct uv_cpu_nmi_s, uv_cpu_nmi);
 #defineUV_NMI_STATE_DUMP   2
 #defineUV_NMI_STATE_DUMP_DONE  3
 
-/* Update SCIR state */
-static inline void uv_set_scir_bits(unsigned char value)
-{
-   if (uv_scir_info->state != value) {
-   uv_scir_info->state = value;
-   uv_write_local_mmr8(uv_scir_info->offset, value);
-   }
-}
-
-static inline unsigned long uv_scir_offset(int apicid)
-{
-   return SCIR_LOCAL_MMR_BASE | (apicid & 0x3f);
-}
-
-static inline void uv_set_cpu_scir_bits(int cpu, unsigned char value)
-{
-   if (uv_cpu_scir_info(cpu)->state != value) {
-   uv_write_global_mmr8(uv_cpu_to_pnode(cpu),
-   uv_cpu_scir_info(cpu)->offset, value);
-   uv_cpu_scir_info(cpu)->state = value;
-   }
-}
-
 /*
  * Get the minimum revision number of the hub chips within the partition.
  * (See UVx_HUB_REVISION_BASE above for specific values.)
diff --git a/arch/x86/kernel/apic/x2apic_uv_x.c 
b/arch/x86/kernel/apic/x2apic_uv_x.c
index 0b6eea3f54e6..f51fabf56010 100644
--- a/arch/x86/kernel/apic/x2apic_uv_x.c
+++ b/arch/x86/kernel/apic/x2apic_uv_x.c
@@ -909,85 +909,6 @@ static __init void uv_rtc_init(void)
}
 }
 
-/*
- * percpu heartbeat timer
- */
-static void uv_heartbeat(struct timer_list *timer)
-{
-   unsigned char bits = uv_scir_info->state;
-
-   /* Flip heartbeat bit: */
-   bits ^= SCIR_CPU_HEARTBEAT;
-
-   /* Is this CPU idle? */
-   if (idle_cpu(raw_smp_processor_id()))
-   bits &= ~SCIR_CPU_ACTIVITY;
-   else
-   bits |= SCIR_CPU_ACTIVITY;
-
-   /* Update system controller interface reg: */
-   uv_set_scir_bits(bits);
-
-   /* Enable next timer period: */
-   mod_timer(timer, jiffies + SCIR_CPU_HB_INTERVAL);
-}
-
-static int uv_heartbeat_enable(unsigned int cpu)
-{
-   while (!uv_cpu_scir_info(cpu)->enabled) {
-   struct timer_list *timer = _cpu_scir_info(cpu)->timer;
-
-   uv_set_cpu_scir_bits(cpu, SCIR_CPU_HEARTBEAT|SCIR_CPU_ACTIVITY);
-   timer_setup(timer, uv_heartbeat, TIMER_PINNED);
-   timer->expires = jiffies + SCIR_CPU_HB_INTERVAL;
-   add_timer_on(timer, cpu);
-   uv_cpu_scir_info(cpu)->enabled = 1;
-
-   /* Also ensure that boot CPU is enabled: */
-   cpu = 0;
-   }
-   return 0;
-}
-
-#ifdef CONFIG_HOTPLUG_CPU
-static int 

[PATCH v3 13/13] x86/platform/uv: Update Copyrights to conform to HPE standards

2020-10-01 Thread Mike Travis
Add Copyrights to those files that have been updated for UV5 changes.

Signed-off-by: Mike Travis 
---
 arch/x86/include/asm/uv/bios.h  | 1 +
 arch/x86/include/asm/uv/uv_hub.h| 1 +
 arch/x86/include/asm/uv/uv_mmrs.h   | 1 +
 arch/x86/kernel/apic/x2apic_uv_x.c  | 1 +
 arch/x86/platform/uv/bios_uv.c  | 1 +
 arch/x86/platform/uv/uv_nmi.c   | 1 +
 arch/x86/platform/uv/uv_time.c  | 1 +
 drivers/misc/sgi-gru/grufile.c  | 1 +
 drivers/misc/sgi-xp/xp.h| 1 +
 drivers/misc/sgi-xp/xp_main.c   | 1 +
 drivers/misc/sgi-xp/xp_uv.c | 1 +
 drivers/misc/sgi-xp/xpc_main.c  | 1 +
 drivers/misc/sgi-xp/xpc_partition.c | 1 +
 drivers/misc/sgi-xp/xpnet.c | 1 +
 14 files changed, 14 insertions(+)

diff --git a/arch/x86/include/asm/uv/bios.h b/arch/x86/include/asm/uv/bios.h
index 97ac595ebc6a..08b3d810dfba 100644
--- a/arch/x86/include/asm/uv/bios.h
+++ b/arch/x86/include/asm/uv/bios.h
@@ -5,6 +5,7 @@
 /*
  * UV BIOS layer definitions.
  *
+ * (C) Copyright 2020 Hewlett Packard Enterprise Development LP
  * Copyright (C) 2007-2017 Silicon Graphics, Inc. All rights reserved.
  * Copyright (c) Russ Anderson 
  */
diff --git a/arch/x86/include/asm/uv/uv_hub.h b/arch/x86/include/asm/uv/uv_hub.h
index 610bda21a8d9..5002f52be332 100644
--- a/arch/x86/include/asm/uv/uv_hub.h
+++ b/arch/x86/include/asm/uv/uv_hub.h
@@ -5,6 +5,7 @@
  *
  * SGI UV architectural definitions
  *
+ * (C) Copyright 2020 Hewlett Packard Enterprise Development LP
  * Copyright (C) 2007-2014 Silicon Graphics, Inc. All rights reserved.
  */
 
diff --git a/arch/x86/include/asm/uv/uv_mmrs.h 
b/arch/x86/include/asm/uv/uv_mmrs.h
index 06ea2d1aaa3e..57fa67373262 100644
--- a/arch/x86/include/asm/uv/uv_mmrs.h
+++ b/arch/x86/include/asm/uv/uv_mmrs.h
@@ -5,6 +5,7 @@
  *
  * HPE UV MMR definitions
  *
+ * (C) Copyright 2020 Hewlett Packard Enterprise Development LP
  * Copyright (C) 2007-2016 Silicon Graphics, Inc. All rights reserved.
  */
 
diff --git a/arch/x86/kernel/apic/x2apic_uv_x.c 
b/arch/x86/kernel/apic/x2apic_uv_x.c
index 0c97315bf864..7c895991389c 100644
--- a/arch/x86/kernel/apic/x2apic_uv_x.c
+++ b/arch/x86/kernel/apic/x2apic_uv_x.c
@@ -5,6 +5,7 @@
  *
  * SGI UV APIC functions (note: not an Intel compatible APIC)
  *
+ * (C) Copyright 2020 Hewlett Packard Enterprise Development LP
  * Copyright (C) 2007-2014 Silicon Graphics, Inc. All rights reserved.
  */
 #include 
diff --git a/arch/x86/platform/uv/bios_uv.c b/arch/x86/platform/uv/bios_uv.c
index b148b4c8c2ec..54511eaccf4d 100644
--- a/arch/x86/platform/uv/bios_uv.c
+++ b/arch/x86/platform/uv/bios_uv.c
@@ -2,6 +2,7 @@
 /*
  * BIOS run time interface routines.
  *
+ * (C) Copyright 2020 Hewlett Packard Enterprise Development LP
  * Copyright (C) 2007-2017 Silicon Graphics, Inc. All rights reserved.
  * Copyright (c) Russ Anderson 
  */
diff --git a/arch/x86/platform/uv/uv_nmi.c b/arch/x86/platform/uv/uv_nmi.c
index eac26feb0461..0f5cbcf0da63 100644
--- a/arch/x86/platform/uv/uv_nmi.c
+++ b/arch/x86/platform/uv/uv_nmi.c
@@ -2,6 +2,7 @@
 /*
  * SGI NMI support routines
  *
+ * (C) Copyright 2020 Hewlett Packard Enterprise Development LP
  * Copyright (C) 2007-2017 Silicon Graphics, Inc. All rights reserved.
  * Copyright (c) Mike Travis
  */
diff --git a/arch/x86/platform/uv/uv_time.c b/arch/x86/platform/uv/uv_time.c
index e94436ba3022..a39e0f394c0e 100644
--- a/arch/x86/platform/uv/uv_time.c
+++ b/arch/x86/platform/uv/uv_time.c
@@ -2,6 +2,7 @@
 /*
  * SGI RTC clock/timer routines.
  *
+ *  (C) Copyright 2020 Hewlett Packard Enterprise Development LP
  *  Copyright (c) 2009-2013 Silicon Graphics, Inc.  All Rights Reserved.
  *  Copyright (c) Dimitri Sivanich
  */
diff --git a/drivers/misc/sgi-gru/grufile.c b/drivers/misc/sgi-gru/grufile.c
index 18aa8c877bf8..7ffcfc0bb587 100644
--- a/drivers/misc/sgi-gru/grufile.c
+++ b/drivers/misc/sgi-gru/grufile.c
@@ -7,6 +7,7 @@
  * This file supports the user system call for file open, close, mmap, etc.
  * This also incudes the driver initialization code.
  *
+ *  (C) Copyright 2020 Hewlett Packard Enterprise Development LP
  *  Copyright (c) 2008-2014 Silicon Graphics, Inc.  All Rights Reserved.
  */
 
diff --git a/drivers/misc/sgi-xp/xp.h b/drivers/misc/sgi-xp/xp.h
index 0af267baf031..7d839d844847 100644
--- a/drivers/misc/sgi-xp/xp.h
+++ b/drivers/misc/sgi-xp/xp.h
@@ -3,6 +3,7 @@
  * License.  See the file "COPYING" in the main directory of this archive
  * for more details.
  *
+ * (C) Copyright 2020 Hewlett Packard Enterprise Development LP
  * Copyright (C) 2004-2008 Silicon Graphics, Inc. All rights reserved.
  */
 
diff --git a/drivers/misc/sgi-xp/xp_main.c b/drivers/misc/sgi-xp/xp_main.c
index 33558555820d..049c9aef0559 100644
--- a/drivers/misc/sgi-xp/xp_main.c
+++ b/drivers/misc/sgi-xp/xp_main.c
@@ -3,6 +3,7 @@
  * License.  See the file "COPYING" in the main directory of this archive
  * for more details.
  *
+ * (C) Copyright 2020 Hewlett Packard Enterprise Development LP
  * Copyright (c) 2004-2008 

[PATCH v3 08/13] x86/platform/uv: Adjust GAM MMR references affected by UV5 updates

2020-10-01 Thread Mike Travis
Make modifications to the GAM MMR mappings to accommodate changes for UV5.

Signed-off-by: Mike Travis 
Reviewed-by: Dimitri Sivanich 
Reviewed-by: Steve Wahl 
---
 arch/x86/kernel/apic/x2apic_uv_x.c | 30 +-
 1 file changed, 25 insertions(+), 5 deletions(-)

diff --git a/arch/x86/kernel/apic/x2apic_uv_x.c 
b/arch/x86/kernel/apic/x2apic_uv_x.c
index 746a56466066..9f89c1e82df0 100644
--- a/arch/x86/kernel/apic/x2apic_uv_x.c
+++ b/arch/x86/kernel/apic/x2apic_uv_x.c
@@ -931,12 +931,32 @@ static __init void map_gru_high(int max_pnode)
 
 static __init void map_mmr_high(int max_pnode)
 {
-   union uvh_rh_gam_mmr_overlay_config_u mmr;
-   int shift = UVH_RH_GAM_MMR_OVERLAY_CONFIG_BASE_SHFT;
+   unsigned long base;
+   int shift;
+   bool enable;
+
+   if (UVH_RH10_GAM_MMR_OVERLAY_CONFIG) {
+   union uvh_rh10_gam_mmr_overlay_config_u mmr;
+
+   mmr.v = uv_read_local_mmr(UVH_RH10_GAM_MMR_OVERLAY_CONFIG);
+   enable = mmr.s.enable;
+   base = mmr.s.base;
+   shift = UVH_RH10_GAM_MMR_OVERLAY_CONFIG_BASE_SHFT;
+   } else if (UVH_RH_GAM_MMR_OVERLAY_CONFIG) {
+   union uvh_rh_gam_mmr_overlay_config_u mmr;
+
+   mmr.v = uv_read_local_mmr(UVH_RH_GAM_MMR_OVERLAY_CONFIG);
+   enable = mmr.s.enable;
+   base = mmr.s.base;
+   shift = UVH_RH_GAM_MMR_OVERLAY_CONFIG_BASE_SHFT;
+   } else {
+   pr_err("UV:%s:RH_GAM_MMR_OVERLAY_CONFIG MMR undefined?\n",
+   __func__);
+   return;
+   }
 
-   mmr.v = uv_read_local_mmr(UVH_RH_GAM_MMR_OVERLAY_CONFIG);
-   if (mmr.s.enable)
-   map_high("MMR", mmr.s.base, shift, shift, max_pnode, map_uc);
+   if (enable)
+   map_high("MMR", base, shift, shift, max_pnode, map_uc);
else
pr_info("UV: MMR disabled\n");
 }
-- 
2.21.0



[PATCH v3 10/13] x86/platform/uv: Update Node Present Counting

2020-10-01 Thread Mike Travis
The changes in the UV5 arch shrank the NODE PRESENT table to just 2x64
entries (128 total), so they are now in two 64-bit MMRs instead of a depth
of 64 bits in an array.  Adjust references when counting up the nodes present.

Signed-off-by: Mike Travis 
Reviewed-by: Dimitri Sivanich 
Reviewed-by: Steve Wahl 
---
 arch/x86/kernel/apic/x2apic_uv_x.c | 26 +++---
 1 file changed, 19 insertions(+), 7 deletions(-)

diff --git a/arch/x86/kernel/apic/x2apic_uv_x.c 
b/arch/x86/kernel/apic/x2apic_uv_x.c
index 678001ff789d..0e40f4866bc3 100644
--- a/arch/x86/kernel/apic/x2apic_uv_x.c
+++ b/arch/x86/kernel/apic/x2apic_uv_x.c
@@ -1441,20 +1441,32 @@ static int __init decode_uv_systab(void)
 /* Set up physical blade translations from UVH_NODE_PRESENT_TABLE */
 static __init void boot_init_possible_blades(struct uv_hub_info_s *hub_info)
 {
+   unsigned long np;
int i, uv_pb = 0;
 
-   pr_info("UV: NODE_PRESENT_DEPTH = %d\n", UVH_NODE_PRESENT_TABLE_DEPTH);
-   for (i = 0; i < UVH_NODE_PRESENT_TABLE_DEPTH; i++) {
-   unsigned long np;
-
-   np = uv_read_local_mmr(UVH_NODE_PRESENT_TABLE + i * 8);
-   if (np)
+   if (UVH_NODE_PRESENT_TABLE) {
+   pr_info("UV: NODE_PRESENT_DEPTH = %d\n",
+   UVH_NODE_PRESENT_TABLE_DEPTH);
+   for (i = 0; i < UVH_NODE_PRESENT_TABLE_DEPTH; i++) {
+   np = uv_read_local_mmr(UVH_NODE_PRESENT_TABLE + i * 8);
pr_info("UV: NODE_PRESENT(%d) = 0x%016lx\n", i, np);
-
+   uv_pb += hweight64(np);
+   }
+   }
+   if (UVH_NODE_PRESENT_0) {
+   np = uv_read_local_mmr(UVH_NODE_PRESENT_0);
+   pr_info("UV: NODE_PRESENT_0 = 0x%016lx\n", np);
+   uv_pb += hweight64(np);
+   }
+   if (UVH_NODE_PRESENT_1) {
+   np = uv_read_local_mmr(UVH_NODE_PRESENT_1);
+   pr_info("UV: NODE_PRESENT_1 = 0x%016lx\n", np);
uv_pb += hweight64(np);
}
if (uv_possible_blades != uv_pb)
uv_possible_blades = uv_pb;
+
+   pr_info("UV: number nodes/possible blades %d\n", uv_pb);
 }
 
 static void __init build_socket_tables(void)
-- 
2.21.0



[PATCH v3 09/13] x86/platform/uv: Update UV5 MMR references in UV GRU

2020-10-01 Thread Mike Travis
Make modifications to the GRU mappings to accommodate changes for UV5.

Signed-off-by: Mike Travis 
Reviewed-by: Dimitri Sivanich 
Reviewed-by: Steve Wahl 
---
 arch/x86/kernel/apic/x2apic_uv_x.c | 30 --
 1 file changed, 24 insertions(+), 6 deletions(-)

diff --git a/arch/x86/kernel/apic/x2apic_uv_x.c 
b/arch/x86/kernel/apic/x2apic_uv_x.c
index 9f89c1e82df0..678001ff789d 100644
--- a/arch/x86/kernel/apic/x2apic_uv_x.c
+++ b/arch/x86/kernel/apic/x2apic_uv_x.c
@@ -82,6 +82,9 @@ static unsigned long __init uv_early_read_mmr(unsigned long 
addr)
 
 static inline bool is_GRU_range(u64 start, u64 end)
 {
+   if (!gru_start_paddr)
+   return false;
+
return start >= gru_start_paddr && end <= gru_end_paddr;
 }
 
@@ -913,13 +916,24 @@ static __init void map_high(char *id, unsigned long base, 
int pshift, int bshift
 static __init void map_gru_high(int max_pnode)
 {
union uvh_rh_gam_gru_overlay_config_u gru;
-   int shift = UVH_RH_GAM_GRU_OVERLAY_CONFIG_BASE_SHFT;
-   unsigned long mask = UVH_RH_GAM_GRU_OVERLAY_CONFIG_BASE_MASK;
-   unsigned long base;
+   unsigned long mask, base;
+   int shift;
+
+   if (UVH_RH_GAM_GRU_OVERLAY_CONFIG) {
+   gru.v = uv_read_local_mmr(UVH_RH_GAM_GRU_OVERLAY_CONFIG);
+   shift = UVH_RH_GAM_GRU_OVERLAY_CONFIG_BASE_SHFT;
+   mask = UVH_RH_GAM_GRU_OVERLAY_CONFIG_BASE_MASK;
+   } else if (UVH_RH10_GAM_GRU_OVERLAY_CONFIG) {
+   gru.v = uv_read_local_mmr(UVH_RH10_GAM_GRU_OVERLAY_CONFIG);
+   shift = UVH_RH10_GAM_GRU_OVERLAY_CONFIG_BASE_SHFT;
+   mask = UVH_RH10_GAM_GRU_OVERLAY_CONFIG_BASE_MASK;
+   } else {
+   pr_err("UV: GRU unavailable (no MMR)\n");
+   return;
+   }
 
-   gru.v = uv_read_local_mmr(UVH_RH_GAM_GRU_OVERLAY_CONFIG);
if (!gru.s.enable) {
-   pr_info("UV: GRU disabled\n");
+   pr_info("UV: GRU disabled (by BIOS)\n");
return;
}
 
@@ -1293,7 +1307,11 @@ static void __init uv_init_hub_info(struct uv_hub_info_s 
*hi)
/* Show system specific info: */
pr_info("UV: N:%d M:%d m_shift:%d n_lshift:%d\n", hi->n_val, hi->m_val, 
hi->m_shift, hi->n_lshift);
pr_info("UV: gpa_mask/shift:0x%lx/%d pnode_mask:0x%x apic_pns:%d\n", 
hi->gpa_mask, hi->gpa_shift, hi->pnode_mask, hi->apic_pnode_shift);
-   pr_info("UV: mmr_base/shift:0x%lx/%ld gru_base/shift:0x%lx/%ld\n", 
hi->global_mmr_base, hi->global_mmr_shift, hi->global_gru_base, 
hi->global_gru_shift);
+   pr_info("UV: mmr_base/shift:0x%lx/%ld\n", hi->global_mmr_base, 
hi->global_mmr_shift);
+   if (hi->global_gru_base)
+   pr_info("UV: gru_base/shift:0x%lx/%ld\n",
+   hi->global_gru_base, hi->global_gru_shift);
+
pr_info("UV: gnode_upper:0x%lx gnode_extra:0x%x\n", hi->gnode_upper, 
hi->gnode_extra);
 }
 
-- 
2.21.0



[tip:x86/asm] BUILD SUCCESS aa5cacdc29d76a005cbbee018a47faa6e724dd2d

2020-10-01 Thread kernel test robot
 allyesconfig
parisc   allyesconfig
s390defconfig
i386 allyesconfig
i386defconfig
sparc   defconfig
mips allmodconfig
powerpc  allyesconfig
powerpc   allnoconfig
i386 randconfig-a003-20200930
i386 randconfig-a002-20200930
i386 randconfig-a006-20200930
i386 randconfig-a005-20200930
i386 randconfig-a004-20200930
i386 randconfig-a001-20200930
x86_64   randconfig-a015-20200930
x86_64   randconfig-a013-20200930
x86_64   randconfig-a012-20200930
x86_64   randconfig-a016-20200930
x86_64   randconfig-a014-20200930
x86_64   randconfig-a011-20200930
x86_64   randconfig-a012-20201001
x86_64   randconfig-a015-20201001
x86_64   randconfig-a014-20201001
x86_64   randconfig-a013-20201001
x86_64   randconfig-a011-20201001
x86_64   randconfig-a016-20201001
i386 randconfig-a011-20200930
i386 randconfig-a015-20200930
i386 randconfig-a012-20200930
i386 randconfig-a014-20200930
i386 randconfig-a016-20200930
i386 randconfig-a013-20200930
riscvnommu_k210_defconfig
riscvallyesconfig
riscvnommu_virt_defconfig
riscv   defconfig
riscv  rv32_defconfig
riscvallmodconfig
x86_64   rhel
x86_64   allyesconfig
x86_64rhel-7.6-kselftests
x86_64   rhel-8.3
x86_64  kexec

clang tested configs:
x86_64   randconfig-a004-20201001
x86_64   randconfig-a001-20201001
x86_64   randconfig-a002-20201001
x86_64   randconfig-a003-20201001
x86_64   randconfig-a005-20201001
x86_64   randconfig-a006-20201001
x86_64   randconfig-a001-20200930
x86_64   randconfig-a005-20200930
x86_64   randconfig-a003-20200930
x86_64   randconfig-a004-20200930
x86_64   randconfig-a002-20200930
x86_64   randconfig-a006-20200930

---
0-DAY CI Kernel Test Service, Intel Corporation
https://lists.01.org/hyperkitty/list/kbuild-...@lists.01.org


[PATCH v3 00/13] x86/platform/uv: Updates for UV5 Architecture

2020-10-01 Thread Mike Travis


Changes included in this patch set:

 * Add changes needed for new UV5 UV architecture.  Chief among the
   changes are 52 bits of physical memory address and 57 bits of
   virtual address space.

 * Remove the BAU TLB code currently being replaced by the BAU APIC driver.

 * Remove System Controller (monitoring) code

 * Updated UV mmrs.h file removing UV1, adding UVY class, optimizing
   the arch selection of the MMR address/field, and trimming down MMR
   selection list reducing number of MMRs being defined.

 * Process UV ArchType in UV BIOS generated UVsystab allowing OEMs to
   use OEM_ID for their own purposes.

 * Update various mapping functions (MMIOH, MMR, GRU) to accommodate
   UV5 differences.

 * Update node present counting for change in MMRs.

 * Update TSC sync check of BIOS sync status.

 * Update NMI handler for UV5 MMR changes.

 * Update copyrights to conform to HPE standards.


This is version 2 with these changes since version 1: 

 * Added diffstats to p-intro.

 * Updated Copyrights to be in one file and only include the year
   the code was modified.

 * Updated to use git format-patch to construct patch email and 
   git send-email to send the patches.


This is version 3 with these changes since version 2: 

 * Changes made to .gitconfig so no internal (unreachable) systems
   are referenced.


Mike Travis (13):
  x86/platform/uv: Remove UV BAU TLB Shootdown Handler
  x86/platform/uv: Remove SCIR MMR references for UVY systems.
  x86/platform/uv: Adjust references in UV kernel modules
  x86/platform/uv: Update UV MMRs for UV5
  x86/platform/uv: Add UV5 direct references
  x86/platform/uv: Add and Decode Arch Type in UVsystab
  x86/platform/uv: Update MMIOH references based on new UV5 MMRs.
  x86/platform/uv: Adjust GAM MMR references affected by UV5 updates
  x86/platform/uv: Update UV5 MMR references in UV GRU
  x86/platform/uv: Update Node Present Counting
  x86/platform/uv: Update UV5 TSC Checking
  x86/platform/uv: Update for UV5 NMI MMR changes
  x86/platform/uv: Update Copyrights to conform to HPE standards

 arch/x86/include/asm/idtentry.h |4 -
 arch/x86/include/asm/uv/bios.h  |   17 +-
 arch/x86/include/asm/uv/uv.h|4 +-
 arch/x86/include/asm/uv/uv_bau.h|  755 ---
 arch/x86/include/asm/uv/uv_hub.h|  165 +-
 arch/x86/include/asm/uv/uv_mmrs.h   | 7646 ++-
 arch/x86/kernel/apic/x2apic_uv_x.c  |  817 ++-
 arch/x86/kernel/idt.c   |3 -
 arch/x86/mm/tlb.c   |   24 -
 arch/x86/platform/uv/Makefile   |2 +-
 arch/x86/platform/uv/bios_uv.c  |   28 +-
 arch/x86/platform/uv/tlb_uv.c   | 2097 
 arch/x86/platform/uv/uv_nmi.c   |   65 +-
 arch/x86/platform/uv/uv_time.c  |   11 +-
 drivers/misc/sgi-gru/grufile.c  |3 +-
 drivers/misc/sgi-xp/xp.h|9 +-
 drivers/misc/sgi-xp/xp_main.c   |5 +-
 drivers/misc/sgi-xp/xp_uv.c |7 +-
 drivers/misc/sgi-xp/xpc_main.c  |7 +-
 drivers/misc/sgi-xp/xpc_partition.c |3 +-
 drivers/misc/sgi-xp/xpnet.c |3 +-
 21 files changed, 4797 insertions(+), 6878 deletions(-)
 delete mode 100644 arch/x86/include/asm/uv/uv_bau.h
 delete mode 100644 arch/x86/platform/uv/tlb_uv.c

-- 
2.21.0



[PATCH v3 07/13] x86/platform/uv: Update MMIOH references based on new UV5 MMRs.

2020-10-01 Thread Mike Travis
Make modifications to the MMIOH mappings to accommodate changes for UV5.

Signed-off-by: Mike Travis 
Reviewed-by: Steve Wahl 
---
 arch/x86/kernel/apic/x2apic_uv_x.c | 211 +++--
 1 file changed, 143 insertions(+), 68 deletions(-)

diff --git a/arch/x86/kernel/apic/x2apic_uv_x.c 
b/arch/x86/kernel/apic/x2apic_uv_x.c
index 353825a0b327..746a56466066 100644
--- a/arch/x86/kernel/apic/x2apic_uv_x.c
+++ b/arch/x86/kernel/apic/x2apic_uv_x.c
@@ -228,6 +228,13 @@ static void __init uv_tsc_check_sync(void)
mark_tsc_unstable("UV BIOS");
 }
 
+/* Selector for (4|4A|5) structs */
+#define uvxy_field(sname, field, undef) (  \
+   is_uv(UV4A) ? sname.s4a.field : \
+   is_uv(UV4) ? sname.s4.field :   \
+   is_uv(UV3) ? sname.s3.field :   \
+   undef)
+
 /* [Copied from arch/x86/kernel/cpu/topology.c:detect_extended_topology()] */
 
 #define SMT_LEVEL  0   /* Leaf 0xb SMT level */
@@ -882,6 +889,7 @@ static __init void get_lowmem_redirect(unsigned long *base, 
unsigned long *size)
 }
 
 enum map_type {map_wb, map_uc};
+static const char * const mt[] = { "WB", "UC" };
 
 static __init void map_high(char *id, unsigned long base, int pshift, int 
bshift, int max_pnode, enum map_type map_type)
 {
@@ -893,11 +901,13 @@ static __init void map_high(char *id, unsigned long base, 
int pshift, int bshift
pr_info("UV: Map %s_HI base address NULL\n", id);
return;
}
-   pr_debug("UV: Map %s_HI 0x%lx - 0x%lx\n", id, paddr, paddr + bytes);
if (map_type == map_uc)
init_extra_mapping_uc(paddr, bytes);
else
init_extra_mapping_wb(paddr, bytes);
+
+   pr_info("UV: Map %s_HI 0x%lx - 0x%lx %s (%d segments)\n",
+   id, paddr, paddr + bytes, mt[map_type], max_pnode + 1);
 }
 
 static __init void map_gru_high(int max_pnode)
@@ -931,52 +941,73 @@ static __init void map_mmr_high(int max_pnode)
pr_info("UV: MMR disabled\n");
 }
 
-/* UV3/4 have identical MMIOH overlay configs, UV4A is slightly different */
-static __init void map_mmioh_high_uv34(int index, int min_pnode, int max_pnode)
-{
-   unsigned long overlay;
-   unsigned long mmr;
-   unsigned long base;
-   unsigned long nasid_mask;
-   unsigned long m_overlay;
-   int i, n, shift, m_io, max_io;
-   int nasid, lnasid, fi, li;
-   char *id;
-
-   if (index == 0) {
-   id = "MMIOH0";
-   m_overlay = UVH_RH_GAM_MMIOH_OVERLAY_CONFIG0;
-   overlay = uv_read_local_mmr(m_overlay);
-   base = overlay & UV3H_RH_GAM_MMIOH_OVERLAY_CONFIG0_BASE_MASK;
+/* Arch specific ENUM cases */
+enum mmioh_arch {
+   UV2_MMIOH = -1,
+   UVY_MMIOH0, UVY_MMIOH1,
+   UVX_MMIOH0, UVX_MMIOH1,
+};
+
+/* Calculate and Map MMIOH Regions */
+void __init calc_mmioh_map(enum mmioh_arch index, int min_pnode, int max_pnode,
+   int shift, unsigned long base, int m_io, int n_io)
+{
+   unsigned long mmr, nasid_mask;
+   int nasid, min_nasid, max_nasid, lnasid, mapped;
+   int i, fi, li, n, max_io;
+   char id[8];
+
+   /* One (UV2) mapping */
+   if (index == UV2_MMIOH) {
+   strncpy(id, "MMIOH", sizeof(id));
+   max_io = max_pnode;
+   mapped = 0;
+   goto map_exit;
+   }
+
+   /* small and large MMIOH mappings */
+   switch (index) {
+   case UVY_MMIOH0:
+   mmr = UVH_RH10_GAM_MMIOH_REDIRECT_CONFIG0;
+   nasid_mask = UVH_RH10_GAM_MMIOH_OVERLAY_CONFIG0_BASE_MASK;
+   n = UVH_RH10_GAM_MMIOH_REDIRECT_CONFIG0_DEPTH;
+   min_nasid = min_pnode;
+   max_nasid = max_pnode;
+   mapped = 1;
+   break;
+   case UVY_MMIOH1:
+   mmr = UVH_RH10_GAM_MMIOH_REDIRECT_CONFIG1;
+   nasid_mask = UVH_RH10_GAM_MMIOH_OVERLAY_CONFIG1_BASE_MASK;
+   n = UVH_RH10_GAM_MMIOH_REDIRECT_CONFIG1_DEPTH;
+   min_nasid = min_pnode;
+   max_nasid = max_pnode;
+   mapped = 1;
+   break;
+   case UVX_MMIOH0:
mmr = UVH_RH_GAM_MMIOH_REDIRECT_CONFIG0;
-   m_io = (overlay & UV3H_RH_GAM_MMIOH_OVERLAY_CONFIG0_M_IO_MASK)
-   >> UV3H_RH_GAM_MMIOH_OVERLAY_CONFIG0_M_IO_SHFT;
-   shift = UV3H_RH_GAM_MMIOH_OVERLAY_CONFIG0_M_IO_SHFT;
+   nasid_mask = UVH_RH_GAM_MMIOH_OVERLAY_CONFIG0_BASE_MASK;
n = UVH_RH_GAM_MMIOH_REDIRECT_CONFIG0_DEPTH;
-   nasid_mask = UV3H_RH_GAM_MMIOH_REDIRECT_CONFIG0_NASID_MASK;
-   } else {
-   id = "MMIOH1";
-   m_overlay = UVH_RH_GAM_MMIOH_OVERLAY_CONFIG1;
-   overlay = uv_read_local_mmr(m_overlay);
-   base = overlay & UV3H_RH_GAM_MMIOH_OVERLAY_CONFIG1_BASE_MASK;
+   min_nasid = min_pnode * 2;
+

Re: [PATCH rdma-next] overflow: Include header file with SIZE_MAX declaration

2020-10-01 Thread Jason Gunthorpe
On Sun, Sep 13, 2020 at 01:29:28PM +0300, Leon Romanovsky wrote:
> From: Leon Romanovsky 
> 
> The various array_size functions use the SIZE_MAX define, but the missing
> limits.h include causes a failure to compile code that needs overflow.h.
> 
>  In file included from drivers/infiniband/core/uverbs_std_types_device.c:6:
>  ./include/linux/overflow.h: In function 'array_size':
>  ./include/linux/overflow.h:258:10: error: 'SIZE_MAX' undeclared (first use 
> in this function)
>258 |   return SIZE_MAX;
>|  ^~~~
> 
> Fixes: 610b15c50e86 ("overflow.h: Add allocation size calculation helpers")
> Signed-off-by: Leon Romanovsky 
> ---
>  include/linux/overflow.h | 1 +
>  1 file changed, 1 insertion(+)

Applied to rdma for-next, seems other patches need this. Thanks

Jason


Re: [PATCH rdma-next v3 0/4] Query GID table API

2020-10-01 Thread Jason Gunthorpe
On Wed, Sep 23, 2020 at 07:50:11PM +0300, Leon Romanovsky wrote:
> When an application is not using RDMA CM and if it is using multiple RDMA
> devices with one or more RoCE ports, finding the right GID table entry is
> a long process.
> 
> For example, with two RoCE dual-port devices in a system, when IP
> failover is used between two RoCE ports, searching a suitable GID
> entry for a given source IP, matching netdevice of given RoCEv1/v2 type
> requires iterating over all 4 ports * 256 entry GID table.
> 
> Even though the best first match GID table for given criteria is used,
> when the matching entry is on the 4th port, it requires reading
> 3 ports * 256 entries * 3 files (GID, netdev, type) = 2304 files.
> 
> The GID table needs to be referred on every QP creation during IP
> failover on other netdevice of an RDMA device.
> 
> In an alternative approach, a GID cache may be maintained and updated when a
> GID change event is reported by the kernel. However, it comes with the two
> limitations below:
> (a) Maintain a thread per application process instance to listen and update
>  the cache.
> (b) Without the thread, on cache miss event, query the GID table. Even in
>  this approach, if multiple processes are used, a GID cache needs to be
>  maintained on a per-process basis. With a large number of processes,
>  this method doesn't scale.
> 
> Hence, we introduce this series of patches, which introduces an API to
> query the complete GID tables of an RDMA device, that returns all valid
> GID table entries.
> 
> This is done through single ioctl, eliminating 2304 read, 2304 open and
> 2304 close system calls to just a total of 2 calls (one for each device).
> 
> While at it, we also introduce an API to query an individual GID entry
> over ioctl interface, which provides all GID attributes information.
> 
> Thanks
> 
> Avihai Horon (4):
>   RDMA/core: Change rdma_get_gid_attr returned error code
>   RDMA/core: Modify enum ib_gid_type and enum rdma_network_type
>   RDMA/core: Introduce new GID table query API
>   RDMA/uverbs: Expose the new GID query API to user space

I made the edit to fix the locking, please check it

Applied to for-next

Thanks,
Jason


Re: [PATCH 7/7] TC-ETF support PTP clocks

2020-10-01 Thread Thomas Gleixner
On Thu, Oct 01 2020 at 22:51, Erez Geva wrote:

>   - Add support for using a POSIX dynamic clock with
> Traffic control Earliest TxTime First (ETF) Qdisc.



> --- a/include/uapi/linux/net_tstamp.h
> +++ b/include/uapi/linux/net_tstamp.h
> @@ -167,6 +167,11 @@ enum txtime_flags {
>   SOF_TXTIME_FLAGS_MASK = (SOF_TXTIME_FLAGS_LAST - 1) |
>SOF_TXTIME_FLAGS_LAST
>  };
> +/*
> + * Clock ID to use with POSIX clocks
> + * The ID must be u8 to fit in (struct sock)->sk_clockid
> + */
> +#define SOF_TXTIME_POSIX_CLOCK_ID (0x77)

Random number with a random name. 
  
>  struct sock_txtime {
>   __kernel_clockid_t  clockid;/* reference clockid */
> diff --git a/net/sched/sch_etf.c b/net/sched/sch_etf.c
> index c0de4c6f9299..8e3e0a61fa58 100644
> --- a/net/sched/sch_etf.c
> +++ b/net/sched/sch_etf.c
> @@ -15,6 +15,7 @@
>  #include 
>  #include 
>  #include 
> +#include 
>  #include 
>  #include 
>  #include 
> @@ -40,19 +41,40 @@ struct etf_sched_data {
>   struct rb_root_cached head;
>   struct qdisc_watchdog watchdog;
>   ktime_t (*get_time)(void);
> +#ifdef CONFIG_POSIX_TIMERS
> + struct posix_clock *pclock; /* pointer to a posix clock */

Tail comments suck because they disturb the reading flow and this
comment has absolute zero value.

Comments are required to explain things which are not obvious...

> +#endif /* CONFIG_POSIX_TIMERS */

Also this #ifdeffery is bonkers. How is TSN supposed to work without
POSIX_TIMERS in the first place?

>  static const struct nla_policy etf_policy[TCA_ETF_MAX + 1] = {
>   [TCA_ETF_PARMS] = { .len = sizeof(struct tc_etf_qopt) },
>  };
>  
> +static inline ktime_t get_now(struct Qdisc *sch, struct etf_sched_data *q)
> +{
> +#ifdef CONFIG_POSIX_TIMERS
> + if (IS_ERR_OR_NULL(q->get_time)) {
> + struct timespec64 ts;
> + int err = posix_clock_gettime(q->pclock, &ts);
> +
> + if (err) {
> + pr_warn("Clock is disabled (%d) for queue %d\n",
> + err, q->queue);
> + return 0;

That's really useful error handling.

> + }
> + return timespec64_to_ktime(ts);
> + }
> +#endif /* CONFIG_POSIX_TIMERS */
> + return q->get_time();
> +}
> +
>  static inline int validate_input_params(struct tc_etf_qopt *qopt,
>   struct netlink_ext_ack *extack)
>  {
>   /* Check if params comply to the following rules:
>*  * Clockid and delta must be valid.
>*
> -  *  * Dynamic clockids are not supported.
> +  *  * Dynamic CPU clockids are not supported.
>*
>*  * Delta must be a positive or zero integer.
>*
> @@ -60,11 +82,22 @@ static inline int validate_input_params(struct 
> tc_etf_qopt *qopt,
>* expect that system clocks have been synchronized to PHC.
>*/
>   if (qopt->clockid < 0) {
> +#ifdef CONFIG_POSIX_TIMERS
> + /**
> +  * Use of PTP clock through a posix clock.
> +  * The TC application must open the posix clock device file
> +  * and use the dynamic clockid from the file description.

What? How is the code which calls into this guaranteed to have a valid
file descriptor open for a particular dynamic posix clock?

> +  */
> + if (!is_clockid_fd_clock(qopt->clockid)) {
> + NL_SET_ERR_MSG(extack,
> +"Dynamic CPU clockids are not 
> supported");
> + return -EOPNOTSUPP;
> + }
> +#else /* CONFIG_POSIX_TIMERS */
>   NL_SET_ERR_MSG(extack, "Dynamic clockids are not supported");
>   return -ENOTSUPP;
> - }
> -
> - if (qopt->clockid != CLOCK_TAI) {
> +#endif /* CONFIG_POSIX_TIMERS */
> + } else if (qopt->clockid != CLOCK_TAI) {
>   NL_SET_ERR_MSG(extack, "Invalid clockid. CLOCK_TAI must be 
> used");
>   return -EINVAL;
>   }
> @@ -103,7 +136,7 @@ static bool is_packet_valid(struct Qdisc *sch, struct 
> etf_sched_data *q,
>   return false;
>  
>  skip:
> - now = q->get_time();
> + now = get_now(sch, q);

Yuck.

is_packet_valid() is invoked via:

__dev_queue_xmit()
  __dev_xmit_skb()
 etf_enqueue_timesortedlist()
   is_packet_valid()

__dev_queue_xmit() does

   rcu_read_lock_bh();

and your get_now() does

posix_clock_gettime()
down_read(&clk->rwsem);

 > FAIL

down_read() might sleep and cannot be called from a BH disabled
region. This clearly has never been tested with any mandatory debug
option enabled. Why am I not surprised?

Aside from accessing the PHC clock being slow as hell, this cannot ever work
and there is no way to make it work in any consistent form.

If you have several NICs on several PHC domains then all of these
domains should have one thing in common: CLOCK_TAI and the frequency.

If that's not 

[PATCH v3 03/13] x86/platform/uv: Adjust references in UV kernel modules

2020-10-01 Thread Mike Travis
Make a small symbol change (is_uv() ==> is_uv_sys()) to accommodate a
change in the uv_mmrs.h file (is_uv() is the new arch selector function).

Signed-off-by: Mike Travis 
Reviewed-by: Dimitri Sivanich 
Reviewed-by: Steve Wahl 
---
 drivers/misc/sgi-xp/xp.h| 8 
 drivers/misc/sgi-xp/xp_main.c   | 4 ++--
 drivers/misc/sgi-xp/xp_uv.c | 6 --
 drivers/misc/sgi-xp/xpc_main.c  | 6 +++---
 drivers/misc/sgi-xp/xpc_partition.c | 2 +-
 drivers/misc/sgi-xp/xpnet.c | 2 +-
 6 files changed, 15 insertions(+), 13 deletions(-)

diff --git a/drivers/misc/sgi-xp/xp.h b/drivers/misc/sgi-xp/xp.h
index 06469b12aced..0af267baf031 100644
--- a/drivers/misc/sgi-xp/xp.h
+++ b/drivers/misc/sgi-xp/xp.h
@@ -17,11 +17,11 @@
 
 #if defined CONFIG_X86_UV || defined CONFIG_IA64_SGI_UV
 #include 
-#define is_uv()is_uv_system()
+#define is_uv_sys()is_uv_system()
 #endif
 
-#ifndef is_uv
-#define is_uv()0
+#ifndef is_uv_sys
+#define is_uv_sys()0
 #endif
 
 #ifdef USE_DBUG_ON
@@ -79,7 +79,7 @@
 
 #define XPC_MSG_SIZE(_payload_size) \
ALIGN(XPC_MSG_HDR_MAX_SIZE + (_payload_size), \
- is_uv() ? 64 : 128)
+ is_uv_sys() ? 64 : 128)
 
 
 /*
diff --git a/drivers/misc/sgi-xp/xp_main.c b/drivers/misc/sgi-xp/xp_main.c
index 61b03fcefb13..33558555820d 100644
--- a/drivers/misc/sgi-xp/xp_main.c
+++ b/drivers/misc/sgi-xp/xp_main.c
@@ -233,7 +233,7 @@ xp_init(void)
for (ch_number = 0; ch_number < XPC_MAX_NCHANNELS; ch_number++)
mutex_init(&xpc_registrations[ch_number].mutex);
 
-   if (is_uv())
+   if (is_uv_sys())
ret = xp_init_uv();
else
ret = 0;
@@ -249,7 +249,7 @@ module_init(xp_init);
 static void __exit
 xp_exit(void)
 {
-   if (is_uv())
+   if (is_uv_sys())
xp_exit_uv();
 }
 
diff --git a/drivers/misc/sgi-xp/xp_uv.c b/drivers/misc/sgi-xp/xp_uv.c
index f15a9f2ac1dd..118aef64518d 100644
--- a/drivers/misc/sgi-xp/xp_uv.c
+++ b/drivers/misc/sgi-xp/xp_uv.c
@@ -148,7 +148,9 @@ xp_restrict_memprotect_uv(unsigned long phys_addr, unsigned 
long size)
 enum xp_retval
 xp_init_uv(void)
 {
-   BUG_ON(!is_uv());
+   WARN_ON(!is_uv_sys());
+   if (!is_uv_sys())
+   return xpUnsupported;
 
xp_max_npartitions = XP_MAX_NPARTITIONS_UV;
 #ifdef CONFIG_X86
@@ -168,5 +170,5 @@ xp_init_uv(void)
 void
 xp_exit_uv(void)
 {
-   BUG_ON(!is_uv());
+   WARN_ON(!is_uv_sys());
 }
diff --git a/drivers/misc/sgi-xp/xpc_main.c b/drivers/misc/sgi-xp/xpc_main.c
index 8a495dc82f16..f533ded72941 100644
--- a/drivers/misc/sgi-xp/xpc_main.c
+++ b/drivers/misc/sgi-xp/xpc_main.c
@@ -1043,7 +1043,7 @@ xpc_do_exit(enum xp_retval reason)
 
xpc_teardown_partitions();
 
-   if (is_uv())
+   if (is_uv_sys())
xpc_exit_uv();
 }
 
@@ -1226,7 +1226,7 @@ xpc_init(void)
dev_set_name(xpc_part, "part");
dev_set_name(xpc_chan, "chan");
 
-   if (is_uv()) {
+   if (is_uv_sys()) {
ret = xpc_init_uv();
 
} else {
@@ -1312,7 +1312,7 @@ xpc_init(void)
 
xpc_teardown_partitions();
 out_1:
-   if (is_uv())
+   if (is_uv_sys())
xpc_exit_uv();
return ret;
 }
diff --git a/drivers/misc/sgi-xp/xpc_partition.c 
b/drivers/misc/sgi-xp/xpc_partition.c
index 099a53bdbb7d..c21d48fd65cd 100644
--- a/drivers/misc/sgi-xp/xpc_partition.c
+++ b/drivers/misc/sgi-xp/xpc_partition.c
@@ -433,7 +433,7 @@ xpc_discovery(void)
 */
region_size = xp_region_size;
 
-   if (is_uv())
+   if (is_uv_sys())
max_regions = 256;
else {
max_regions = 64;
diff --git a/drivers/misc/sgi-xp/xpnet.c b/drivers/misc/sgi-xp/xpnet.c
index 837d6c3fe69c..fc5cd83f21f2 100644
--- a/drivers/misc/sgi-xp/xpnet.c
+++ b/drivers/misc/sgi-xp/xpnet.c
@@ -515,7 +515,7 @@ xpnet_init(void)
 {
int result;
 
-   if (!is_uv())
+   if (!is_uv_sys())
return -ENODEV;
 
dev_info(xpnet, "registering network device %s\n", XPNET_DEVICE_NAME);
-- 
2.21.0



[PATCH v3 12/13] x86/platform/uv: Update for UV5 NMI MMR changes

2020-10-01 Thread Mike Travis
The UV NMI MMR addresses and fields moved between UV4 and UV5
necessitating a rewrite of the UV NMI handler.  Adjust references
to accommodate those changes.

Signed-off-by: Mike Travis 
Reviewed-by: Dimitri Sivanich 
Reviewed-by: Steve Wahl 
---
 arch/x86/include/asm/uv/uv_hub.h | 13 ---
 arch/x86/platform/uv/uv_nmi.c| 64 +++-
 2 files changed, 54 insertions(+), 23 deletions(-)

diff --git a/arch/x86/include/asm/uv/uv_hub.h b/arch/x86/include/asm/uv/uv_hub.h
index 07079b59824d..610bda21a8d9 100644
--- a/arch/x86/include/asm/uv/uv_hub.h
+++ b/arch/x86/include/asm/uv/uv_hub.h
@@ -734,19 +734,6 @@ extern void uv_nmi_setup_hubless(void);
 #define UVH_NMI_MMR_SHIFT  63
 #define UVH_NMI_MMR_TYPE   "SCRATCH5"
 
-/* Newer SMM NMI handler, not present in all systems */
-#define UVH_NMI_MMRX   UVH_EVENT_OCCURRED0
-#define UVH_NMI_MMRX_CLEAR UVH_EVENT_OCCURRED0_ALIAS
-#define UVH_NMI_MMRX_SHIFT UVH_EVENT_OCCURRED0_EXTIO_INT0_SHFT
-#define UVH_NMI_MMRX_TYPE  "EXTIO_INT0"
-
-/* Non-zero indicates newer SMM NMI handler present */
-#define UVH_NMI_MMRX_SUPPORTED UVH_EXTIO_INT0_BROADCAST
-
-/* Indicates to BIOS that we want to use the newer SMM NMI handler */
-#define UVH_NMI_MMRX_REQ   UVH_BIOS_KERNEL_MMR_ALIAS_2
-#define UVH_NMI_MMRX_REQ_SHIFT 62
-
 struct uv_hub_nmi_s {
raw_spinlock_t  nmi_lock;
atomic_tin_nmi; /* flag this node in UV NMI IRQ */
diff --git a/arch/x86/platform/uv/uv_nmi.c b/arch/x86/platform/uv/uv_nmi.c
index 9d08ff5a755e..eac26feb0461 100644
--- a/arch/x86/platform/uv/uv_nmi.c
+++ b/arch/x86/platform/uv/uv_nmi.c
@@ -2,8 +2,8 @@
 /*
  * SGI NMI support routines
  *
- *  Copyright (c) 2009-2013 Silicon Graphics, Inc.  All Rights Reserved.
- *  Copyright (c) Mike Travis
+ * Copyright (C) 2007-2017 Silicon Graphics, Inc. All rights reserved.
+ * Copyright (c) Mike Travis
  */
 
 #include 
@@ -54,6 +54,20 @@ static struct uv_hub_nmi_s **uv_hub_nmi_list;
 
 DEFINE_PER_CPU(struct uv_cpu_nmi_s, uv_cpu_nmi);
 
+/* Newer SMM NMI handler, not present in all systems */
+static unsigned long uvh_nmi_mmrx; /* UVH_EVENT_OCCURRED0/1 */
+static unsigned long uvh_nmi_mmrx_clear;   /* UVH_EVENT_OCCURRED0/1_ALIAS 
*/
+static int uvh_nmi_mmrx_shift; /* 
UVH_EVENT_OCCURRED0/1_EXTIO_INT0_SHFT */
+static int uvh_nmi_mmrx_mask;  /* 
UVH_EVENT_OCCURRED0/1_EXTIO_INT0_MASK */
+static char *uvh_nmi_mmrx_type;/* "EXTIO_INT0" */
+
+/* Non-zero indicates newer SMM NMI handler present */
+static unsigned long uvh_nmi_mmrx_supported;   /* UVH_EXTIO_INT0_BROADCAST */
+
+/* Indicates to BIOS that we want to use the newer SMM NMI handler */
+static unsigned long uvh_nmi_mmrx_req; /* UVH_BIOS_KERNEL_MMR_ALIAS_2 
*/
+static int uvh_nmi_mmrx_req_shift; /* 62 */
+
 /* UV hubless values */
 #define NMI_CONTROL_PORT   0x70
 #define NMI_DUMMY_PORT 0x71
@@ -227,13 +241,43 @@ static inline bool uv_nmi_action_is(const char *action)
 /* Setup which NMI support is present in system */
 static void uv_nmi_setup_mmrs(void)
 {
-   if (uv_read_local_mmr(UVH_NMI_MMRX_SUPPORTED)) {
-   uv_write_local_mmr(UVH_NMI_MMRX_REQ,
-   1UL << UVH_NMI_MMRX_REQ_SHIFT);
-   nmi_mmr = UVH_NMI_MMRX;
-   nmi_mmr_clear = UVH_NMI_MMRX_CLEAR;
-   nmi_mmr_pending = 1UL << UVH_NMI_MMRX_SHIFT;
-   pr_info("UV: SMI NMI support: %s\n", UVH_NMI_MMRX_TYPE);
+   /* First determine arch specific MMRs to handshake with BIOS */
+   if (UVH_EVENT_OCCURRED0_EXTIO_INT0_MASK) {
+   uvh_nmi_mmrx = UVH_EVENT_OCCURRED0;
+   uvh_nmi_mmrx_clear = UVH_EVENT_OCCURRED0_ALIAS;
+   uvh_nmi_mmrx_shift = UVH_EVENT_OCCURRED0_EXTIO_INT0_SHFT;
+   uvh_nmi_mmrx_mask = UVH_EVENT_OCCURRED0_EXTIO_INT0_MASK;
+   uvh_nmi_mmrx_type = "OCRD0-EXTIO_INT0";
+
+   uvh_nmi_mmrx_supported = UVH_EXTIO_INT0_BROADCAST;
+   uvh_nmi_mmrx_req = UVH_BIOS_KERNEL_MMR_ALIAS_2;
+   uvh_nmi_mmrx_req_shift = 62;
+
+   } else if (UVH_EVENT_OCCURRED1_EXTIO_INT0_MASK) {
+   uvh_nmi_mmrx = UVH_EVENT_OCCURRED1;
+   uvh_nmi_mmrx_clear = UVH_EVENT_OCCURRED1_ALIAS;
+   uvh_nmi_mmrx_shift = UVH_EVENT_OCCURRED1_EXTIO_INT0_SHFT;
+   uvh_nmi_mmrx_mask = UVH_EVENT_OCCURRED1_EXTIO_INT0_MASK;
+   uvh_nmi_mmrx_type = "OCRD1-EXTIO_INT0";
+
+   uvh_nmi_mmrx_supported = UVH_EXTIO_INT0_BROADCAST;
+   uvh_nmi_mmrx_req = UVH_BIOS_KERNEL_MMR_ALIAS_2;
+   uvh_nmi_mmrx_req_shift = 62;
+
+   } else {
+   pr_err("UV:%s:cannot find EVENT_OCCURRED*_EXTIO_INT0\n",
+   __func__);
+   return;
+   }
+
+   /* Then find out if new NMI is supported */
+   if 

[PATCH v3 11/13] x86/platform/uv: Update UV5 TSC Checking

2020-10-01 Thread Mike Travis
Update check of BIOS TSC sync status to include both possible "invalid"
states provided by newer UV5 BIOS.

Signed-off-by: Mike Travis 
Reviewed-by: Steve Wahl 
---
 arch/x86/include/asm/uv/uv_hub.h   |  2 +-
 arch/x86/kernel/apic/x2apic_uv_x.c | 24 ++--
 2 files changed, 11 insertions(+), 15 deletions(-)

diff --git a/arch/x86/include/asm/uv/uv_hub.h b/arch/x86/include/asm/uv/uv_hub.h
index ecf5c93e7ae8..07079b59824d 100644
--- a/arch/x86/include/asm/uv/uv_hub.h
+++ b/arch/x86/include/asm/uv/uv_hub.h
@@ -726,7 +726,7 @@ extern void uv_nmi_setup_hubless(void);
 #define UVH_TSC_SYNC_SHIFT_UV2K16  /* UV2/3k have different bits */
 #define UVH_TSC_SYNC_MASK  3   /* 0011 */
 #define UVH_TSC_SYNC_VALID 3   /* 0011 */
-#define UVH_TSC_SYNC_INVALID   2   /* 0010 */
+#define UVH_TSC_SYNC_UNKNOWN   0   /* 0000 */
 
 /* BMC sets a bit this MMR non-zero before sending an NMI */
 #define UVH_NMI_MMRUVH_BIOS_KERNEL_MMR
diff --git a/arch/x86/kernel/apic/x2apic_uv_x.c 
b/arch/x86/kernel/apic/x2apic_uv_x.c
index 0e40f4866bc3..0c97315bf864 100644
--- a/arch/x86/kernel/apic/x2apic_uv_x.c
+++ b/arch/x86/kernel/apic/x2apic_uv_x.c
@@ -199,36 +199,32 @@ static void __init uv_tsc_check_sync(void)
int sync_state;
int mmr_shift;
char *state;
-   bool valid;
 
-   /* Accommodate different UV arch BIOSes */
+   /* Different returns from different UV BIOS versions */
mmr = uv_early_read_mmr(UVH_TSC_SYNC_MMR);
mmr_shift =
is_uv2_hub() ? UVH_TSC_SYNC_SHIFT_UV2K : UVH_TSC_SYNC_SHIFT;
sync_state = (mmr >> mmr_shift) & UVH_TSC_SYNC_MASK;
 
+   /* Check if TSC is valid for all sockets */
switch (sync_state) {
case UVH_TSC_SYNC_VALID:
state = "in sync";
-   valid = true;
+   mark_tsc_async_resets("UV BIOS");
break;
 
-   case UVH_TSC_SYNC_INVALID:
-   state = "unstable";
-   valid = false;
+   /* If BIOS state unknown, don't do anything */
+   case UVH_TSC_SYNC_UNKNOWN:
+   state = "unknown";
break;
+
+   /* Otherwise, BIOS indicates problem with TSC */
default:
-   state = "unknown: assuming valid";
-   valid = true;
+   state = "unstable";
+   mark_tsc_unstable("UV BIOS");
break;
}
pr_info("UV: TSC sync state from BIOS:0%d(%s)\n", sync_state, state);
-
-   /* Mark flag that says TSC != 0 is valid for socket 0 */
-   if (valid)
-   mark_tsc_async_resets("UV BIOS");
-   else
-   mark_tsc_unstable("UV BIOS");
 }
 
 /* Selector for (4|4A|5) structs */
-- 
2.21.0



Re: [RESEND PATCH] spmi: prefix spmi bus device names with "spmi"

2020-10-01 Thread David Collins
On 10/1/20 11:51 AM, Stephen Boyd wrote:
> Quoting Mark Brown (2020-10-01 10:43:26)
>> On Wed, Sep 30, 2020 at 05:07:20PM -0700, Stephen Boyd wrote:
>>> Quoting David Collins (2020-09-22 15:04:18)
>>
>>>> This helps to disambiguate SPMI device regmaps from I2C ones
>>>> at /sys/kernel/debug/regmap since I2C devices use a very
>>>> similar naming scheme: 0-0000.
>>
>>> Can regmap debugfs prepend the bus name on the node made in debugfs?
>>> Does it do that already?
>>
>> It doesn't do that.  I have to say that given the use of dev_name() in
>> logging it does feel like it'd be useful to have distinct names for
>> grepping if we're running into collisions, IIRC the reason I went with
>> dev_name() was that it's a commonly used human readable handle for
>> diagnostic infrastructure so it makes it easier to follow things around.
> 
> To me the dev_name() usage seems fine. Maybe David has some real reason
> to change this though?
> 
> In general I don't think userspace cares what the SPMI device name is,
> i.e. the device name isn't used for dev nodes because SPMI doesn't
> support ioctls or read/write APIs on the bus. That could be a nice
> feature addition though, to support something like i2c-dev.
> 
> Changing it so that regmap debugfs is less likely to collide looks
> weird. It doesn't actually collide anyway, so it seems like we're adding
> spmi prefix to make it easier to find in debugfs?

Yes, that is correct.  There isn't a collision since I2C uses 0-0000 and
SPMI uses 0-00 naming scheme.  However, those names are very similar and
it is hard for a user to tell which is which inside
/sys/kernel/debug/regmap without a deep understanding of the I2C and SPMI
code.

The SPMI regmap debugfs files are used extensively for testing and debug
purposes internally at Qualcomm and by our customers.  It would be helpful
if the more verbose naming scheme were accepted upstream to avoid
confusion and broken test scripts.

Thanks,
David

-- 
The Qualcomm Innovation Center, Inc. is a member of the Code Aurora Forum,
a Linux Foundation Collaborative Project


Re: [PATCH v3 devicetree 0/2] Add Seville Ethernet switch to T1040RDB

2020-10-01 Thread Vladimir Oltean
On Thu, Oct 01, 2020 at 01:10:05PM -0700, David Miller wrote:
> From: Vladimir Oltean 
> Date: Thu,  1 Oct 2020 16:20:11 +0300
> 
> > Seville is a DSA switch that is embedded inside the T1040 SoC, and
> > supported by the mscc_seville DSA driver inside drivers/net/dsa/ocelot.
> > 
> > This series adds this switch to the SoC's dtsi files and to the T1040RDB
> > board file.
> 
> I am assuming the devicetree folks will pick this series up.
> 
> Thanks.
> 

I can also resend via net-next if that's easier (the last commit on
arch/powerpc/boot/dts/fsl/t104*, as per today's linux-next, was in
2018, so there is no conflict).

I need to resend anyway, due to an epic failure where I got the port
numbering wrong...

[PATCH v3 06/13] x86/platform/uv: Add and Decode Arch Type in UVsystab

2020-10-01 Thread Mike Travis
A patch to add and process the UV Arch Type field in the UVsystab passed
from UV BIOS to the kernel.  This allows the system to be recognized
without relying on the OEM_ID which OEMs want to change.

Signed-off-by: Mike Travis 
Reviewed-by: Dimitri Sivanich 
Reviewed-by: Steve Wahl 
---
 arch/x86/include/asm/uv/bios.h |  16 +++-
 arch/x86/kernel/apic/x2apic_uv_x.c | 135 +
 arch/x86/platform/uv/bios_uv.c |  27 --
 3 files changed, 148 insertions(+), 30 deletions(-)

diff --git a/arch/x86/include/asm/uv/bios.h b/arch/x86/include/asm/uv/bios.h
index 70050d0136c3..97ac595ebc6a 100644
--- a/arch/x86/include/asm/uv/bios.h
+++ b/arch/x86/include/asm/uv/bios.h
@@ -5,8 +5,8 @@
 /*
  * UV BIOS layer definitions.
  *
- *  Copyright (c) 2008-2009 Silicon Graphics, Inc.  All Rights Reserved.
- *  Copyright (c) Russ Anderson 
+ * Copyright (C) 2007-2017 Silicon Graphics, Inc. All rights reserved.
+ * Copyright (c) Russ Anderson 
  */
 
 #include 
@@ -71,6 +71,11 @@ struct uv_gam_range_entry {
u32 limit;  /* PA bits 56:26 (UV_GAM_RANGE_SHFT) */
 };
 
+#defineUV_AT_SIZE  8   /* 7 character arch type + NULL char */
+struct uv_arch_type_entry {
+   chararchtype[UV_AT_SIZE];
+};
+
 #defineUV_SYSTAB_SIG   "UVST"
 #defineUV_SYSTAB_VERSION_1 1   /* UV2/3 BIOS version */
 #defineUV_SYSTAB_VERSION_UV4   0x400   /* UV4 BIOS base 
version */
@@ -79,10 +84,14 @@ struct uv_gam_range_entry {
 #defineUV_SYSTAB_VERSION_UV4_3 0x403   /* - GAM Range PXM 
Value */
 #defineUV_SYSTAB_VERSION_UV4_LATESTUV_SYSTAB_VERSION_UV4_3
 
+#defineUV_SYSTAB_VERSION_UV5   0x500   /* UV5 GAM base version 
*/
+#defineUV_SYSTAB_VERSION_UV5_LATESTUV_SYSTAB_VERSION_UV5
+
 #defineUV_SYSTAB_TYPE_UNUSED   0   /* End of table (offset 
== 0) */
 #defineUV_SYSTAB_TYPE_GAM_PARAMS   1   /* GAM PARAM 
conversions */
 #defineUV_SYSTAB_TYPE_GAM_RNG_TBL  2   /* GAM entry table */
-#defineUV_SYSTAB_TYPE_MAX  3
+#defineUV_SYSTAB_TYPE_ARCH_TYPE3   /* UV arch type */
+#defineUV_SYSTAB_TYPE_MAX  4
 
 /*
  * The UV system table describes specific firmware
@@ -133,6 +142,7 @@ extern s64 uv_bios_reserved_page_pa(u64, u64 *, u64 *, u64 
*);
 extern int uv_bios_set_legacy_vga_target(bool decode, int domain, int bus);
 
 extern int uv_bios_init(void);
+extern unsigned long get_uv_systab_phys(bool msg);
 
 extern unsigned long sn_rtc_cycles_per_second;
 extern int uv_type;
diff --git a/arch/x86/kernel/apic/x2apic_uv_x.c 
b/arch/x86/kernel/apic/x2apic_uv_x.c
index 4e055a01cc63..353825a0b327 100644
--- a/arch/x86/kernel/apic/x2apic_uv_x.c
+++ b/arch/x86/kernel/apic/x2apic_uv_x.c
@@ -31,7 +31,8 @@ static u64gru_start_paddr, gru_end_paddr;
 static union uvh_apiciduvh_apicid;
 static int uv_node_id;
 
-/* Unpack OEM/TABLE ID's to be NULL terminated strings */
+/* Unpack AT/OEM/TABLE ID's to be NULL terminated strings */
+static u8 uv_archtype[UV_AT_SIZE];
 static u8 oem_id[ACPI_OEM_ID_SIZE + 1];
 static u8 oem_table_id[ACPI_OEM_TABLE_ID_SIZE + 1];
 
@@ -286,20 +287,104 @@ static void __init uv_stringify(int len, char *to, char 
*from)
strncpy(to, from, len-1);
 }
 
+/* Find UV arch type entry in UVsystab */
+static unsigned long __init early_find_archtype(struct uv_systab *st)
+{
+   int i;
+
+   for (i = 0; st->entry[i].type != UV_SYSTAB_TYPE_UNUSED; i++) {
+   unsigned long ptr = st->entry[i].offset;
+
+   if (!ptr)
+   continue;
+   ptr += (unsigned long)st;
+   if (st->entry[i].type == UV_SYSTAB_TYPE_ARCH_TYPE)
+   return ptr;
+   }
+   return 0;
+}
+
+/* Validate UV arch type field in UVsystab */
+static int __init decode_arch_type(unsigned long ptr)
+{
+   struct uv_arch_type_entry *uv_ate = (struct uv_arch_type_entry *)ptr;
+   int n = strlen(uv_ate->archtype);
+
+   if (n > 0 && n < sizeof(uv_ate->archtype)) {
+   pr_info("UV: UVarchtype received from BIOS\n");
+   uv_stringify(UV_AT_SIZE, uv_archtype, uv_ate->archtype);
+   return 1;
+   }
+   return 0;
+}
+
+/* Determine if UV arch type entry might exist in UVsystab */
+static int __init early_get_arch_type(void)
+{
+   unsigned long uvst_physaddr, uvst_size, ptr;
+   struct uv_systab *st;
+   u32 rev;
+   int ret;
+
+   uvst_physaddr = get_uv_systab_phys(0);
+   if (!uvst_physaddr)
+   return 0;
+
+   st = early_memremap_ro(uvst_physaddr, sizeof(struct uv_systab));
+   if (!st) {
+   pr_err("UV: Cannot access UVsystab, remap failed\n");
+   return 0;
+   }
+
+   rev = st->revision;
+   if (rev < UV_SYSTAB_VERSION_UV5) 

Re: [PATCH] mm: memcg/slab: fix slab statistics in !SMP configuration

2020-10-01 Thread Roman Gushchin
On Fri, Oct 02, 2020 at 08:08:40AM +0800, kbuild test robot wrote:
> Hi Roman,
> 
> Thank you for the patch! Yet something to improve:
> 
> [auto build test ERROR on mmotm/master]

It's a bogus error, the patch was applied onto mmotm/master, which doesn't
contain necessary slab controller patches.

Thanks!

> 
> url:
> https://github.com/0day-ci/linux/commits/Roman-Gushchin/mm-memcg-slab-fix-slab-statistics-in-SMP-configuration/20201002-044114
> base:   git://git.cmpxchg.org/linux-mmotm.git master
> config: i386-randconfig-s002-20200930 (attached as .config)
> compiler: gcc-9 (Debian 9.3.0-15) 9.3.0
> reproduce:
> # apt-get install sparse
> # sparse version: v0.6.2-201-g24bdaac6-dirty
> # 
> https://github.com/0day-ci/linux/commit/3e4248734433fea1624e4971258042af2f231e02
> git remote add linux-review https://github.com/0day-ci/linux
> git fetch --no-tags linux-review 
> Roman-Gushchin/mm-memcg-slab-fix-slab-statistics-in-SMP-configuration/20201002-044114
> git checkout 3e4248734433fea1624e4971258042af2f231e02
> # save the attached .config to linux build tree
> make W=1 C=1 CF='-fdiagnostic-prefix -D__CHECK_ENDIAN__' ARCH=i386 
> 
> If you fix the issue, kindly add following tag as appropriate
> Reported-by: kernel test robot 
> 
> All errors (new ones prefixed by >>):
> 
>In file included from include/linux/mm.h:1317,
> from include/linux/memcontrol.h:20,
> from include/linux/swap.h:9,
> from include/linux/suspend.h:5,
> from arch/x86/kernel/asm-offsets.c:13:
>include/linux/vmstat.h: In function '__mod_node_page_state':
> >> include/linux/vmstat.h:295:6: error: implicit declaration of function 
> >> 'vmstat_item_in_bytes' [-Werror=implicit-function-declaration]
>  295 |  if (vmstat_item_in_bytes(item)) {
>  |  ^~~~
>cc1: some warnings being treated as errors
>make[2]: *** [scripts/Makefile.build:99: arch/x86/kernel/asm-offsets.s] 
> Error 1
>make[2]: Target '__build' not remade because of errors.
>make[1]: *** [Makefile:1139: prepare0] Error 2
>make[1]: Target 'prepare' not remade because of errors.
>make: *** [Makefile:179: sub-make] Error 2
>make: Target 'prepare' not remade because of errors.
> 
> vim +/vmstat_item_in_bytes +295 include/linux/vmstat.h
> 
>291
>292static inline void __mod_node_page_state(struct pglist_data 
> *pgdat,
>293enum node_stat_item item, int delta)
>294{
>  > 295if (vmstat_item_in_bytes(item)) {
>296VM_WARN_ON_ONCE(delta & (PAGE_SIZE - 1));
>297delta >>= PAGE_SHIFT;
>298}
>299
>300node_page_state_add(delta, pgdat, item);
>301}
>302
> 
> ---
> 0-DAY CI Kernel Test Service, Intel Corporation
> https://urldefense.proofpoint.com/v2/url?u=https-3A__lists.01.org_hyperkitty_list_kbuild-2Dall-40lists.01.org=DwIBAg=5VD0RTtNlTh3ycd41b3MUw=jJYgtDM7QT-W-Fz_d29HYQ=a91mqnAqjcA0iLpnhkpBXCmVqE_BuOIny-YmkB8jp2U=4EiiISKLS8YORtIb2rqP7oxVeqhBmKzMHqJzWhDAuMw=
>  




Gute Nachrichten !

2020-10-01 Thread Mr. Marvin
Die letzten Monate waren nicht gut für Unternehmen, Institutionen und 
Einzelpersonen auf der ganzen Welt. Die globale Epidemie (Covid-19) hat alle 
finanziell erschöpft und Sie wurden nicht freigestellt. Ihr Hilferuf wurde 
gehört und wir sind bereit, Ihnen unsere freundliche Geste anzubieten. Sie 
gehören zu den 3. Chargen, die von unserem Fondsprogramm profitieren, und ich 
muss aufrichtig sagen, dass die 1. und 2. Charge für die von uns geleistete 
Hilfe dankbar ist. Sie haben erfolgreich die kumulierte Gesamtsumme von 
(50.000,00 USD) als Gemeinschaftsspende von Oxfam Aid erhalten. Antworten Sie 
zurück, um weitere Informationen und Anweisungen zur Beantragung Ihres 
Zuschusses zu erhalten.

Denken Sie daran, immer in Sicherheit zu bleiben und soziale Distanzierung zu 
üben. Befolgen Sie immer die Anweisungen der örtlichen Behörden.

Herr Marvin Sims,

Supervisor (Oxfam Finanzabteilung)
Oxfam International Inc


Re: [PATCH v8 6/8] KVM: x86: VMX: Prevent MSR passthrough when MSR access is denied

2020-10-01 Thread Peter Xu
Hi,

I reported in the v13 cover letter of the kvm dirty ring series that this patch
seems to be broken.  Today I tried to reproduce it with the simplest VM, and
after a closer look...

On Fri, Sep 25, 2020 at 04:34:20PM +0200, Alexander Graf wrote:
> @@ -3764,15 +3859,14 @@ static u8 vmx_msr_bitmap_mode(struct kvm_vcpu *vcpu)
>   return mode;
>  }
>  
> -static void vmx_update_msr_bitmap_x2apic(struct kvm_vcpu *vcpu,
> -  unsigned long *msr_bitmap, u8 mode)
> +static void vmx_update_msr_bitmap_x2apic(struct kvm_vcpu *vcpu, u8 mode)
>  {
>   int msr;
>  
> - for (msr = 0x800; msr <= 0x8ff; msr += BITS_PER_LONG) {
> - unsigned word = msr / BITS_PER_LONG;
> - msr_bitmap[word] = (mode & MSR_BITMAP_MODE_X2APIC_APICV) ? 0 : 
> ~0;
> - msr_bitmap[word + (0x800 / sizeof(long))] = ~0;
> + for (msr = 0x800; msr <= 0x8ff; msr++) {
> + bool intercepted = !!(mode & MSR_BITMAP_MODE_X2APIC_APICV);
> +
> + vmx_set_intercept_for_msr(vcpu, msr, MSR_TYPE_RW, intercepted);
>   }
>  
>   if (mode & MSR_BITMAP_MODE_X2APIC) {

... I think we may want below change to be squashed:

diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index d160aad59697..7d3f2815b04d 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -3781,9 +3781,10 @@ static void vmx_update_msr_bitmap_x2apic(struct kvm_vcpu 
*vcpu, u8 mode)
int msr;
 
for (msr = 0x800; msr <= 0x8ff; msr++) {
-   bool intercepted = !!(mode & MSR_BITMAP_MODE_X2APIC_APICV);
+   bool apicv = mode & MSR_BITMAP_MODE_X2APIC_APICV;
 
-   vmx_set_intercept_for_msr(vcpu, msr, MSR_TYPE_RW, intercepted);
+   vmx_set_intercept_for_msr(vcpu, msr, MSR_TYPE_R, !apicv);
+   vmx_set_intercept_for_msr(vcpu, msr, MSR_TYPE_W, true);
}
 
if (mode & MSR_BITMAP_MODE_X2APIC) {

This fixes my problem the same as having this patch reverted.

-- 
Peter Xu



Re: [PATCH v3 2/3] iommu/tegra-smmu: Rework .probe_device and .attach_dev

2020-10-01 Thread Nicolin Chen
On Thu, Oct 01, 2020 at 11:33:38PM +0300, Dmitry Osipenko wrote:
> >>> If we can't come to an agreement on globalizing mc pointer, would
> >>> it be possible to pass tegra_mc_driver through tegra_smmu_probe()
> >>> so we can continue to use driver_find_device_by_fwnode() as v1?
> >>>
> >>> v1: https://lkml.org/lkml/2020/9/26/68
> >>
> >> tegra_smmu_probe() already takes a struct tegra_mc *. Did you mean
> >> tegra_smmu_probe_device()? I don't think we can do that because it isn't
> > 
> > I was saying to have a global parent_driver pointer: similar to
> > my v1, yet rather than "extern" the tegra_mc_driver, we pass it
> > > through tegra_smmu_probe() and store it in a static global value
> > so as to call tegra_smmu_get_by_fwnode() in ->probe_device().
> > 
> > Though I agree that creating a global device pointer (mc) might
> > be controversial, yet having a global parent_driver pointer may
> > not be against the rule, considering that it is common in iommu
> > drivers to call driver_find_device_by_fwnode in probe_device().
> 
> You don't need the global pointer if you have SMMU OF node.
> 
> You could also get driver pointer from mc->dev->driver.
> 
> But I don't think you need to do this at all. The probe_device() could
> be invoked only for the tegra_smmu_ops and then seems you could use
> dev_iommu_priv_set() in tegra_smmu_of_xlate(), like sun50i-iommu driver
> does.

Getting iommu device pointer using driver_find_device_by_fwnode()
is a common practice in ->probe_device() of other iommu drivers.
But this requires a device_driver pointer that tegra-smmu doesn't
have. So passing tegra_mc_driver through tegra_smmu_probe() will
address it.


[PATCH] tracepoint: Fix out of sync data passing by static caller

2020-10-01 Thread Steven Rostedt
From: Steven Rostedt (VMware) 

Naresh reported a bug discovered in linux-next that I can reliably
trigger myself. It appears to be a side effect of the static calls. It
happens when going from more than one tracepoint callback to a single
one, and removing the first callback on the list. The list of
tracepoint callbacks holds data and a function to call with the
parameters of that tracepoint and a handler to the associated data.

 old_list:
0: func = foo; data = NULL;
1: func = bar; data = &bar_struct;

 new_list:
0: func = bar; data = &bar_struct;


CPU 0   CPU 1
-   -
   tp_funcs = old_list;
   tp_static_caller = tp_iterator

   __DO_TRACE()
 
data = tp_funcs[0].data = NULL;

   tp_funcs = new_list;
   tracepoint_update_call()
  tp_static_caller = tp_funcs[0] = bar;
tp_static_caller(data)
   bar(data)
 x = data->item = NULL->item

   BOOM!

Funny, I was able to reliably trigger this bug, and always on the
sched_switch tracepoint. Which does make sense, because the
sched_switch tracepoint is a utility tracepoint that is attached to
collect information about tasks when tracing is enabled (like mapping
pids to comms). And most of these utility helpers do not have a data
item attached. But the trace events that attach to tracepoints do have
a data item that is used to find state and know what tracing buffer to
write to.

The race window is probably extended by any synchronization the text
poke may do, which would cause the sched switch to be triggered at
vulnerable times.

I've seen this:

 Testing event sched_migrate_task: OK
 Testing event sched_switch:
 BUG: kernel NULL pointer dereference, address: 0048
 #PF: supervisor read access in kernel mode
 #PF: error_code(0x) - not-present page
 PGD 0 P4D 0
 Oops:  [#1] PREEMPT SMP PTI
 CPU: 4 PID: 158 Comm: kworker/4:2 Not tainted 5.9.0-rc7-test-next-20201001+ #12
 Hardware name: Hewlett-Packard HP Compaq Pro 6300 SFF/339A, BIOS K01 v03.03 
07/14/2016
 Workqueue:  0x0 (events)
 RIP: 0010:trace_event_raw_event_sched_switch+0x1d/0x160
 Code: 75 c3 e9 4e ff ff ff e8 01 bd 9f 00 90 55 48 89 e5 41 57 49 89 cf 41 56 
49 89 d6 41 55 41 89 f5 41 54 49 89 fc 53 48 83 ec 40 <48> 8b 5f 48 65 48 8b 04 
25 28 00 00 00 48
1 c0 f6 c7
 RSP: 0018:a93680487dc8 EFLAGS: 00010082
 RAX: 98a053129bb0 RBX: 98a05ab2df98 RCX: 98a059a7
 RDX: 98a052ce5180 RSI:  RDI: 
 RBP: a93680487e30 R08:  R09: 0001
 R10: 98a052ce5180 R11:  R12: 
 R13:  R14: 98a052ce5180 R15: 98a059a7
 FS:  () GS:98a05ab0() knlGS:
 CS:  0010 DS:  ES:  CR0: 80050033
 CR2: 0048 CR3: 3b612002 CR4: 001706e0
 Call Trace:
  ? trace_event_raw_event_sched_move_numa+0x100/0x100
  __schedule+0x5dd/0xa40
  schedule+0x45/0xe0
  worker_thread+0xc6/0x3a0
  ? process_one_work+0x570/0x570
  kthread+0x128/0x170
  ? kthread_park+0x90/0x90
  ret_from_fork+0x22/0x30

And that's called directly by the static call to the sched_switch trace
event callback (not the iterator), and it triggers with the data
pointer passed as NULL.

To solve this, add a tracepoint_synchronize_unregister() between
changing tp_funcs and updating the static tracepoint, that does both a
synchronize_rcu() and synchronize_srcu(). This will ensure that when
the static call is updated to the single callback that it will be
receiving the data that it registered with.

Note, to avoid over calling the synchronization functions, it is only
needed when going from the iterator back to a single caller, and if
that single caller wasn't the first one on the list before the update.

Link: 
https://lore.kernel.org/linux-next/CA+G9fYvPXVRO0NV7yL=FxCmFEMYkCwdz7R=9w+_votpt824...@mail.gmail.com

Reported-by: Naresh Kamboju 
Fixes: d25e37d89dd2f ("tracepoint: Optimize using static_call()")
Signed-off-by: Steven Rostedt (VMware) 
---
diff --git a/kernel/tracepoint.c b/kernel/tracepoint.c
index 1b4be44d1d2b..b0baec351bd7 100644
--- a/kernel/tracepoint.c
+++ b/kernel/tracepoint.c
@@ -221,7 +221,7 @@ static void *func_remove(struct tracepoint_func **funcs,
return old;
 }
 
-static void tracepoint_update_call(struct tracepoint *tp, struct 
tracepoint_func *tp_funcs)
+static void tracepoint_update_call(struct tracepoint *tp, struct 
tracepoint_func *tp_funcs, bool sync)
 {
void *func = tp->iterator;
 
@@ -229,8 +229,17 @@ static void tracepoint_update_call(struct tracepoint *tp, 
struct tracepoint_func
if (!tp->static_call_key)
return;
 
-   if (!tp_funcs[1].func)
+   if (!tp_funcs[1].func) {
   

Re: [PATCH v4 1/2] dt-bindings: usb: Add binding for discrete onboard USB hubs

2020-10-01 Thread Alan Stern
On Thu, Oct 01, 2020 at 02:54:12PM -0700, Matthias Kaehlcke wrote:
> Hi,
> 
> thanks for providing more insights on the USB hardware!

Sure.

> On Wed, Sep 30, 2020 at 09:24:13PM -0400, Alan Stern wrote:
> > A hub that attaches only to the USB-3 data wires in a cable is not USB
> > compliant.  A USB-2 device plugged into such a hub would not work.
> > 
> > But ports can be wired up in weird ways.  For example, it is possible
> > to have the USB-3 wires from a port going directly to the host
> > controller, while the USB-2 wires from the same port go through a
> > USB-2 hub which is then connected to a separate host controller.  (In
> > fact, my office computer has just such an arrangement.)
> 
> It's not clear to me how this case would be addressed when (some of) the
> handling is done in xhci-plat.c We have two host controllers now, which one
> is supposed to be in charge? I guess the idea is to specify the hub only
> for one of the controllers?

I don't grasp the point of this question.  It doesn't seem to be
relevant to the case you're concerned about -- your board isn't going to
wire up the special hub in this weird way, is it?

> > > Yes, I've been saying for some time we need a pre-probe. Or we need a
> > > forced probe where the subsystem walks the DT nodes for the bus and
> > > probes the devices in DT (if they're in DT, we know they are present).
> > > This was the discussion only a few weeks ago for MDIO (which I think
> > > concluded with they already do the latter).
> > 
> > This is why I suggested putting the new code into the xhci-platform
> > driver.  That is the right place for doing these "pre-probes" of DT
> > nodes for hubs attached to the host controller.
> 
> Reminder that the driver is not exclusively about powering the hub, but
> also about powering it off conditionally during system suspend, depending
> on what devices are connected to either of the busses. Should this also
> be done in the xhci-platform driver?

It certainly could be.  The platform-specific xhci suspend and resume
routines could power the hub on and off as needed, along with powering
the host controller.

> Since we are talking about "pre-probes" I imagine the idea is to have a
> USB device driver that implements the power on/off sequence (in pre_probe())
> and handles the suspend/resume case. I already went through a variant of
> this with an earlier version of the onboard_hub_driver, where suspend/resume
> case was handled by the USB hub device. One of the problems with this was
> that power must only be turned off after both USB hub devices have been
> suspended. Some instance needs to be aware that there are two USB devices
> and make the decision whether to cut the power during system suspend
> or not, which is one of the reasons I ended up with the platform
> driver. It's not clear to me how this would be addressed by using
> "pre-probes". Potentially some of the handling could be done by
> xhci-platform, but would that be really better than a dedicated driver?

_All_ of the handling could be done by xhci-plat.  Since the xHCI
controller is the parent of both the USB-2 and USB-3 incarnations of
the special hub, it won't get suspended until they are both in
suspend, and it will get resumed before either of them.  Similarly,
the power to the special hub could be switched on as part of the host
controller's probe routine and switched off during the host
controller's remove routine.

Using xhci-plat in this way would be better than a dedicated driver in
the sense that it wouldn't then be necessary to make up a fictitious
platform device and somehow describe it in DT.

The disadvantage is that we would end up with a driver that's
nominally meant to handle host controllers but now also manages (at
least in part) hubs.  A not-so-clean separation of functions.  But
that's not terribly different from the way your current patch works,
right?

Alan Stern


[PATCH 0/2] Broad write-locking of nascent mm in execve

2020-10-01 Thread Jann Horn
These two patches replace "mmap locking API: don't check locking
if the mm isn't live yet"[1], which is currently in the mmotm tree,
and should be placed in the same spot where the old patch was.

While I originally said that this would be an alternative
patch (meaning that the existing patch would have worked just
as well), the new patches actually address an additional issue
that the old patch missed (bprm->vma is used after the switch
to the new mm).

I have boot-tested these patches on x86-64 (with lockdep) and
!MMU arm (the latter with both FLAT and ELF).

[1] 
https://lkml.kernel.org/r/cag48ez03yjg9ju_6tgimcavjutyre_o4leq7901b5zocnna...@mail.gmail.com

Jann Horn (2):
  mmap locking API: Order lock of nascent mm outside lock of live mm
  exec: Broadly lock nascent mm until setup_arg_pages()

 arch/um/include/asm/mmu_context.h |  3 +-
 fs/exec.c | 64 ---
 include/linux/binfmts.h   |  2 +-
 include/linux/mmap_lock.h | 23 ++-
 kernel/fork.c |  7 +---
 5 files changed, 59 insertions(+), 40 deletions(-)


base-commit: fb0155a09b0224a7147cb07a4ce6034c8d29667f
prerequisite-patch-id: 08f97130a51898a5f6efddeeb5b42638577398c7
prerequisite-patch-id: 577664d761cd23fe9031ffdb1d3c9ac313572c67
prerequisite-patch-id: dc29a39716aa8689f80ba2767803d9df3709beaa
prerequisite-patch-id: 42b1b546d33391ead2753621f541bcc408af1769
prerequisite-patch-id: 2cbb839f57006f32e21f4229e099ae1bd782be24
prerequisite-patch-id: 1b4daf01cf61654a5ec54b5c3f7c7508be7244ee
prerequisite-patch-id: f46cc8c99f1909fe2a65fbc3cf1f6bc57489a086
prerequisite-patch-id: 2b0caed97223241d5008898dde995d02fda544e4
prerequisite-patch-id: 6b7adcb54989e1ec3370f256ff2c35d19cf785aa
-- 
2.28.0.806.g8561365e88-goog


VM_HUGEPAGE support for XFS

2020-10-01 Thread Matthew Wilcox


Today I decided to implement VM_HUGEPAGE support for XFS.  It turned out
to be a rather simpler implementation than I was expecting because I
could reuse the readahead implementation.

Feel free to try it for yourself:
http://git.infradead.org/users/willy/pagecache.git

The patches up to "fs: Do not update nr_thps for mappings which support... "
are in linux-next for 5.10.  I hope to get the rest into 5.11.


[PATCH 1/2] mmap locking API: Order lock of nascent mm outside lock of live mm

2020-10-01 Thread Jann Horn
Until now, the mmap lock of the nascent mm was ordered inside the mmap lock
of the old mm (in dup_mmap() and in UML's activate_mm()).
A following patch will change the exec path to very broadly lock the
nascent mm, but fine-grained locking should still work at the same time for
the new mm.
To do this in a way that lockdep is happy about, let's turn around the lock
ordering in both places that currently nest the locks.
Since SINGLE_DEPTH_NESTING is normally used for the inner nesting layer,
make up our own lock subclass MMAP_LOCK_SUBCLASS_NASCENT and use that
instead.

The added locking calls in exec_mmap() are temporary; the following patch
will move the locking out of exec_mmap().

Signed-off-by: Jann Horn 
---
 arch/um/include/asm/mmu_context.h |  3 +--
 fs/exec.c |  4 
 include/linux/mmap_lock.h | 23 +--
 kernel/fork.c |  7 ++-
 4 files changed, 28 insertions(+), 9 deletions(-)

diff --git a/arch/um/include/asm/mmu_context.h
b/arch/um/include/asm/mmu_context.h
index 17ddd4edf875..c13bc5150607 100644
--- a/arch/um/include/asm/mmu_context.h
+++ b/arch/um/include/asm/mmu_context.h
@@ -48,9 +48,8 @@ static inline void activate_mm(struct mm_struct
*old, struct mm_struct *new)
 * when the new ->mm is used for the first time.
 */
__switch_mm(>context.id);
-   mmap_write_lock_nested(new, SINGLE_DEPTH_NESTING);
+   mmap_assert_write_locked(new);
uml_setup_stubs(new);
-   mmap_write_unlock(new);
 }

 static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next,
diff --git a/fs/exec.c b/fs/exec.c
index a91003e28eaa..229dbc7aa61a 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -1114,6 +1114,8 @@ static int exec_mmap(struct mm_struct *mm)
if (ret)
return ret;

+   mmap_write_lock_nascent(mm);
+
if (old_mm) {
/*
 * Make sure that if there is a core dump in progress
@@ -1125,6 +1127,7 @@ static int exec_mmap(struct mm_struct *mm)
if (unlikely(old_mm->core_state)) {
mmap_read_unlock(old_mm);
mutex_unlock(>signal->exec_update_mutex);
+   mmap_write_unlock(mm);
return -EINTR;
}
}
@@ -1138,6 +1141,7 @@ static int exec_mmap(struct mm_struct *mm)
tsk->mm->vmacache_seqnum = 0;
vmacache_flush(tsk);
task_unlock(tsk);
+   mmap_write_unlock(mm);
if (old_mm) {
mmap_read_unlock(old_mm);
BUG_ON(active_mm != old_mm);
diff --git a/include/linux/mmap_lock.h b/include/linux/mmap_lock.h
index 0707671851a8..24de1fe99ee4 100644
--- a/include/linux/mmap_lock.h
+++ b/include/linux/mmap_lock.h
@@ -3,6 +3,18 @@

 #include 

+/*
+ * Lock subclasses for the mmap_lock.
+ *
+ * MMAP_LOCK_SUBCLASS_NASCENT is for core kernel code that wants to lock an mm
+ * that is still being constructed and wants to be able to access the active mm
+ * normally at the same time. It nests outside MMAP_LOCK_SUBCLASS_NORMAL.
+ */
+enum {
+   MMAP_LOCK_SUBCLASS_NORMAL = 0,
+   MMAP_LOCK_SUBCLASS_NASCENT
+};
+
 #define MMAP_LOCK_INITIALIZER(name) \
.mmap_lock = __RWSEM_INITIALIZER((name).mmap_lock),

@@ -16,9 +28,16 @@ static inline void mmap_write_lock(struct mm_struct *mm)
down_write(>mmap_lock);
 }

-static inline void mmap_write_lock_nested(struct mm_struct *mm, int subclass)
+/*
+ * Lock an mm_struct that is still being set up (during fork or exec).
+ * This nests outside the mmap locks of live mm_struct instances.
+ * No interruptible/killable versions exist because at the points where you're
+ * supposed to use this helper, the mm isn't visible to anything else, so we
+ * expect the mmap_lock to be uncontended.
+ */
+static inline void mmap_write_lock_nascent(struct mm_struct *mm)
 {
-   down_write_nested(>mmap_lock, subclass);
+   down_write_nested(>mmap_lock, MMAP_LOCK_SUBCLASS_NASCENT);
 }

 static inline int mmap_write_lock_killable(struct mm_struct *mm)
diff --git a/kernel/fork.c b/kernel/fork.c
index da8d360fb032..db67eb4ac7bd 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -474,6 +474,7 @@ static __latent_entropy int dup_mmap(struct mm_struct *mm,
unsigned long charge;
LIST_HEAD(uf);

+   mmap_write_lock_nascent(mm);
uprobe_start_dup_mmap();
if (mmap_write_lock_killable(oldmm)) {
retval = -EINTR;
@@ -481,10 +482,6 @@ static __latent_entropy int dup_mmap(struct mm_struct *mm,
}
flush_cache_dup_mm(oldmm);
uprobe_dup_mmap(oldmm, mm);
-   /*
-* Not linked in yet - no deadlock potential:
-*/
-   mmap_write_lock_nested(mm, SINGLE_DEPTH_NESTING);

/* No ordering required: file already has been exposed. */
RCU_INIT_POINTER(mm->exe_file, get_mm_exe_file(oldmm));
@@ -600,12 +597,12 @@ static __latent_entropy int 

[PATCH 2/2] exec: Broadly lock nascent mm until setup_arg_pages()

2020-10-01 Thread Jann Horn
While AFAIK there currently is nothing that can modify the VMA tree of a
new mm until userspace has started running under the mm, we should properly
lock the mm here anyway, both to keep lockdep happy when adding locking
assertions and to be safe in the future in case someone e.g. decides to
permit VMA-tree-mutating operations in process_madvise_behavior_valid().

The goal of this patch is to broadly lock the nascent mm in the exec path,
from around the time it is created all the way to the end of
setup_arg_pages() (because setup_arg_pages() accesses bprm->vma).
As long as the mm is write-locked, keep it around in bprm->mm, even after
it has been installed on the task (with an extra reference on the mm, to
reduce complexity in free_bprm()).
After setup_arg_pages(), we have to unlock the mm so that APIs such as
copy_to_user() will work in the following binfmt-specific setup code.

Suggested-by: Jason Gunthorpe 
Suggested-by: Michel Lespinasse 
Signed-off-by: Jann Horn 
---
 fs/exec.c   | 68 -
 include/linux/binfmts.h |  2 +-
 2 files changed, 35 insertions(+), 35 deletions(-)

diff --git a/fs/exec.c b/fs/exec.c
index 229dbc7aa61a..fe11d77e397a 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -254,11 +254,6 @@ static int __bprm_mm_init(struct linux_binprm *bprm)
return -ENOMEM;
vma_set_anonymous(vma);

-   if (mmap_write_lock_killable(mm)) {
-   err = -EINTR;
-   goto err_free;
-   }
-
/*
 * Place the stack at the largest stack address the architecture
 * supports. Later, we'll move this to an appropriate place. We don't
@@ -276,12 +271,9 @@ static int __bprm_mm_init(struct linux_binprm *bprm)
goto err;

mm->stack_vm = mm->total_vm = 1;
-   mmap_write_unlock(mm);
bprm->p = vma->vm_end - sizeof(void *);
return 0;
 err:
-   mmap_write_unlock(mm);
-err_free:
bprm->vma = NULL;
vm_area_free(vma);
return err;
@@ -364,9 +356,9 @@ static int bprm_mm_init(struct linux_binprm *bprm)
struct mm_struct *mm = NULL;

bprm->mm = mm = mm_alloc();
-   err = -ENOMEM;
if (!mm)
-   goto err;
+   return -ENOMEM;
+   mmap_write_lock_nascent(mm);

/* Save current stack limit for all calculations made during exec. */
task_lock(current->group_leader);
@@ -374,17 +366,12 @@ static int bprm_mm_init(struct linux_binprm *bprm)
task_unlock(current->group_leader);

err = __bprm_mm_init(bprm);
-   if (err)
-   goto err;
-
-   return 0;
-
-err:
-   if (mm) {
-   bprm->mm = NULL;
-   mmdrop(mm);
-   }
+   if (!err)
+   return 0;

+   bprm->mm = NULL;
+   mmap_write_unlock(mm);
+   mmdrop(mm);
return err;
 }

@@ -735,6 +722,7 @@ static int shift_arg_pages(struct vm_area_struct
*vma, unsigned long shift)
 /*
  * Finalizes the stack vm_area_struct. The flags and permissions are updated,
  * the stack is optionally relocated, and some extra space is added.
+ * At the end of this, the mm_struct will be unlocked on success.
  */
 int setup_arg_pages(struct linux_binprm *bprm,
unsigned long stack_top,
@@ -787,9 +775,6 @@ int setup_arg_pages(struct linux_binprm *bprm,
bprm->loader -= stack_shift;
bprm->exec -= stack_shift;

-   if (mmap_write_lock_killable(mm))
-   return -EINTR;
-
vm_flags = VM_STACK_FLAGS;

/*
@@ -807,7 +792,7 @@ int setup_arg_pages(struct linux_binprm *bprm,
ret = mprotect_fixup(vma, , vma->vm_start, vma->vm_end,
vm_flags);
if (ret)
-   goto out_unlock;
+   return ret;
BUG_ON(prev != vma);

if (unlikely(vm_flags & VM_EXEC)) {
@@ -819,7 +804,7 @@ int setup_arg_pages(struct linux_binprm *bprm,
if (stack_shift) {
ret = shift_arg_pages(vma, stack_shift);
if (ret)
-   goto out_unlock;
+   return ret;
}

/* mprotect_fixup is overkill to remove the temporary stack flags */
@@ -846,11 +831,17 @@ int setup_arg_pages(struct linux_binprm *bprm,
current->mm->start_stack = bprm->p;
ret = expand_stack(vma, stack_base);
if (ret)
-   ret = -EFAULT;
+   return -EFAULT;

-out_unlock:
+   /*
+* From this point on, anything that wants to poke around in the
+* mm_struct must lock it by itself.
+*/
+   bprm->vma = NULL;
mmap_write_unlock(mm);
-   return ret;
+   mmput(mm);
+   bprm->mm = NULL;
+   return 0;
 }
 EXPORT_SYMBOL(setup_arg_pages);

@@ -1114,8 +1105,6 @@ static int exec_mmap(struct mm_struct *mm)
if (ret)
return ret;

-   mmap_write_lock_nascent(mm);
-
if (old_mm) {
/*
  

Re: [PATCH blk-next 0/2] Delete the get_vector_affinity leftovers

2020-10-01 Thread Jens Axboe
On 9/30/20 11:01 PM, Leon Romanovsky wrote:
> On Tue, Sep 29, 2020 at 12:13:56PM +0300, Leon Romanovsky wrote:
>> From: Leon Romanovsky 
>>
>> There are no drivers that implement .get_vector_affinity(), so delete
>> the RDMA function and simplify block code.
>>
>> Thanks
>>
>> P.S. Probably it should go through block tree.
>>
>> Leon Romanovsky (2):
>>   blk-mq-rdma: Delete not-used multi-queue RDMA map queue code
>>   RDMA/core: Delete not-implemented get_vector_affinity
> 
> Jens, Keith
> 
> How can we progress here?

I'd really like for the nvme side to sign off on this first.

-- 
Jens Axboe



[PATCH v2] tracepoint: Fix out of sync data passing by static caller

2020-10-01 Thread Steven Rostedt
From: Steven Rostedt (VMware) 

Naresh reported a bug discovered in linux-next that I can reliably
trigger myself. It appears to be a side effect of the static calls. It
happens when going from more than one tracepoint callback to a single
one, and removing the first callback on the list. The list of
tracepoint callbacks holds data and a function to call with the
parameters of that tracepoint and a handler to the associated data.

 old_list:
0: func = foo; data = NULL;
1: func = bar; data = &bar_struct;

 new_list:
0: func = bar; data = &bar_struct;


CPU 0   CPU 1
-   -
   tp_funcs = old_list;
   tp_static_caller = tp_iterator

   __DO_TRACE()
 
data = tp_funcs[0].data = NULL;

   tp_funcs = new_list;
   tracepoint_update_call()
  tp_static_caller = tp_funcs[0] = bar;
tp_static_caller(data)
   bar(data)
 x = data->item = NULL->item

   BOOM!

[ Update: The current code is actually worse, which is probably why it
  is so easy to trigger. It does the tp_funcs = new_list *after* the
  static_caller update! ]

Funny, I was able to reliably trigger this bug, and always on the
sched_switch tracepoint. Which does make sense, because the
sched_switch tracepoint is a utility tracepoint that is attached to
collect information about tasks when tracing is enabled (like mapping
pids to comms). And most of these utility helpers do not have a data
item attached. But the trace events that attach to tracepoints do have
a data item that is used to find state and know what tracing buffer to
write to.

The race window is probably extended by any synchronization the text
poke may do, which would cause the sched switch to be triggered at
vulnerable times.

I've seen this:

 Testing event sched_migrate_task: OK
 Testing event sched_switch:
 BUG: kernel NULL pointer dereference, address: 0048
 #PF: supervisor read access in kernel mode
 #PF: error_code(0x) - not-present page
 PGD 0 P4D 0
 Oops:  [#1] PREEMPT SMP PTI
 CPU: 4 PID: 158 Comm: kworker/4:2 Not tainted 5.9.0-rc7-test-next-20201001+ #12
 Hardware name: Hewlett-Packard HP Compaq Pro 6300 SFF/339A, BIOS K01 v03.03 
07/14/2016
 Workqueue:  0x0 (events)
 RIP: 0010:trace_event_raw_event_sched_switch+0x1d/0x160
 Code: 75 c3 e9 4e ff ff ff e8 01 bd 9f 00 90 55 48 89 e5 41 57 49 89 cf 41 56 
49 89 d6 41 55 41 89 f5 41 54 49 89 fc 53 48 83 ec 40 <48> 8b 5f 48 65 48 8b 04 
25 28 00 00 00 48
1 c0 f6 c7
 RSP: 0018:a93680487dc8 EFLAGS: 00010082
 RAX: 98a053129bb0 RBX: 98a05ab2df98 RCX: 98a059a7
 RDX: 98a052ce5180 RSI:  RDI: 
 RBP: a93680487e30 R08:  R09: 0001
 R10: 98a052ce5180 R11:  R12: 
 R13:  R14: 98a052ce5180 R15: 98a059a7
 FS:  () GS:98a05ab0() knlGS:
 CS:  0010 DS:  ES:  CR0: 80050033
 CR2: 0048 CR3: 3b612002 CR4: 001706e0
 Call Trace:
  ? trace_event_raw_event_sched_move_numa+0x100/0x100
  __schedule+0x5dd/0xa40
  schedule+0x45/0xe0
  worker_thread+0xc6/0x3a0
  ? process_one_work+0x570/0x570
  kthread+0x128/0x170
  ? kthread_park+0x90/0x90
  ret_from_fork+0x22/0x30

And that's called directly by the static call to the sched_switch trace
event callback (not the iterator), and it triggers with the data
pointer passed as NULL.

To solve this, add a tracepoint_synchronize_unregister() between
changing tp_funcs and updating the static tracepoint, that does both a
synchronize_rcu() and synchronize_srcu(). This will ensure that when
the static call is updated to the single callback that it will be
receiving the data that it registered with.

Note, to avoid calling the synchronization functions more often than
necessary, the synchronization is only done when going from the iterator
back to a single caller, and only if that single caller wasn't the first
one on the list before the update.

Link: 
https://lore.kernel.org/linux-next/CA+G9fYvPXVRO0NV7yL=FxCmFEMYkCwdz7R=9w+_votpt824...@mail.gmail.com

Reported-by: Naresh Kamboju 
Fixes: d25e37d89dd2f ("tracepoint: Optimize using static_call()")
Signed-off-by: Steven Rostedt (VMware) 
---

Changes since v1:

  - Noticed that tp_funcs wasn't assigned first before the static update.

diff --git a/kernel/tracepoint.c b/kernel/tracepoint.c
index 1b4be44d1d2b..3f659f855074 100644
--- a/kernel/tracepoint.c
+++ b/kernel/tracepoint.c
@@ -221,7 +221,7 @@ static void *func_remove(struct tracepoint_func **funcs,
return old;
 }
 
-static void tracepoint_update_call(struct tracepoint *tp, struct 
tracepoint_func *tp_funcs)
+static void tracepoint_update_call(struct tracepoint *tp, struct 
tracepoint_func *tp_funcs, bool sync)
 {
void *func = tp->iterator;
 
@@ -229

Re: [PATCH v3] mmc: core: don't set limits.discard_granularity as 0

2020-10-01 Thread Coly Li
On 2020/10/2 02:47, Vicente Bergas wrote:
> On Thursday, October 1, 2020 9:18:24 AM CEST, Coly Li wrote:
>> In mmc_queue_setup_discard() the mmc driver queue's discard_granularity
>> might be set as 0 (when card->pref_erase > max_discard) while the mmc
>> device still declares to support discard operation. This is buggy and
>> triggered the following kernel warning message,
>>
>> WARNING: CPU: 0 PID: 135 at __blkdev_issue_discard+0x200/0x294
>> CPU: 0 PID: 135 Comm: f2fs_discard-17 Not tainted 5.9.0-rc6 #1
>> Hardware name: Google Kevin (DT)
>> pstate: 0005 (nzcv daif -PAN -UAO BTYPE=--)
>> pc : __blkdev_issue_discard+0x200/0x294
>> lr : __blkdev_issue_discard+0x54/0x294
>> sp : 800011dd3b10
>> x29: 800011dd3b10 x28:  x27: 800011dd3cc4 x26:
>> 800011dd3e18 x25: 0004e69b x24: 0c40 x23:
>> f1deaaf0 x22: f2849200 x21: 002734d8 x20:
>> 0008 x19:  x18:  x17:
>>  x16:  x15:  x14:
>> 0394 x13:  x12:  x11:
>>  x10: 08b0 x9 : 800011dd3cb0 x8 :
>> 0004e69b x7 :  x6 : f1926400 x5 :
>> f1940800 x4 :  x3 : 0c40 x2 :
>> 0008 x1 : 002734d8 x0 :  Call trace:
>> __blkdev_issue_discard+0x200/0x294
>> __submit_discard_cmd+0x128/0x374
>> __issue_discard_cmd_orderly+0x188/0x244
>> __issue_discard_cmd+0x2e8/0x33c
>> issue_discard_thread+0xe8/0x2f0
>> kthread+0x11c/0x120
>> ret_from_fork+0x10/0x1c
>> ---[ end trace e4c8023d33dfe77a ]---
>>
>> This patch fixes the issue by setting discard_granularity as SECTOR_SIZE
>> instead of 0 when (card->pref_erase > max_discard) is true. Now no more
>> complain from __blkdev_issue_discard() for the improper value of discard
>> granularity.
>>
>> This issue is exposed after commit b35fd7422c2f ("block: check queue's
>> limits.discard_granularity in __blkdev_issue_discard()"), a "Fixes:" tag
>> is also added for the commit to make sure people won't miss this patch
>> after applying the change of __blkdev_issue_discard().
>>
>> Fixes: e056a1b5b67b ("mmc: queue: let host controllers specify maximum
>> discard timeout")
>> Fixes: b35fd7422c2f ("block: check queue's limits.discard_granularity
>> in __blkdev_issue_discard()").
>> Reported-by: Vicente Bergas 
>> Signed-off-by: Coly Li 
>> Acked-by: Adrian Hunter 
>> Cc: Ulf Hansson 
>> ---
>> Changelog,
>> v3, add Fixes tag for both commits.
>> v2, change commit id of the Fixes tag.
>> v1, initial version.
>>
>>  drivers/mmc/core/queue.c | 2 +-
>>  1 file changed, 1 insertion(+), 1 deletion(-)
>>
>> diff --git a/drivers/mmc/core/queue.c b/drivers/mmc/core/queue.c
>> index 6c022ef0f84d..350d0cc4ee62 100644
>> --- a/drivers/mmc/core/queue.c
>> +++ b/drivers/mmc/core/queue.c
>> @@ -190,7 +190,7 @@ static void mmc_queue_setup_discard(struct
>> request_queue *q,
>>  q->limits.discard_granularity = card->pref_erase << 9;
>>  /* granularity must not be greater than max. discard */
>>  if (card->pref_erase > max_discard)
>> -    q->limits.discard_granularity = 0;
>> +    q->limits.discard_granularity = SECTOR_SIZE;
>>  if (mmc_can_secure_erase_trim(card))
>>  blk_queue_flag_set(QUEUE_FLAG_SECERASE, q);
>>  }
> 
> Tested on rk3399-gru-kevin with f2fs filesystem over the mmc driver, that
> is, the same setup that reproduced the issue.
> The kernel warning message is no longer reported.
> So,
> Tested-by: Vicente Bergas 
> 

Hi Vicente,

Thank you very much!

Coly Li


Re: [PATCH v3 2/3] iommu/tegra-smmu: Rework .probe_device and .attach_dev

2020-10-01 Thread Nicolin Chen
On Thu, Oct 01, 2020 at 12:46:14PM +0200, Thierry Reding wrote:
> > > > -   /*
> > > > -* This is a bit of a hack. Ideally we'd want to simply return 
> > > > this
> > > > -* value. However the IOMMU registration process will attempt 
> > > > to add
> > > > -* all devices to the IOMMU when bus_set_iommu() is called. In 
> > > > order
> > > > -* not to rely on global variables to track the IOMMU instance, 
> > > > we
> > > > -* set it here so that it can be looked up from the 
> > > > .probe_device()
> > > > -* callback via the IOMMU device's .drvdata field.
> > > > -*/
> > > > -   mc->smmu = smmu;
> > > 
> > > I don't think this is going to work. I distinctly remember putting this
> > > here because we needed access to this before ->probe_device() had been
> > > called for any of the devices.
> > 
> > Do you remember which exact part of code needs to access mc->smmu
> > before ->probe_device() is called?
> > 
> > What I understood is that IOMMU core didn't allow ERR_PTR(-ENODEV)
> > return value from ->probe_device(), previously ->add_device(), to
> > carry on when you added this code/driver:
> > commit 8918465163171322c77a19d5258a95f56d89d2e4
> > Author: Thierry Reding 
> > Date:   Wed Apr 16 09:24:44 2014 +0200
> > memory: Add NVIDIA Tegra memory controller support
> > 
> > ..until the core had a change one year later:
> > commit 38667f18900afe172a4fe44279b132b4140f920f
> > Author: Joerg Roedel 
> > Date:   Mon Jun 29 10:16:08 2015 +0200
> > iommu: Ignore -ENODEV errors from add_device call-back
> > 
> > As the commit message of this change states, ->probe_device() will
> > be called from both bus_set_iommu() and really_probe() of each
> > device through of_iommu_configure() -- the latter one initializes
> > an fwspec by polling the iommus property in the IOMMU core, same
> > as what we do here in tegra-smmu. If this works, we can probably
> > drop the hack here and get rid of tegra_smmu_configure().
> 
> Looking at this a bit more, I notice that tegra_smmu_configure() does a
> lot of what's already done during of_iommu_configure(), so it'd indeed
> be nice if we could somehow get rid of that. However, like I said, I do
> recall that for DMA/IOMMU we need this prior to ->probe_device(), so it
> isn't clear to me if we can do that.
> 
> So I think in order to make progress we need to check that dropping this
> does indeed still work when we enable DMA/IOMMU (and the preliminary
> patches to pass 1:1 mappings via reserved-memory regions). If so, I
> think it should be safe to remove this.

I am attaching a patch that works with both IOMMU_DOMAIN_UNMANAGED
and IOMMU_DOMAIN_DMA. Would it be possible for you to give a test?

The implementation gets mc->smmu by using a parent_driver pointer, which
is what I was asking you about in the other reply. Still, it should let
us give it a try.
>From 01693c8d4af5abb38bb5ede4b22590a647909868 Mon Sep 17 00:00:00 2001
From: Nicolin Chen 
Date: Thu, 1 Oct 2020 17:51:26 -0700
Subject: [PATCH] iommu/tegra-smmu: Test

Signed-off-by: Nicolin Chen 
---
 drivers/iommu/tegra-smmu.c | 141 -
 drivers/memory/tegra/mc.c  |   5 +-
 include/soc/tegra/mc.h |   4 +-
 3 files changed, 51 insertions(+), 99 deletions(-)

diff --git a/drivers/iommu/tegra-smmu.c b/drivers/iommu/tegra-smmu.c
index 6a3ecc334481..ade952d3143c 100644
--- a/drivers/iommu/tegra-smmu.c
+++ b/drivers/iommu/tegra-smmu.c
@@ -61,6 +61,9 @@ struct tegra_smmu_as {
 	u32 attr;
 };
 
+static const struct iommu_ops tegra_smmu_ops;
+static struct device_driver *parent_driver;
+
 static struct tegra_smmu_as *to_smmu_as(struct iommu_domain *dom)
 {
 	return container_of(dom, struct tegra_smmu_as, domain);
@@ -484,60 +487,50 @@ static void tegra_smmu_as_unprepare(struct tegra_smmu *smmu,
 static int tegra_smmu_attach_dev(struct iommu_domain *domain,
  struct device *dev)
 {
+	struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
 	struct tegra_smmu *smmu = dev_iommu_priv_get(dev);
 	struct tegra_smmu_as *as = to_smmu_as(domain);
-	struct device_node *np = dev->of_node;
-	struct of_phandle_args args;
 	unsigned int index = 0;
 	int err = 0;
 
-	while (!of_parse_phandle_with_args(np, "iommus", "#iommu-cells", index,
-	   )) {
-		unsigned int swgroup = args.args[0];
-
-		if (args.np != smmu->dev->of_node) {
-			of_node_put(args.np);
-			continue;
-		}
-
-		of_node_put(args.np);
+	if (!fwspec)
+		return -ENOENT;
 
+	for (index = 0; index < fwspec->num_ids; index++) {
 		err = tegra_smmu_as_prepare(smmu, as);
-		if (err < 0)
-			return err;
+		if (err)
+			goto disable;
 
-		tegra_smmu_enable(smmu, swgroup, as->id);
-		index++;
+		tegra_smmu_enable(smmu, fwspec->ids[index], as->id);
 	}
 
 	if (index == 0)
 		return -ENODEV;
 
 	return 0;
+
+disable:
+	while (index--) {
+		tegra_smmu_disable(smmu, fwspec->ids[index], as->id);
+		tegra_smmu_as_unprepare(smmu, as);
+	}
+
+	return err;
 }
 
 static void 

Re: [PATCH net-next] drivers/net/wan/hdlc_fr: Correctly handle special skb->protocol values

2020-10-01 Thread David Miller
From: Xie He 
Date: Mon, 28 Sep 2020 05:56:43 -0700

> The fr_hard_header function is used to prepend the header to skbs before
> transmission. It is used in 3 situations:
> 1) When a control packet is generated internally in this driver;
> 2) When a user sends an skb on an Ethernet-emulating PVC device;
> 3) When a user sends an skb on a normal PVC device.
> 
> These 3 situations need to be handled differently by fr_hard_header.
> Different headers should be prepended to the skb in different situations.
> 
> Currently fr_hard_header distinguishes these 3 situations using
> skb->protocol. For situation 1 and 2, a special skb->protocol value
> will be assigned before calling fr_hard_header, so that it can recognize
> these 2 situations. All skb->protocol values other than these special ones
> are treated by fr_hard_header as situation 3.
> 
> However, it is possible that in situation 3, the user sends an skb with
> one of the special skb->protocol values. In this case, fr_hard_header
> would incorrectly treat it as situation 1 or 2.
> 
> This patch tries to solve this issue by using skb->dev instead of
> skb->protocol to distinguish between these 3 situations. For situation
> 1, skb->dev would be NULL; for situation 2, skb->dev->type would be
> ARPHRD_ETHER; and for situation 3, skb->dev->type would be ARPHRD_DLCI.
> 
> This way fr_hard_header would be able to distinguish these 3 situations
> correctly regardless what skb->protocol value the user tries to use in
> situation 3.
> 
> Cc: Krzysztof Halasa 
> Signed-off-by: Xie He 

Applied, thank you.


[PATCH v4] mmc: core: don't set limits.discard_granularity as 0

2020-10-01 Thread Coly Li
In mmc_queue_setup_discard() the mmc driver queue's discard_granularity
might be set to 0 (when card->pref_erase > max_discard) while the mmc
device still declares support for the discard operation. This is a bug
and triggers the following kernel warning message:

WARNING: CPU: 0 PID: 135 at __blkdev_issue_discard+0x200/0x294
CPU: 0 PID: 135 Comm: f2fs_discard-17 Not tainted 5.9.0-rc6 #1
Hardware name: Google Kevin (DT)
pstate: 0005 (nzcv daif -PAN -UAO BTYPE=--)
pc : __blkdev_issue_discard+0x200/0x294
lr : __blkdev_issue_discard+0x54/0x294
sp : 800011dd3b10
x29: 800011dd3b10 x28:  x27: 800011dd3cc4 x26: 
800011dd3e18 x25: 0004e69b x24: 0c40 x23: 
f1deaaf0 x22: f2849200 x21: 002734d8 x20: 
0008 x19:  x18:  x17: 
 x16:  x15:  x14: 
0394 x13:  x12:  x11: 
 x10: 08b0 x9 : 800011dd3cb0 x8 : 
0004e69b x7 :  x6 : f1926400 x5 : 
f1940800 x4 :  x3 : 0c40 x2 : 
0008 x1 : 002734d8 x0 :  Call trace:
__blkdev_issue_discard+0x200/0x294
__submit_discard_cmd+0x128/0x374
__issue_discard_cmd_orderly+0x188/0x244
__issue_discard_cmd+0x2e8/0x33c
issue_discard_thread+0xe8/0x2f0
kthread+0x11c/0x120
ret_from_fork+0x10/0x1c
---[ end trace e4c8023d33dfe77a ]---

This patch fixes the issue by setting discard_granularity to SECTOR_SIZE
instead of 0 when (card->pref_erase > max_discard) is true. With this
change, __blkdev_issue_discard() no longer complains about an improper
discard granularity value.

This issue is exposed after commit b35fd7422c2f ("block: check queue's
limits.discard_granularity in __blkdev_issue_discard()"), a "Fixes:" tag
is also added for the commit to make sure people won't miss this patch
after applying the change of __blkdev_issue_discard().

Fixes: e056a1b5b67b ("mmc: queue: let host controllers specify maximum discard 
timeout")
Fixes: b35fd7422c2f ("block: check queue's limits.discard_granularity in 
__blkdev_issue_discard()").
Reported-and-tested-by: Vicente Bergas 
Signed-off-by: Coly Li 
Acked-by: Adrian Hunter 
Cc: Ulf Hansson 
---
Changelog,
v4, update to Reported-and-tested-by tag for Vicente Bergas.
v3, add Fixes tag for both commits.
v2, change commit id of the Fixes tag.
v1, initial version.

 drivers/mmc/core/queue.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/mmc/core/queue.c b/drivers/mmc/core/queue.c
index 6c022ef0f84d..350d0cc4ee62 100644
--- a/drivers/mmc/core/queue.c
+++ b/drivers/mmc/core/queue.c
@@ -190,7 +190,7 @@ static void mmc_queue_setup_discard(struct request_queue *q,
q->limits.discard_granularity = card->pref_erase << 9;
/* granularity must not be greater than max. discard */
if (card->pref_erase > max_discard)
-   q->limits.discard_granularity = 0;
+   q->limits.discard_granularity = SECTOR_SIZE;
if (mmc_can_secure_erase_trim(card))
blk_queue_flag_set(QUEUE_FLAG_SECERASE, q);
 }
-- 
2.26.2



Re: linux-next: build failure after merge of the net-next tree

2020-10-01 Thread David Miller
From: Stephen Rothwell 
Date: Tue, 29 Sep 2020 13:04:46 +1000

> Caused by commit
> 
>   eff7423365a6 ("net: core: introduce struct netdev_nested_priv for nested 
> interface infrastructure")
> 
> interacting with commit
> 
>   e1189d9a5fbe ("net: marvell: prestera: Add Switchdev driver implementation")
> 
> also in the net-next tree.

I would argue against that "also" as the first commit is only in the
'net' tree right now. :-)

This is simply something I'll have to resolve the next time net is merged
into net-next.

Thanks.


Re: [PATCH] caif_virtio: Remove redundant initialization of variable err

2020-10-01 Thread David Miller
From: Jing Xiangfeng 
Date: Wed, 30 Sep 2020 09:29:54 +0800

> After commit a8c7687bf216 ("caif_virtio: Check that vringh_config is not
> null"), the variable err is being initialized with '-EINVAL' that is
> meaningless. So remove it.
> 
> Signed-off-by: Jing Xiangfeng 

Applied to net-next.


Re: [PATCH net v1] net: phy: realtek: Modify 2.5G PHY name to RTL8226

2020-10-01 Thread David Miller
From: Willy Liu 
Date: Wed, 30 Sep 2020 14:48:58 +0800

> Realtek single-chip Ethernet PHY solutions can be separated as below:
> 10M/100Mbps: RTL8201X
> 1Gbps: RTL8211X
> 2.5Gbps: RTL8226/RTL8221X
> RTL8226 is Realtek's first version of a 2.5Gbps-compatible single PHY.
> Since RTL8226 is single port only, Realtek changed its name to RTL8221B
> starting from the second version.
> PHY ID for RTL8226 is 0x001cc800 and RTL8226B/RTL8221B is 0x001cc840.
> 
> RTL8125 is not a single PHY solution, it integrates PHY/MAC/PCIE bus
> controller and embedded memory.
> 
> Signed-off-by: Willy Liu 

Applied to net-next as this is just renaming rather than a functional
change.


Re: [PATCH v3 07/13] ASoC: audio-graph: Update driver as per new exposed members

2020-10-01 Thread Kuninori Morimoto


Hi Sameer

> As per the members exposed earlier in the series, audio graph driver
> is updated to make use of these. Functionally there is no change
> in behavior if these are not populated. So following changes are made
> as part of this.
> 
>  - Update 'dai_link->ops' for DPCM links if a custom 'snd_soc_ops'
>is defined by the vendor driver.
> 
>  - Consider 'force_dpcm' flag status when deciding if a DAI link
>needs to be treated as DPCM or not. In doing so the logic is
>moved to a separate inline function for a better readability.
> 
>  - Populate 'dpcm_selectable' flag which is then used to detect
>DPCM DAI links.
> 
> Signed-off-by: Sameer Pujar 
> Cc: Kuninori Morimoto 
> ---

Can we merge [06/13] and [07/13] patches ?

Thank you for your help !!

Best regards
---
Kuninori Morimoto


Re: [PATCH v3 06/13] ASoC: simple-card-utils: Expose new members for asoc_simple_priv

2020-10-01 Thread Kuninori Morimoto


Hi Sameer

Thank you for the patch

> Add new members in struct 'asoc_simple_priv'. Idea is to leverage
> simple or graph card driver as much as possible and vendor can
> maintain a thin driver to control the behavior by populating these
> newly exposed members.

Reusing the simple/audio-graph driver is a nice idea.
The new audio-graph2 that I am planning next can reuse it.

> diff --git a/include/sound/simple_card_utils.h 
> b/include/sound/simple_card_utils.h
> index 86a1e95..9825308 100644
> --- a/include/sound/simple_card_utils.h
> +++ b/include/sound/simple_card_utils.h
> @@ -56,6 +56,10 @@ struct asoc_simple_priv {
>   struct asoc_simple_dai *dais;
>   struct snd_soc_codec_conf *codec_conf;
>   struct gpio_desc *pa_gpio;
> + const struct snd_soc_ops *ops;
> + unsigned int force_dpcm:1;
> + uintptr_t dpcm_selectable;
> + void *data;
>  };

I have opinions about these.

About dpcm_selectable, the current audio-graph indeed uses it as "uintptr_t",
but as you know, it only checks whether it is non-NULL or not.
This means we can use it as a bit-field.

BTW, do we need to have dpcm_selectable at priv ?

One note is that the only -scu- user is me (locally),
and it will be removed when audio-graph2 is created.
(My plan is to keep the code for you, but remove the compatible)

About *data, I think we can avoid it
if the driver-side priv includes asoc_simple_priv?

struct my_priv {
struct asoc_simple_priv *simple;
...
};

#define simple_to_priv(_simple) container_of((_simple), struct my_priv, 
simple)


Thank you for your help !!

Best regards
---
Kuninori Morimoto


Re: [PATCH v3 2/3] iommu/tegra-smmu: Rework .probe_device and .attach_dev

2020-10-01 Thread Dmitry Osipenko
02.10.2020 04:07, Nicolin Chen пишет:
> On Thu, Oct 01, 2020 at 11:33:38PM +0300, Dmitry Osipenko wrote:
> If we can't come to an agreement on globalizing mc pointer, would
> it be possible to pass tegra_mc_driver through tegra_smmu_probe()
> so we can continue to use driver_find_device_by_fwnode() as v1?
>
> v1: https://lkml.org/lkml/2020/9/26/68

 tegra_smmu_probe() already takes a struct tegra_mc *. Did you mean
 tegra_smmu_probe_device()? I don't think we can do that because it isn't
>>>
>>> I was saying to have a global parent_driver pointer: similar to
>>> my v1, yet rather than "extern" the tegra_mc_driver, we pass it
>>> through tegra_smmu_probe() and store it in a static global value
>>> so as to call tegra_smmu_get_by_fwnode() in ->probe_device().
>>>
>>> Though I agree that creating a global device pointer (mc) might
>>> be controversial, yet having a global parent_driver pointer may
>>> not be against the rule, considering that it is common in iommu
>>> drivers to call driver_find_device_by_fwnode in probe_device().
>>
>> You don't need the global pointer if you have SMMU OF node.
>>
>> You could also get driver pointer from mc->dev->driver.
>>
>> But I don't think you need to do this at all. The probe_device() could
>> be invoked only for the tegra_smmu_ops and then seems you could use
>> dev_iommu_priv_set() in tegra_smmu_of_xlate(), like sun50i-iommu driver
>> does.
> 
> Getting iommu device pointer using driver_find_device_by_fwnode()
> is a common practice in ->probe_device() of other iommu drivers.

Please give me a full list of the IOMMU drivers which use this method.

> But this requires a device_driver pointer that tegra-smmu doesn't
> have. So passing tegra_mc_driver through tegra_smmu_probe() will
> address it.
> 

If you're borrowing code and ideas from other drivers, then at least
please borrow them from modern, good-looking drivers. And I already
pointed out that following a cargo cult is not always a good idea.

ARM-SMMU isn't a modern driver and it has legacy code. You shouldn't
copy it blindly. The sun50i-iommu driver was added half a year ago; you
may use it as a reference.

Always consult the IOMMU core code. If you're too unsure about
something, then maybe better to start a new thread and ask Joerg about
the best modern practices that IOMMU drivers should use.


Re: linux-next: build failure after merge of the net-next tree

2020-10-01 Thread Stephen Rothwell
Hi Dave,

On Thu, 01 Oct 2020 18:40:13 -0700 (PDT) David Miller  
wrote:
>
> From: Stephen Rothwell 
> Date: Tue, 29 Sep 2020 13:04:46 +1000
> 
> > Caused by commit
> > 
> >   eff7423365a6 ("net: core: introduce struct netdev_nested_priv for nested 
> > interface infrastructure")
> > 
> > interacting with commit
> > 
> >   e1189d9a5fbe ("net: marvell: prestera: Add Switchdev driver 
> > implementation")
> > 
> > also in the net-next tree.  
> 
> I would argue against that "also" as the first commit is only in the
> 'net' tree right now. :-)

Sorry, my mistake.  I was wondering why your testing did not seem to be
affected.

> This is simply something I'll have to resolve the next time net is merged
> into net-next.

Absolutely, no problem.

-- 
Cheers,
Stephen Rothwell


pgphqcGgvxOE6.pgp
Description: OpenPGP digital signature


Re: [PATCH] arm64: dts: zynqmp: Fix pcie ranges description

2020-10-01 Thread Laurent Pinchart
Hi Michal,

Thank you for the patch.

On Tue, Sep 29, 2020 at 01:43:22PM +0200, Michal Simek wrote:
> DT schema is checking tuples which should be properly separated. The patch
> is doing this separation to avoid the following warning:
> ..yaml: axi: pcie@fd0e:ranges: [[33554432, 0, 3758096384, 0,
> 3758096384, 0, 268435456, 1124073472, 6, 0, 6, 0, 2, 0]] is not valid under
> any of the given schemas (Possible causes of the failure):
> ...dt.yaml: axi: pcie@fd0e:ranges: True was expected
> ...dt.yaml: axi: pcie@fd0e:ranges:0: [33554432, 0, 3758096384, 0,
> 3758096384, 0, 268435456, 1124073472, 6, 0, 6, 0, 2, 0] is too long

I would keep the error message unwrapped as it's a bit confusing to
read, even if it exceeds the normal 72 columns limit of commit messages.

> Signed-off-by: Michal Simek 

Reviewed-by: Laurent Pinchart 

> ---
> 
> I have seen one conversation about it but don't have link which I can point
> to.
> ---
>  arch/arm64/boot/dts/xilinx/zynqmp.dtsi | 4 ++--
>  1 file changed, 2 insertions(+), 2 deletions(-)
> 
> diff --git a/arch/arm64/boot/dts/xilinx/zynqmp.dtsi 
> b/arch/arm64/boot/dts/xilinx/zynqmp.dtsi
> index 771f60e0346d..98073f3223e5 100644
> --- a/arch/arm64/boot/dts/xilinx/zynqmp.dtsi
> +++ b/arch/arm64/boot/dts/xilinx/zynqmp.dtsi
> @@ -542,8 +542,8 @@ pcie: pcie@fd0e {
> <0x0 0xfd48 0x0 0x1000>,
> <0x80 0x 0x0 0x100>;
>   reg-names = "breg", "pcireg", "cfg";
> - ranges = <0x0200 0x 0xe000 0x 
> 0xe000 0x 0x1000  /* non-prefetchable memory */
> -   0x4300 0x0006 0x 0x0006 
> 0x 0x0002 0x>;/* prefetchable memory */
> + ranges = <0x0200 0x 0xe000 0x 
> 0xe000 0x 0x1000>,/* non-prefetchable memory */
> +  <0x4300 0x0006 0x 0x0006 
> 0x 0x0002 0x>;/* prefetchable memory */
>   bus-range = <0x00 0xff>;
>   interrupt-map-mask = <0x0 0x0 0x0 0x7>;
>   interrupt-map = <0x0 0x0 0x0 0x1 _intc 0x1>,

-- 
Regards,

Laurent Pinchart


Re: [PATCH v13 19/26] mm: Re-introduce do_mmap_pgoff()

2020-10-01 Thread Peter Collingbourne
On Fri, Sep 25, 2020 at 7:57 AM Yu-cheng Yu  wrote:
>
> There was no more caller passing vm_flags to do_mmap(), and vm_flags was
> removed from the function's input by:
>
> commit 45e55300f114 ("mm: remove unnecessary wrapper function 
> do_mmap_pgoff()").
>
> There is a new user now.  Shadow stack allocation passes VM_SHSTK to
> do_mmap().  Re-introduce the vm_flags and do_mmap_pgoff().

I would prefer to change the callers to pass the additional 0 argument
instead of bringing the wrapper function back, but if we're going to
bring it back then we should fix the naming (both functions take a
pgoff argument, so the previous name do_mmap_pgoff() was just plain
confusing).

Peter


Re: [RFC PATCH 22/22] x86/fpu/xstate: Introduce boot-parameters for control some state component support

2020-10-01 Thread Randy Dunlap
Hi--

On 10/1/20 1:39 PM, Chang S. Bae wrote:
> diff --git a/Documentation/admin-guide/kernel-parameters.txt 
> b/Documentation/admin-guide/kernel-parameters.txt
> index a1068742a6df..742167c6f789 100644
> --- a/Documentation/admin-guide/kernel-parameters.txt
> +++ b/Documentation/admin-guide/kernel-parameters.txt
> @@ -5838,6 +5838,21 @@
>   which allow the hypervisor to 'idle' the guest on lock
>   contention.
>  
> + xstate.enable=  [X86-64]
> + xstate.disable= [X86-64]
> + The kernel is compiled with a default xstate bitmask --
> + enabling it to use the XSAVE hardware to efficiently
> + save and restore thread states on context switch.
> + xstate.enable allows adding to that default mask at
> + boot-time without recompiling the kernel just to support
> + the new thread state. (Note that the kernel will ignore
> + any bits in the mask that do not correspond to features
> + that are actually available in CPUID)  xstate.disable

 in CPUID.)

> + allows clearing bits in the default mask, forcing the
> + kernel to forget that it supports the specified thread
> + state. When a bit set for both, the kernel takes
> + xstate.disable in a priority.

   as a priority.


What do these bitmasks look like?  What do the bits mean?
Where does a user find this info?


thanks.
-- 
~Randy



Re: [Linux-kernel-mentees][PATCH v2] net: usb: rtl8150: prevent set_ethernet_addr from setting uninit address

2020-10-01 Thread David Miller
From: Anant Thazhemadam 
Date: Thu,  1 Oct 2020 13:02:20 +0530

> When get_registers() fails (which happens when usb_control_msg() fails)
> in set_ethernet_addr(), the uninitialized value of node_id gets copied
> as the address.
> 
> Checking for the return values appropriately, and handling the case
> wherein set_ethernet_addr() fails like this, helps in avoiding the
> mac address being incorrectly set in this manner.
> 
> Reported-by: syzbot+abbc768b560c84d92...@syzkaller.appspotmail.com
> Tested-by: syzbot+abbc768b560c84d92...@syzkaller.appspotmail.com
> Signed-off-by: Anant Thazhemadam 
> Acked-by: Petko Manolov 

First, please remove "Linux-kernel-mentees" from the Subject line.

All patch submitters should have their work judged equally, whoever
they are.  So this Subject text gives no extra information, and it
simply makes scanning Subject lines in one's mailer more difficult.

Second, when a MAC address fails to probe, a random MAC address should
be selected.  We have helpers for this.  This way an interface still
comes up and is usable, even in the event of a failed MAC address
probe.


[PATCH] drivers:tty:pty: Fix a race causing data loss on close

2020-10-01 Thread minyard
From: Corey Minyard 

If you write to a pty master and immediately close the pty master, the
receiver might get a chunk of data dropped, but then receive some later
data.  That's obviously something rather unexpected for a user.  It
certainly confused my test program.

It turns out that tty_vhangup() gets called from pty_close(), and that
causes the data on the slave side to be flushed, but due to races more
data can be copied into the slave side's buffer after that.  Consider
the following sequence:

thread1thread2thread3
   write data into buffer,
  n_tty buffer is filled
   pty_close()
tty_vhangup()
 tty_ldisc_hangup()
  n_tty_flush_buffer()
   reset_buffer_flags()
n_tty_read()
 up_read(&tty->termios_rwsem);
down_read(&tty->termios_rwsem);
clear n_tty buffer contents
up_read(&tty->termios_rwsem);
 tty_buffer_flush_work()
  schedules work calling
flush_to_ldisc()
  flush_to_ldisc()
   receive_buf()
tty_port_default_receive_buf()
 tty_ldisc_receive_buf()
  tty_ldisc_receive_buf()
   n_tty_receive_buf2()
n_tty_receive_buf_common()
 down_read(&tty->termios_rwsem);
 __receive_buf()
  copies data into n_tty buffer
 up_read(&tty->termios_rwsem);
 down_read(&tty->termios_rwsem);
 copy buffer data to user

This change checks to see if the tty is being hung up before copying
anything in n_tty_receive_buf_common().  It has to be done after the
tty->termios_rwsem semaphore is claimed, for reasons that should be
apparent from the sequence above.

Signed-off-by: Corey Minyard 
---
I sent a program to reproduce this, I extended it to prove it wasn't the
test program that caused the issue, and I've uploaded it to:
  http://sourceforge.net/projects/ser2net/files/tmp/testpty.c
if you want to run it.  It has a lot of comments that explain what is
going on.

This is not a very satisfying fix, though.  It works reliably, but it
doesn't seem right to me.  My inclination was to remove the up and down
semaphore around tty_buffer_flush_work() in n_tty_read(), as it just
schedules some work, no need to unlock for that.  But that resulted
in a deadlock elsewhere, so that up/down on the semaphore is there for
another reason.

The locking in the tty code is really hard to follow.  I believe this is
actually a locking problem, but fixing it looks daunting to me.

-corey

 drivers/tty/n_tty.c | 4 
 1 file changed, 4 insertions(+)

diff --git a/drivers/tty/n_tty.c b/drivers/tty/n_tty.c
index 1794d84e7bf6..1c33c26dc229 100644
--- a/drivers/tty/n_tty.c
+++ b/drivers/tty/n_tty.c
@@ -1704,6 +1704,9 @@ n_tty_receive_buf_common(struct tty_struct *tty, const 
unsigned char *cp,
 
down_read(>termios_rwsem);
 
+   if (test_bit(TTY_HUPPING, >flags))
+   goto out_upsem;
+
do {
/*
 * When PARMRK is set, each input char may take up to 3 chars
@@ -1760,6 +1763,7 @@ n_tty_receive_buf_common(struct tty_struct *tty, const 
unsigned char *cp,
} else
n_tty_check_throttle(tty);
 
+out_upsem:
up_read(>termios_rwsem);
 
return rcvd;
-- 
2.17.1



Re: [RFC PATCH v1 02/26] docs: reporting-bugs: Create a TLDR how to report issues

2020-10-01 Thread Randy Dunlap
On 10/1/20 1:39 AM, Thorsten Leemhuis wrote:
> Get straight to the point in a few paragraphs instead of forcing users
> to read quite a bit of text, like the old approach did.
> 
> All normally needed fits into the first two paragraphs. The third is
> dedicated to issues only happening in stable and longterm kernels, as
> things otherwise get hard to follow. At the end explicitly spell out
> that some issues need to be handled slightly different.
> 
> This TLDR naturally leaves lots of details out. But it will be good
> enough in some situations, for example for users that recently reported
> an issue or are familiar with reporting issues to FLOSS projects.
> 
> Signed-off-by: Thorsten Leemhuis 
> ---
>  Documentation/admin-guide/reporting-bugs.rst | 43 
>  1 file changed, 43 insertions(+)
> 
> diff --git a/Documentation/admin-guide/reporting-bugs.rst 
> b/Documentation/admin-guide/reporting-bugs.rst
> index 4bbb9132782b..7bde6f32ff72 100644
> --- a/Documentation/admin-guide/reporting-bugs.rst
> +++ b/Documentation/admin-guide/reporting-bugs.rst
> @@ -10,6 +10,49 @@ Reporting bugs
>  .. inconsistent/not make sense before all patches of the rewrite got applied.
>  .. 
> ###
>  
> +
> +The short guide (aka TL;DR)
> +===
> +
> +This is how you report issues with the Linux kernel to its developers:
> +
> +If you deal with multiple issues at once, process each of them separately. 
> Try
> +your best guess which area of the kernel might be responsible for your issue.
> +Check the `MAINTAINERS file
> +`_
> +how developers of that particular area expect to be told about issues; note,

   for how
?

> +it's rarely `bugzilla.kernel.org `_, as most
> +subsystems expect reports by mail sent to their maintainers and their public
> +mailing list!
> +
> +Check the archives of the determined destination thoroughly for existing
> +reports; also search the LKML archives and the internet as a whole. If you 
> can't
> +find any, install the `latest Linux mainline version `_.
> +Make sure to use a vanilla kernel and avert any add-on kernel modules 
> externally
> +developed; also ensure the kernel is running in a healthy environment and 
> does
> +not 'taint' itself before the issue occurs. If you can reproduce it, write a

I don't care for "does not 'taint' itself". How about
    and is not already tainted before the issue occurs.

> +report to the destination you determined earlier. Afterwards keep the ball
> +rolling by proactive testing, a status update now and then, and helping where
> +you can.
> +
> +You can't reproduce an issue with mainline you want to see fixed in older
> +version lines? Then make sure the line you care about still gets support.
> +Install its latest release as vanilla kernel. If you can reproduce the issue

Is "vanilla" well understood?

> +there, try to find the commit that fixed it in mainline or any discussion
> +preceding it: those will often mention if backporting is planed or 
> impossible;
> +if not, ask for it. In case you don't find anything, check if it's a 
> regression
> +specific to the version line that need to be bisected and report just like a

that needs

> +problem in mainline with the stable mailing list CCed. If you reached this 
> point
> +without a solution, ask for advice by mailing the subsystem maintainer with 
> the
> +subsystem and stable mailing list in CC.
> +
> +If you deal with a regression, bisect it to find the culprit and CC or 
> forward
> +your report to its developers.
> +
> +Security issues are typically best report privately; also CC the security 
> team

  reported

> +or forward your report there.
> +
> +
>  .. 
> 
>  .. Temporary marker added while this document is rewritten. Sections above
>  .. are new and dual-licensed under GPLv2+ and CC-BY 4.0, those below are old.
> 


-- 
~Randy



Re: [PATCH v3 1/1] kdump: append uts_namespace.name offset to VMCOREINFO

2020-10-01 Thread lijiang
Hi, Alexander

在 2020年09月30日 18:23, Alexander Egorenkov 写道:
> The offset of the field 'init_uts_ns.name' has changed
> since commit 9a56493f6942 ("uts: Use generic ns_common::count").
> 
> Link: 
> https://lore.kernel.org/r/159644978167.604812.1773586504374412107.stgit@localhost.localdomain
> 
> Make the offset of the field 'uts_namespace.name' available
> in VMCOREINFO because tools like 'crash-utility' and
> 'makedumpfile' must be able to read it from crash dumps.
> 
> Signed-off-by: Alexander Egorenkov 
> ---
> 
> v2 -> v3:
>  * Added documentation to vmcoreinfo.rst
>  * Use the short form of the commit reference
> 
> v1 -> v2:
>  * Improved commit message
>  * Added link to the discussion of the uts namespace changes
> 
>  Documentation/admin-guide/kdump/vmcoreinfo.rst | 6 ++
>  kernel/crash_core.c| 1 +
>  2 files changed, 7 insertions(+)
> 
> diff --git a/Documentation/admin-guide/kdump/vmcoreinfo.rst 
> b/Documentation/admin-guide/kdump/vmcoreinfo.rst
> index e44a6c01f336..3861a25faae1 100644
> --- a/Documentation/admin-guide/kdump/vmcoreinfo.rst
> +++ b/Documentation/admin-guide/kdump/vmcoreinfo.rst
> @@ -39,6 +39,12 @@ call.
>  User-space tools can get the kernel name, host name, kernel release
>  number, kernel version, architecture name and OS type from it.
>  
> +(uts_namespace, name)
> +-
> +
> +Offset of the name's member. Crash Utility and Makedumpfile get
> +the start address of the init_uts_ns.name from this.
> +

Thank you for the update. The v3 looks good to me.

>  node_online_map
>  ---
>  
> diff --git a/kernel/crash_core.c b/kernel/crash_core.c
> index 106e4500fd53..173fdc261882 100644
> --- a/kernel/crash_core.c
> +++ b/kernel/crash_core.c
> @@ -447,6 +447,7 @@ static int __init crash_save_vmcoreinfo_init(void)
>   VMCOREINFO_PAGESIZE(PAGE_SIZE);
>  
>   VMCOREINFO_SYMBOL(init_uts_ns);
> + VMCOREINFO_OFFSET(uts_namespace, name);
>   VMCOREINFO_SYMBOL(node_online_map);
>  #ifdef CONFIG_MMU
>   VMCOREINFO_SYMBOL_ARRAY(swapper_pg_dir);
> 



[PATCH net-next 1/4] net: dsa: Call dsa_untag_bridge_pvid() from dsa_switch_rcv()

2020-10-01 Thread Florian Fainelli
When a DSA switch driver needs to call dsa_untag_bridge_pvid(), it can
set dsa_switch::untag_bridge_pvid to indicate this is necessary.

This is a pre-requisite to making sure that we are always calling
dsa_untag_bridge_pvid() after eth_type_trans() has been called.

Signed-off-by: Florian Fainelli 
---
 include/net/dsa.h | 8 
 net/dsa/dsa.c | 9 +
 2 files changed, 17 insertions(+)

diff --git a/include/net/dsa.h b/include/net/dsa.h
index b502a63d196e..8b0696e08cac 100644
--- a/include/net/dsa.h
+++ b/include/net/dsa.h
@@ -308,6 +308,14 @@ struct dsa_switch {
 */
boolconfigure_vlan_while_not_filtering;
 
+   /* If the switch driver always programs the CPU port as egress tagged
+* despite the VLAN configuration indicating otherwise, then setting
+* @untag_bridge_pvid will force the DSA receive path to pop the 
bridge's
+* default_pvid VLAN tagged frames to offer a consistent behavior
+* between a vlan_filtering=0 and vlan_filtering=1 bridge device.
+*/
+   booluntag_bridge_pvid;
+
/* In case vlan_filtering_is_global is set, the VLAN awareness state
 * should be retrieved from here and not from the per-port settings.
 */
diff --git a/net/dsa/dsa.c b/net/dsa/dsa.c
index 5c18c0214aac..dec4ab59b7c4 100644
--- a/net/dsa/dsa.c
+++ b/net/dsa/dsa.c
@@ -225,6 +225,15 @@ static int dsa_switch_rcv(struct sk_buff *skb, struct 
net_device *dev,
skb->pkt_type = PACKET_HOST;
skb->protocol = eth_type_trans(skb, skb->dev);
 
+   if (unlikely(cpu_dp->ds->untag_bridge_pvid)) {
+   nskb = dsa_untag_bridge_pvid(skb);
+   if (!nskb) {
+   kfree_skb(skb);
+   return 0;
+   }
+   skb = nskb;
+   }
+
s = this_cpu_ptr(p->stats64);
u64_stats_update_begin(>syncp);
s->rx_packets++;
-- 
2.25.1



[PATCH net-next 3/4] net: dsa: Obtain VLAN protocol from skb->protocol

2020-10-01 Thread Florian Fainelli
Now that dsa_untag_bridge_pvid() is called after eth_type_trans() we are
guaranteed that skb->protocol will be set to a correct value, thus
allowing us to avoid calling vlan_eth_hdr().

Signed-off-by: Florian Fainelli 
---
 net/dsa/dsa_priv.h | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/net/dsa/dsa_priv.h b/net/dsa/dsa_priv.h
index 0348dbab4131..d6ce8c2a2590 100644
--- a/net/dsa/dsa_priv.h
+++ b/net/dsa/dsa_priv.h
@@ -201,7 +201,6 @@ dsa_slave_to_master(const struct net_device *dev)
 static inline struct sk_buff *dsa_untag_bridge_pvid(struct sk_buff *skb)
 {
struct dsa_port *dp = dsa_slave_to_port(skb->dev);
-   struct vlan_ethhdr *hdr = vlan_eth_hdr(skb);
struct net_device *br = dp->bridge_dev;
struct net_device *dev = skb->dev;
struct net_device *upper_dev;
@@ -217,7 +216,7 @@ static inline struct sk_buff *dsa_untag_bridge_pvid(struct 
sk_buff *skb)
return skb;
 
/* Move VLAN tag from data to hwaccel */
-   if (!skb_vlan_tag_present(skb) && hdr->h_vlan_proto == htons(proto)) {
+   if (!skb_vlan_tag_present(skb) && skb->protocol == htons(proto)) {
skb = skb_vlan_untag(skb);
if (!skb)
return NULL;
-- 
2.25.1



[PATCH net-next 0/4] net: dsa: Improve dsa_untag_bridge_pvid()

2020-10-01 Thread Florian Fainelli
Hi David, Jakub,

This patch series is based on the recent discussions with Vladimir:

https://lore.kernel.org/netdev/20201001030623.343535-1-f.faine...@gmail.com/

the simplest way forward was to call dsa_untag_bridge_pvid() after
eth_type_trans() has been called, which guarantees that skb->protocol is set
to a correct value and this allows us to utilize
__vlan_find_dev_deep_rcu() properly without playing or using the bridge
master as a net_device reference.

Florian Fainelli (4):
  net: dsa: Call dsa_untag_bridge_pvid() from dsa_switch_rcv()
  net: dsa: b53: Set untag_bridge_pvid
  net: dsa: Obtain VLAN protocol from skb->protocol
  net: dsa: Utilize __vlan_find_dev_deep_rcu()

 drivers/net/dsa/b53/b53_common.c |  1 +
 include/net/dsa.h|  8 
 net/dsa/dsa.c|  9 +
 net/dsa/dsa_priv.h   | 14 --
 net/dsa/tag_brcm.c   | 15 ++-
 5 files changed, 24 insertions(+), 23 deletions(-)

-- 
2.25.1



[PATCH net-next 2/4] net: dsa: b53: Set untag_bridge_pvid

2020-10-01 Thread Florian Fainelli
Indicate to the DSA receive path that we need to untag the bridge PVID,
this allows us to remove the dsa_untag_bridge_pvid() calls from
net/dsa/tag_brcm.c.

Signed-off-by: Florian Fainelli 
---
 drivers/net/dsa/b53/b53_common.c |  1 +
 net/dsa/tag_brcm.c   | 15 ++-
 2 files changed, 3 insertions(+), 13 deletions(-)

diff --git a/drivers/net/dsa/b53/b53_common.c b/drivers/net/dsa/b53/b53_common.c
index 73507cff3bc4..ce18ba0b74eb 100644
--- a/drivers/net/dsa/b53/b53_common.c
+++ b/drivers/net/dsa/b53/b53_common.c
@@ -2603,6 +2603,7 @@ struct b53_device *b53_switch_alloc(struct device *base,
dev->ops = ops;
ds->ops = _switch_ops;
ds->configure_vlan_while_not_filtering = true;
+   ds->untag_bridge_pvid = true;
dev->vlan_enabled = ds->configure_vlan_while_not_filtering;
mutex_init(>reg_mutex);
mutex_init(>stats_mutex);
diff --git a/net/dsa/tag_brcm.c b/net/dsa/tag_brcm.c
index 69d6b8c597a9..ad72dff8d524 100644
--- a/net/dsa/tag_brcm.c
+++ b/net/dsa/tag_brcm.c
@@ -152,11 +152,6 @@ static struct sk_buff *brcm_tag_rcv_ll(struct sk_buff *skb,
/* Remove Broadcom tag and update checksum */
skb_pull_rcsum(skb, BRCM_TAG_LEN);
 
-   /* Set the MAC header to where it should point for
-* dsa_untag_bridge_pvid() to parse the correct VLAN header.
-*/
-   skb_set_mac_header(skb, -ETH_HLEN);
-
skb->offload_fwd_mark = 1;
 
return skb;
@@ -187,7 +182,7 @@ static struct sk_buff *brcm_tag_rcv(struct sk_buff *skb, 
struct net_device *dev,
nskb->data - ETH_HLEN - BRCM_TAG_LEN,
2 * ETH_ALEN);
 
-   return dsa_untag_bridge_pvid(nskb);
+   return nskb;
 }
 
 static const struct dsa_device_ops brcm_netdev_ops = {
@@ -214,14 +209,8 @@ static struct sk_buff *brcm_tag_rcv_prepend(struct sk_buff 
*skb,
struct net_device *dev,
struct packet_type *pt)
 {
-   struct sk_buff *nskb;
-
/* tag is prepended to the packet */
-   nskb = brcm_tag_rcv_ll(skb, dev, pt, ETH_HLEN);
-   if (!nskb)
-   return nskb;
-
-   return dsa_untag_bridge_pvid(nskb);
+   return brcm_tag_rcv_ll(skb, dev, pt, ETH_HLEN);
 }
 
 static const struct dsa_device_ops brcm_prepend_netdev_ops = {
-- 
2.25.1



[PATCH net-next 4/4] net: dsa: Utilize __vlan_find_dev_deep_rcu()

2020-10-01 Thread Florian Fainelli
Now that we are guaranteed that dsa_untag_bridge_pvid() is called after
eth_type_trans() we can utilize __vlan_find_dev_deep_rcu() which will
take care of finding an 802.1Q upper on top of a bridge master.

A common use case, prior to 12a1526d067 ("net: dsa: untag the bridge
pvid from rx skbs") was to configure a bridge 802.1Q upper like this:

ip link add name br0 type bridge vlan_filtering 0
ip link add link br0 name br0.1 type vlan id 1

in order to pop the default_pvid VLAN tag.

With this change we restore that behavior while still allowing the DSA
receive path to automatically pop the VLAN tag.

Signed-off-by: Florian Fainelli 
---
 net/dsa/dsa_priv.h | 11 +++
 1 file changed, 3 insertions(+), 8 deletions(-)

diff --git a/net/dsa/dsa_priv.h b/net/dsa/dsa_priv.h
index d6ce8c2a2590..12998bf04e55 100644
--- a/net/dsa/dsa_priv.h
+++ b/net/dsa/dsa_priv.h
@@ -204,7 +204,6 @@ static inline struct sk_buff *dsa_untag_bridge_pvid(struct 
sk_buff *skb)
struct net_device *br = dp->bridge_dev;
struct net_device *dev = skb->dev;
struct net_device *upper_dev;
-   struct list_head *iter;
u16 vid, pvid, proto;
int err;
 
@@ -246,13 +245,9 @@ static inline struct sk_buff *dsa_untag_bridge_pvid(struct 
sk_buff *skb)
 * supports because vlan_filtering is 0. In that case, we should
 * definitely keep the tag, to make sure it keeps working.
 */
-   netdev_for_each_upper_dev_rcu(dev, upper_dev, iter) {
-   if (!is_vlan_dev(upper_dev))
-   continue;
-
-   if (vid == vlan_dev_vlan_id(upper_dev))
-   return skb;
-   }
+   upper_dev = __vlan_find_dev_deep_rcu(br, htons(proto), vid);
+   if (upper_dev)
+   return skb;
 
__vlan_hwaccel_clear_tag(skb);
 
-- 
2.25.1



[PATCH] seccomp: Make duplicate listener detection non-racy

2020-10-01 Thread Jann Horn
Currently, init_listener() tries to prevent adding a filter with
SECCOMP_FILTER_FLAG_NEW_LISTENER if one of the existing filters already
has a listener. However, this check happens without holding any lock that
would prevent another thread from concurrently installing a new filter
(potentially with a listener) on top of the ones we already have.

Theoretically, this is also a data race: The plain load from
current->seccomp.filter can race with concurrent writes to the same
location.

Fix it by moving the check into the region that holds the siglock to guard
against concurrent TSYNC.

(I am not marking this for stable backport because I believe that this does
not have any implications beyond a theoretical data race and allowing
userspace to create seccomp filters with weird semantics if userspace
concurrently installs seccomp filters in a way no benign userspace workload
would.)

(The "Fixes" tag points to the commit that introduced the theoretical
data race; concurrent installation of another filter with TSYNC only
became possible later, in commit 51891498f2da ("seccomp: allow TSYNC and
USER_NOTIF together").)

Fixes: 6a21cc50f0c7 ("seccomp: add a return code to trap to userspace")
Reviewed-by: Tycho Andersen 
Signed-off-by: Jann Horn 
---
NOTE: After Tycho gave his Reviewed-by, I've had to adjust the errno
to -EBUSY (my original patch broke UAPI, good thing we have selftests),
remove the unused "cur" from init_listener(), and remove the now
useless initialization of "ret".

 kernel/seccomp.c | 38 +++---
 1 file changed, 31 insertions(+), 7 deletions(-)

diff --git a/kernel/seccomp.c b/kernel/seccomp.c
index 676d4af62103..c359ef4380ad 100644
--- a/kernel/seccomp.c
+++ b/kernel/seccomp.c
@@ -1472,13 +1472,7 @@ static const struct file_operations
seccomp_notify_ops = {

 static struct file *init_listener(struct seccomp_filter *filter)
 {
-   struct file *ret = ERR_PTR(-EBUSY);
-   struct seccomp_filter *cur;
-
-   for (cur = current->seccomp.filter; cur; cur = cur->prev) {
-   if (cur->notif)
-   goto out;
-   }
+   struct file *ret;

ret = ERR_PTR(-ENOMEM);
filter->notif = kzalloc(sizeof(*(filter->notif)), GFP_KERNEL);
@@ -1504,6 +1498,31 @@ static struct file *init_listener(struct
seccomp_filter *filter)
return ret;
 }

+/*
+ * Does @new_child have a listener while an ancestor also has a listener?
+ * If so, we'll want to reject this filter.
+ * This only has to be tested for the current process, even in the TSYNC case,
+ * because TSYNC installs @child with the same parent on all threads.
+ * Note that @new_child is not hooked up to its parent at this point yet, so
+ * we use current->seccomp.filter.
+ */
+static bool has_duplicate_listener(struct seccomp_filter *new_child)
+{
+   struct seccomp_filter *cur;
+
+   /* must be protected against concurrent TSYNC */
+   lockdep_assert_held(>sighand->siglock);
+
+   if (!new_child->notif)
+   return false;
+   for (cur = current->seccomp.filter; cur; cur = cur->prev) {
+   if (cur->notif)
+   return true;
+   }
+
+   return false;
+}
+
 /**
  * seccomp_set_mode_filter: internal function for setting seccomp filter
  * @flags:  flags to change filter behavior
@@ -1575,6 +1594,11 @@ static long seccomp_set_mode_filter(unsigned int flags,
if (!seccomp_may_assign_mode(seccomp_mode))
goto out;

+   if (has_duplicate_listener(prepared)) {
+   ret = -EBUSY;
+   goto out;
+   }
+
ret = seccomp_attach_filter(flags, prepared);
if (ret)
goto out;

base-commit: fb0155a09b0224a7147cb07a4ce6034c8d29667f
-- 
2.28.0.806.g8561365e88-goog


drivers/net/ethernet/aquantia/atlantic/aq_pci_func.c:426:32: warning: unused variable 'aq_pm_ops'

2020-10-01 Thread kernel test robot
Hi zhengbin,

FYI, the error/warning still remains.

tree:   https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git 
master
head:   60e720931556fc1034d0981460164dcf02697679
commit: 0ee0bbb018938addf87b54d447cc5633d2e53490 net: atlantic: make symbol 
'aq_pm_ops' static
date:   11 months ago
config: x86_64-randconfig-a003-20201001 (attached as .config)
compiler: clang version 12.0.0 (https://github.com/llvm/llvm-project 
bcd05599d0e53977a963799d6ee4f6e0bc21331b)
reproduce (this is a W=1 build):
wget 
https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O 
~/bin/make.cross
chmod +x ~/bin/make.cross
# install x86_64 cross compiling tool for clang build
# apt-get install binutils-x86-64-linux-gnu
# 
https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=0ee0bbb018938addf87b54d447cc5633d2e53490
git remote add linus 
https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
git fetch --no-tags linus master
git checkout 0ee0bbb018938addf87b54d447cc5633d2e53490
# save the attached .config to linux build tree
COMPILER_INSTALL_PATH=$HOME/0day COMPILER=clang make.cross ARCH=x86_64 

If you fix the issue, kindly add following tag as appropriate
Reported-by: kernel test robot 

All warnings (new ones prefixed by >>):

>> drivers/net/ethernet/aquantia/atlantic/aq_pci_func.c:426:32: warning: unused 
>> variable 'aq_pm_ops' [-Wunused-const-variable]
   static const struct dev_pm_ops aq_pm_ops = {
  ^
   1 warning generated.

vim +/aq_pm_ops +426 drivers/net/ethernet/aquantia/atlantic/aq_pci_func.c

   425  
 > 426  static const struct dev_pm_ops aq_pm_ops = {
   427  .suspend = aq_pm_suspend_poweroff,
   428  .poweroff = aq_pm_suspend_poweroff,
   429  .freeze = aq_pm_freeze,
   430  .resume = aq_pm_resume_restore,
   431  .restore = aq_pm_resume_restore,
   432  .thaw = aq_pm_thaw,
   433  };
   434  

---
0-DAY CI Kernel Test Service, Intel Corporation
https://lists.01.org/hyperkitty/list/kbuild-...@lists.01.org


.config.gz
Description: application/gzip


Re: [PATCH v3 2/3] iommu/tegra-smmu: Rework .probe_device and .attach_dev

2020-10-01 Thread Nicolin Chen
On Fri, Oct 02, 2020 at 04:55:34AM +0300, Dmitry Osipenko wrote:
> 02.10.2020 04:07, Nicolin Chen пишет:
> > On Thu, Oct 01, 2020 at 11:33:38PM +0300, Dmitry Osipenko wrote:
> > If we can't come to an agreement on globalizing mc pointer, would
> > it be possible to pass tegra_mc_driver through tegra_smmu_probe()
> > so we can continue to use driver_find_device_by_fwnode() as v1?
> >
> > v1: https://lkml.org/lkml/2020/9/26/68
> 
>  tegra_smmu_probe() already takes a struct tegra_mc *. Did you mean
>  tegra_smmu_probe_device()? I don't think we can do that because it isn't
> >>>
> >>> I was saying to have a global parent_driver pointer: similar to
> >>> my v1, yet rather than "extern" the tegra_mc_driver, we pass it
> >>> through tegra_smmu_probe() and store it in a static global value
> >>> so as to call tegra_smmu_get_by_fwnode() in ->probe_device().
> >>>
> >>> Though I agree that creating a global device pointer (mc) might
> >>> be controversial, yet having a global parent_driver pointer may
> >>> not be against the rule, considering that it is common in iommu
> >>> drivers to call driver_find_device_by_fwnode in probe_device().
> >>
> >> You don't need the global pointer if you have SMMU OF node.
> >>
> >> You could also get driver pointer from mc->dev->driver.
> >>
> >> But I don't think you need to do this at all. The probe_device() could
> >> be invoked only for the tegra_smmu_ops and then seems you could use
> >> dev_iommu_priv_set() in tegra_smmu_of_xlate(), like sun50i-iommu driver
> >> does.
> > 
> > Getting iommu device pointer using driver_find_device_by_fwnode()
> > is a common practice in ->probe_device() of other iommu drivers.
> 
> Please give me a full list of the IOMMU drivers which use this method.
> 
> > But this requires a device_driver pointer that tegra-smmu doesn't
> > have. So passing tegra_mc_driver through tegra_smmu_probe() will
> > address it.
> > 
> 
> If you're borrowing code and ideas from other drivers, then at least
> please borrow them from a modern good-looking drivers. And I already
> pointed out that following cargo cult is not always a good idea.
> 
> ARM-SMMU isn't a modern driver and it has legacy code. You shouldn't
> copy it blindly. The sun50i-iommu driver was added half year ago, you
> may use it as a reference.

I took a closer look at sun50i-iommu driver. It's a good idea.
I think I can come up with a cleaner one. Will send v4.

Thanks for the advice.


Re: [RFC PATCH v1 03/26] docs: reporting-bugs: step-by-step guide on how to report issues

2020-10-01 Thread Randy Dunlap
On 10/1/20 1:39 AM, Thorsten Leemhuis wrote:
> 
> Signed-off-by: Thorsten Leemhuis 
> ---
>  Documentation/admin-guide/reporting-bugs.rst | 103 +++
>  1 file changed, 103 insertions(+)
> 
> diff --git a/Documentation/admin-guide/reporting-bugs.rst 
> b/Documentation/admin-guide/reporting-bugs.rst
> index 7bde6f32ff72..203df36af55f 100644
> --- a/Documentation/admin-guide/reporting-bugs.rst
> +++ b/Documentation/admin-guide/reporting-bugs.rst
> @@ -53,6 +53,109 @@ Security issues are typically best report privately; also 
> CC the security team
>  or forward your report there.
>  
>  
> +Step-by-step guide how to report issues to the kernel maintainers
> +=================================================================
> +
> +Above TL;DR outlines roughly how to report issues to the Linux kernel

   The above

> +developers. It might be all that's needed for people already familiar with
> +reporting issues to Free/Libre & Open Source Software (FLOSS) projects. For
> +everyone else there is this section. It is more detailed and uses a
> +step-by-step approach. It still tries to be brief for readability; if it's to

  
too

> +brief for you, look up the details in the reference section below, where each
> +of the steps is explained in more detail.
> +
> +Note, this section covers a few more aspects than the TL;DR and does things 
> in a

   Note:

> +slightly different order. That's in your interest, to make sure you notice 
> early
> +if an issue that looks like a Linux kernel problem is actually caused by
> +something else. These steps thus help to ensure the time you invest in this
> +process won't feel wasted in the end:
> +
> + * Stop reading this document and report the problem to your vendor instead,
> +   unless you are running a vanilla mainline kernel already or are willing to
> +   install it.
> +
> + * See if the issue you are dealing with qualifies as regression, security
> +   issue, or a really severe problem: those are 'issues of high priority' 
> that
> +   need special handling in some steps that are about to follow.
> +
> + * Check if your kernel was 'tainted' when the issue occurred, as the event 
> that
> +   made the kernel set this flag might be causing the issue you face.
> +
> + * Locate the driver or kernel subsystem that seems to be causing the issue.
> +   Find out how and where its developers expect reports. Note: most of the 
> time
> +   this won't be `bugzilla.kernel.org `_, as 
> issues
> +   typically need to be sent by mail to a maintainer and a public mailing 
> list.
> +
> + * Search the archives of the bug tracker or mailing list in question
> +   thoroughly for reports that might match your issue. Also check if you find
> +   something with your favorite internet search engine or in the `Linux 
> Kernel
> +   Mailing List (LKML) archives `_. If you 
> find
> +   anything, join the discussion instead of sending a new report.
> +
> + * Create a fresh backup and put system repair and restore tools at hand.
> +
> + * Ensure your system does not enhance its kernels by building additional
> +   kernel modules on-the-fly locally, which solutions like DKMS might be 
> doing
> +   without your knowledge.
> +
> + * Make sure it's not the kernels surroundings that are causing the issue you

 kernel's

> +   face.
> +
> + * Write down coarsely how to reproduce the issue. If you deal with multiple
> +   issue at once, create separate notes for each of them and make sure they

  issues

> +   work independently on a freshly booted system. That's needed, as each 
> issue
> +   needs to get reported to the kernel developers separately, unless they are
> +   strongly entangled.
> +
> +After these preparations you'll now enter the main part:
> +
> + * Install the latest Linux mainline kernel: that's where all issue get fixed
> +   first, because it's the version line the kernel developers mainly care 
> about.
> +   Testing and reporting with the latest Linux stable kernel can be 
> acceptable

can be an 
acceptable

> +   alternative in some situations, but is best avoided.
> +
> + * Ensure the kernel you just installed does not 'taint' itself when running.
> +
> + * Reproduce the issue with the kernel you just installed. If it doesn't 
> show up
> +   there, head over to the instructions for issues only happening with stable
> +   and longterm kernels if you want to see it fixed there.

Can you link (reference) to that section?

> +
> + * Optimize your notes: try to find and write the most straightforward way to
> +   reproduce your issue. Make sure the end result has all the important 
> details,
> +   and at the same time is easy to read and understand for others that hear
> +   about it for the first time. And if you learned something in 

RE: [PATCH] perf evlist: fix memory corruption for Kernel PMU event

2020-10-01 Thread Song Bao Hua (Barry Song)



> -Original Message-
> From: Andi Kleen [mailto:a...@linux.intel.com]
> Sent: Friday, October 2, 2020 12:07 PM
> To: Song Bao Hua (Barry Song) 
> Cc: linux-kernel@vger.kernel.org; Linuxarm ; Peter
> Zijlstra ; Ingo Molnar ; Arnaldo
> Carvalho de Melo ; Mark Rutland
> ; Alexander Shishkin
> ; Jiri Olsa ;
> Namhyung Kim ; Adrian Hunter
> ; Alexey Budankov
> 
> Subject: Re: [PATCH] perf evlist: fix memory corruption for Kernel PMU event
> 
> On Fri, Oct 02, 2020 at 12:57:29AM +1300, Barry Song wrote:
> > Commit 7736627b865d ("perf stat: Use affinity for closing file
> > descriptors") will use FD(evsel, cpu, thread) to read and write file
> > descriptors xyarray. For a kernel PMU event, this leads to serious
> > memory corruption and perf crash.
> > I have seen evlist->core.cpus->nr is 1 while evsel has cpus->nr with
> > the total number of CPUs. so xyarray which is allocated by
> > evlist->core.cpus->nr will get overflow. This leads to various
> > segmentation faults in perf tool for kernel PMU events, eg:
> > ./perf stat -e bus_cycles  sleep 1
> > *** Error in `./perf': free(): invalid next size (fast):
> > 0x401e6370 *** Aborted (core dumped)
> 
> Thanks.
> 
> I believe there is already a patch queued for this.

Andi, thanks! Could you share the link or the commit ID? I'd like to take a 
look at the fix.
I could still reproduce this issue in the latest linus' tree and I didn't find 
any commit
related to this issue in linux-next and tip/perf/core.

> 
> The problem seems to only happen on ARM64.

My platform which has this issue is really ARM64.

Thanks
Barry


linux-next: manual merge of the net-next tree with the net tree

2020-10-01 Thread Stephen Rothwell
Hi all,

Today's linux-next merge of the net-next tree got a conflict in:

  Documentation/devicetree/bindings/net/renesas,ravb.txt

between commit:

  307eea32b202 ("dt-bindings: net: renesas,ravb: Add support for r8a774e1 SoC")

from the net tree and commit:

  d7adf6331189 ("dt-bindings: net: renesas,etheravb: Convert to json-schema")

from the net-next tree.

I fixed it up (I deleted the file and added the following patch) and
can carry the fix as necessary. This is now fixed as far as linux-next
is concerned, but any non trivial conflicts should be mentioned to your
upstream maintainer when your tree is submitted for merging.  You may
also want to consider cooperating with the maintainer of the conflicting
tree to minimise any particularly complex conflicts.

From: Stephen Rothwell 
Date: Fri, 2 Oct 2020 12:57:33 +1000
Subject: [PATCH] fix up for "dt-bindings: net: renesas,ravb: Add support for 
r8a774e1 SoC"

Signed-off-by: Stephen Rothwell 
---
 Documentation/devicetree/bindings/net/renesas,etheravb.yaml | 1 +
 1 file changed, 1 insertion(+)

diff --git a/Documentation/devicetree/bindings/net/renesas,etheravb.yaml 
b/Documentation/devicetree/bindings/net/renesas,etheravb.yaml
index e13653051b23..244befb6402a 100644
--- a/Documentation/devicetree/bindings/net/renesas,etheravb.yaml
+++ b/Documentation/devicetree/bindings/net/renesas,etheravb.yaml
@@ -31,6 +31,7 @@ properties:
   - renesas,etheravb-r8a774a1 # RZ/G2M
   - renesas,etheravb-r8a774b1 # RZ/G2N
   - renesas,etheravb-r8a774c0 # RZ/G2E
+  - renesas,etheravb-r8a774e1 # RZ/G2H
   - renesas,etheravb-r8a7795  # R-Car H3
   - renesas,etheravb-r8a7796  # R-Car M3-W
   - renesas,etheravb-r8a77961 # R-Car M3-W+
-- 
2.28.0

-- 
Cheers,
Stephen Rothwell


pgpPrsixnGiQ6.pgp
Description: OpenPGP digital signature


Re: [PATCH] random: use correct memory barriers for crng_node_pool

2020-10-01 Thread Eric Biggers
On Thu, Sep 24, 2020 at 08:31:02PM -0700, Paul E. McKenney wrote:
> On Thu, Sep 24, 2020 at 07:09:08PM -0700, Eric Biggers wrote:
> > On Thu, Sep 24, 2020 at 05:59:34PM -0700, Paul E. McKenney wrote:
> > > On Tue, Sep 22, 2020 at 02:55:58PM -0700, Eric Biggers wrote:
> > > > On Tue, Sep 22, 2020 at 01:56:28PM -0700, Paul E. McKenney wrote:
> > > > > > You're missing the point here.  b and c could easily be allocated 
> > > > > > by a function
> > > > > > alloc_b() that's in another file.
> > > > > 
> > > > > I am still missing something.
> > > > > 
> > > > > If by "allocated" you mean something like kmalloc(), the compiler 
> > > > > doesn't
> > > > > know the address.  If you instead mean that there is a function that
> > > > > returns the address of another translation unit's static variable, 
> > > > > then
> > > > > any needed ordering should preferably be built into that function's 
> > > > > API.
> > > > > Either way, one would hope for some documentation of anything the 
> > > > > caller
> > > > > needed to be careful of.
> > > > > 
> > > > > > > Besides which, control dependencies should be used only by LKMM 
> > > > > > > experts
> > > > > > > at this point.  
> > > > > > 
> > > > > > What does that even mean?  Control dependencies are everywhere.
> > > > > 
> > > > > Does the following work better for you?
> > > > > 
> > > > > "... the non-local ordering properties of control dependencies should 
> > > > > be
> > > > > relied on only by LKMM experts ...".
> > > > 
> > > > No.  I don't know what that means.  And I think very few people would 
> > > > know.
> > > > 
> > > > I just want to know if I use the one-time init pattern with a pointer 
> > > > to a data
> > > > structure foo, are the readers using foo_use() supposed to use 
> > > > READ_ONCE() or
> > > > are they supposed to use smp_load_acquire().
> > > > 
> > > > It seems the answer is that smp_load_acquire() is the only safe choice, 
> > > > since
> > > > foo_use() *might* involve a control dependency, or might in the future 
> > > > since
> > > > it's part of another kernel subsystem and its implementation could 
> > > > change.
> > > 
> > > First, the specific issue of one-time init.
> > > 
> > > If you are the one writing the code implementing one-time init, it is your
> > > choice.  It seems like you prefer smp_load_acquire().  If someone sees
> > > performance problems due to the resulting memory-barrier instructions,
> > > they have the option of submitting a patch and either forking the
> > > implementation or taking your implementation over from you, depending
> > > on how that conversation goes.
> > 
> > It doesn't matter what I "prefer".  The question is, how to write code that 
> > is
> > actually guaranteed to be correct on all supported Linux architectures, 
> > without
> > assuming internal implementation details of other kernel subsystems.
> 
> And that question allows ample room for personal preferences.
> 
> There are after all tradeoffs.  Do you want to live within the current
> knowledge of your users, or are you willing to invest time and energy
> into teaching them something new?  If someone wants a level of performance
> that is accommodated only by a difficult-to-use pattern, will you choose
> to accommodate them, or will you tell them to write their own?
> 
> There are often a number of ways to make something work, and they all
> have advantages and disadvantages.  There are tradeoffs, and preferences
> have a role to play as well.

Having options doesn't matter if no one can agree on which one to use.  This is
the second bug fix that I can't get accepted due to bikeshedding over how to
implement "one-time init":

First patch:
v1: 
https://lkml.kernel.org/linux-fsdevel/2020071300.205104-1-ebigg...@kernel.org
v2: 
https://lkml.kernel.org/linux-fsdevel/20200717050510.95832-1-ebigg...@kernel.org
Related thread: 
https://lkml.kernel.org/lkml/20200717044427.68747-1-ebigg...@kernel.org

Second patch (this one):
https://lkml.kernel.org/lkml/20200916233042.51634-1-ebigg...@kernel.org

The problem is identical in both cases.  In both cases, the code currently
implements "one-time init" using a plain load on the reader side, which is
undefined behavior and isn't sufficient on all supported Linux architectures
(even *if* there is no control dependency, which is something that usually is
hard to determine, as I've explained several times).

However in both cases, no one can agree on what to replace the broken code with.
And the opinions were conflicting.  In the first patch, people were advocating
for smp_load_acquire() over READ_ONCE() because it's too hard to determine when
READ_ONCE() is safe.  And even after I switched to smp_load_acquire(), the patch
was still rejected, with conflicting reasons.

Now in the second patch, people are instead advocating for READ_ONCE() over
smp_load_acquire().  And you're claiming that all kernel developers are expected
to read Documentation/RCU/rcu_dereference.rst and design all 

Re: [RFC PATCH v1 04/26] docs: reporting-bugs: step-by-step guide for issues in stable & longterm

2020-10-01 Thread Randy Dunlap
On 10/1/20 1:39 AM, Thorsten Leemhuis wrote:
> Handle stable and longterm kernels in a subsection, as dealing with them
> directly in the main part of the step-by-step guide turned out to make
> it messy and hard to follow: it looked a bit like code with a large
> amount of if-then-else section to handle special cases, which made the
> default code-flow hard to understand.
> 
> Yet again each step will later be repeated in a reference section and
> described in more detail.
> 
> Signed-off-by: Thorsten Leemhuis 
> ---
>  Documentation/admin-guide/reporting-bugs.rst | 49 
>  1 file changed, 49 insertions(+)
> 
> diff --git a/Documentation/admin-guide/reporting-bugs.rst 
> b/Documentation/admin-guide/reporting-bugs.rst
> index 203df36af55f..e0a6f4328e87 100644
> --- a/Documentation/admin-guide/reporting-bugs.rst
> +++ b/Documentation/admin-guide/reporting-bugs.rst
> @@ -156,6 +156,55 @@ After these preparations you'll now enter the main part:
> yourself, if you don't get any help or if it is unsatisfying.
>  
>  
> +Reporting issues only occurring in older kernel version lines
> +-------------------------------------------------------------
> +
> +This section is for you, if you tried the latest mainline kernel as outlined
> +above, but failed to reproduce your issue there; at the same time you want to
> +see the issue fixed in older version lines or a vendor kernel that's 
> regularly
> +rebased on new stable or longterm releases. If that case follow these steps:
> +
> + * Prepare yourself for the possibility that going through the next few steps
> +   might not get the issue solved in older releases: the fix might be too 
> big or
> +   risky to get backported there.
> +
> + * Check if the kernel developers still maintain the Linux kernel version 
> line
> +   you care about: go to `the front-page of kernel.org `_
> +   and make sure it mentions the latest release of the particular version 
> line
> +   without an '[EOL]' tag.

Explain somewhere that EOL = End Of Life (in parens).

> +
> + * Check the `archives of the Linux stable mailing list
> +   `_  for existing reports.
> +
> + * Install the latest release from the particular version line as a vanilla
> +   kernel. Ensure this kernel is not tainted and still shows the problem, as 
> the
> +   issue might have already been fixed there.
> +
> + * Search the Linux kernel version control system for the change that fixed
> +   the issue in mainline, as its commit message might tell you if the fix is
> +   scheduled for backporting already. If you don't find anything that way,
> +   search the appropriate mailing lists for posts that discuss such an issue 
> or
> +   peer-review possible fixes. That might lead you to the commit with the fix
> +   or tell you if it's unsuitable for backporting. If backporting was not
> +   considered at all, join the newest discussion, asking if its in the cards.

   it's

> +
> + * Check if you're dealing with a regression that was never present in
> +   mainline by installing the first release of the version line you care 
> about.
> +   If the issue doesn't show up with it, you basically need to report the 
> issue
> +   with this version like you would report a problem with mainline (see 
> above).
> +   This ideally includes a bisection followed by a search for existing 
> reports
> +   on the net; with the help of the subject and the two relevant commit-ids. 
> If
> +   that doesn't turn up anything, write the report; CC or forward the report 
> to
> +   the stable maintainers, the stable mailing list, and those that authored 
> the

   those who (?)

> +   change. Include the shortened commit-id if you found the change that 
> causes
> +   it.
> +
> + * One of the former steps should lead to a solution. If that doesn't work 
> out,
> +   ask the maintainers for the subsystem that seems to be causing the issue 
> for
> +   advice; CC the mailing list for the particular subsystem as well as the
> +   stable mailing list.
> +
> +
>  .. 
> 
>  .. Temporary marker added while this document is rewritten. Sections above
>  .. are new and dual-licensed under GPLv2+ and CC-BY 4.0, those below are old.
> 


-- 
~Randy



[tip:x86/misc] BUILD SUCCESS f94c91f7ba3ba7de2bc8aa31be28e1abb22f849e

2020-10-01 Thread kernel test robot
 allmodconfig
powerpc  allyesconfig
powerpc   allnoconfig
i386 randconfig-a003-20200930
i386 randconfig-a002-20200930
i386 randconfig-a006-20200930
i386 randconfig-a005-20200930
i386 randconfig-a004-20200930
i386 randconfig-a001-20200930
x86_64   randconfig-a015-20200930
x86_64   randconfig-a013-20200930
x86_64   randconfig-a012-20200930
x86_64   randconfig-a016-20200930
x86_64   randconfig-a014-20200930
x86_64   randconfig-a011-20200930
x86_64   randconfig-a012-20201001
x86_64   randconfig-a015-20201001
x86_64   randconfig-a014-20201001
x86_64   randconfig-a013-20201001
x86_64   randconfig-a011-20201001
x86_64   randconfig-a016-20201001
i386 randconfig-a011-20200930
i386 randconfig-a015-20200930
i386 randconfig-a012-20200930
i386 randconfig-a014-20200930
i386 randconfig-a016-20200930
i386 randconfig-a013-20200930
riscvnommu_k210_defconfig
riscvallyesconfig
riscvnommu_virt_defconfig
riscv   defconfig
riscv  rv32_defconfig
riscvallmodconfig
x86_64   rhel
x86_64   allyesconfig
x86_64rhel-7.6-kselftests
x86_64  defconfig
x86_64   rhel-8.3
x86_64  kexec

clang tested configs:
x86_64   randconfig-a001-20200930
x86_64   randconfig-a005-20200930
x86_64   randconfig-a003-20200930
x86_64   randconfig-a004-20200930
x86_64   randconfig-a002-20200930
x86_64   randconfig-a006-20200930
x86_64   randconfig-a004-20201001
x86_64   randconfig-a001-20201001
x86_64   randconfig-a002-20201001
x86_64   randconfig-a003-20201001
x86_64   randconfig-a005-20201001
x86_64   randconfig-a006-20201001

---
0-DAY CI Kernel Test Service, Intel Corporation
https://lists.01.org/hyperkitty/list/kbuild-...@lists.01.org


[tip:core/debugobjects] BUILD SUCCESS 88451f2cd3cec2abc30debdf129422d2699d1eba

2020-10-01 Thread kernel test robot
 randconfig-a001-20200930
x86_64   randconfig-a015-20200930
x86_64   randconfig-a013-20200930
x86_64   randconfig-a012-20200930
x86_64   randconfig-a016-20200930
x86_64   randconfig-a014-20200930
x86_64   randconfig-a011-20200930
x86_64   randconfig-a012-20201001
x86_64   randconfig-a015-20201001
x86_64   randconfig-a014-20201001
x86_64   randconfig-a013-20201001
x86_64   randconfig-a011-20201001
x86_64   randconfig-a016-20201001
i386 randconfig-a011-20200930
i386 randconfig-a015-20200930
i386 randconfig-a012-20200930
i386 randconfig-a014-20200930
i386 randconfig-a016-20200930
i386 randconfig-a013-20200930
i386 randconfig-a014-20201001
i386 randconfig-a015-20201001
i386 randconfig-a013-20201001
i386 randconfig-a016-20201001
i386 randconfig-a011-20201001
i386 randconfig-a012-20201001
riscvnommu_k210_defconfig
riscvallyesconfig
riscvnommu_virt_defconfig
riscv   defconfig
riscv  rv32_defconfig
riscvallmodconfig
x86_64   rhel
x86_64   allyesconfig
x86_64rhel-7.6-kselftests
x86_64  defconfig
x86_64   rhel-8.3
x86_64  kexec

clang tested configs:
x86_64   randconfig-a004-20201001
x86_64   randconfig-a001-20201001
x86_64   randconfig-a002-20201001
x86_64   randconfig-a003-20201001
x86_64   randconfig-a005-20201001
x86_64   randconfig-a006-20201001
x86_64   randconfig-a001-20200930
x86_64   randconfig-a005-20200930
x86_64   randconfig-a003-20200930
x86_64   randconfig-a004-20200930
x86_64   randconfig-a002-20200930
x86_64   randconfig-a006-20200930

---
0-DAY CI Kernel Test Service, Intel Corporation
https://lists.01.org/hyperkitty/list/kbuild-...@lists.01.org


linux-next: manual merge of the drm tree with Linus' tree

2020-10-01 Thread Stephen Rothwell
Hi all,

Today's linux-next merge of the drm tree got a conflict in:

  drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c

between commit:

  b19515253623 ("drm/amd/pm: setup APU dpm clock table in SMU HW 
initialization")

from the Linus tree and commits:

  82cac71c1b64 ("drm/amd/pm: put Navi1X umc cdr workaround in post_smu_init")
  236b156f7388 ("drm/amd/pm: apply no power source workaround if dc reported by 
gpio")
  1653a179c822 ("drm/amd/pm: move NAVI1X power mode switching workaround to 
post_init")

from the drm tree.

I fixed it up (see below) and can carry the fix as necessary. This
is now fixed as far as linux-next is concerned, but any non trivial
conflicts should be mentioned to your upstream maintainer when your tree
is submitted for merging.  You may also want to consider cooperating
with the maintainer of the conflicting tree to minimise any particularly
complex conflicts.

-- 
Cheers,
Stephen Rothwell

diff --cc drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c
index 8dc5abb6931e,5c4b74f964fc..
--- a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c
@@@ -955,35 -1013,6 +1002,17 @@@ static int smu_smc_hw_setup(struct smu_
return ret;
}
  
-   ret = smu_disable_umc_cdr_12gbps_workaround(smu);
-   if (ret) {
-   dev_err(adev->dev, "Workaround failed to disable UMC CDR 
feature on 12Gbps SKU!\n");
-   return ret;
-   }
- 
-   /*
-* For Navi1X, manually switch it to AC mode as PMFW
-* may boot it with DC mode.
-*/
-   ret = smu_set_power_source(smu,
-  adev->pm.ac_power ? SMU_POWER_SOURCE_AC :
-  SMU_POWER_SOURCE_DC);
-   if (ret) {
-   dev_err(adev->dev, "Failed to switch to %s mode!\n", 
adev->pm.ac_power ? "AC" : "DC");
-   return ret;
-   }
- 
 +  /*
 +   * Set initialized values (get from vbios) to dpm tables context such as
 +   * gfxclk, memclk, dcefclk, and etc. And enable the DPM feature for each
 +   * type of clks.
 +   */
 +  ret = smu_set_default_dpm_table(smu);
 +  if (ret) {
 +  dev_err(adev->dev, "Failed to setup default dpm clock 
tables!\n");
 +  return ret;
 +  }
 +
ret = smu_notify_display_change(smu);
if (ret)
return ret;


pgpNa4QYOhfcn.pgp
Description: OpenPGP digital signature


Re: linux-next: manual merge of the bpf-next tree with the bpf tree

2020-10-01 Thread Stephen Rothwell
Hi all,

On Wed, 30 Sep 2020 14:07:15 +1000 Stephen Rothwell  
wrote:
>
> Today's linux-next merge of the bpf-next tree got a conflict in:
> 
>   tools/lib/bpf/btf.c
> 
> between commit:
> 
>   1245008122d7 ("libbpf: Fix native endian assumption when parsing BTF")
> 
> from the bpf tree and commit:
> 
>   3289959b97ca ("libbpf: Support BTF loading and raw data output in both 
> endianness")
> 
> from the bpf-next tree.
> 
> I fixed it up (I used the latter version) and can carry the fix as
> necessary. This is now fixed as far as linux-next is concerned, but any
> non trivial conflicts should be mentioned to your upstream maintainer
> when your tree is submitted for merging.  You may also want to consider
> cooperating with the maintainer of the conflicting tree to minimise any
> particularly complex conflicts.

This is now a conflict between the net-next and net trees.
-- 
Cheers,
Stephen Rothwell


pgpwaOh611k1j.pgp
Description: OpenPGP digital signature


[PATCH 1/2] scsi: ufs: Use memset to initialize variable in ufshcd_crypto_keyslot_program

2020-10-01 Thread Pujin Shi
Clang warns:

drivers/scsi/ufs/ufshcd-crypto.c:62:8: warning: missing braces around 
initializer [-Wmissing-braces]
  union ufs_crypto_cfg_entry cfg = { 0 };
^

Signed-off-by: Pujin Shi 
---
 drivers/scsi/ufs/ufshcd-crypto.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/drivers/scsi/ufs/ufshcd-crypto.c b/drivers/scsi/ufs/ufshcd-crypto.c
index d2edbd960ebf..8fca2a40c517 100644
--- a/drivers/scsi/ufs/ufshcd-crypto.c
+++ b/drivers/scsi/ufs/ufshcd-crypto.c
@@ -59,9 +59,11 @@ static int ufshcd_crypto_keyslot_program(struct 
blk_keyslot_manager *ksm,
u8 data_unit_mask = key->crypto_cfg.data_unit_size / 512;
int i;
int cap_idx = -1;
-   union ufs_crypto_cfg_entry cfg = { 0 };
+   union ufs_crypto_cfg_entry cfg;
int err;
 
+   memset(&cfg, 0, sizeof(cfg));
+
BUILD_BUG_ON(UFS_CRYPTO_KEY_SIZE_INVALID != 0);
for (i = 0; i < hba->crypto_capabilities.num_crypto_cap; i++) {
if (ccap_array[i].algorithm_id == alg->ufs_alg &&
-- 
2.18.1



[PATCH 2/2] scsi: ufs: Use memset to initialize variable in ufshcd_clear_keyslot

2020-10-01 Thread Pujin Shi
Clang warns:

drivers/scsi/ufs/ufshcd-crypto.c:103:8: warning: missing braces around 
initializer [-Wmissing-braces]
  union ufs_crypto_cfg_entry cfg = { 0 };
^

Signed-off-by: Pujin Shi 
---
 drivers/scsi/ufs/ufshcd-crypto.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/scsi/ufs/ufshcd-crypto.c b/drivers/scsi/ufs/ufshcd-crypto.c
index 8fca2a40c517..bd439021ccce 100644
--- a/drivers/scsi/ufs/ufshcd-crypto.c
+++ b/drivers/scsi/ufs/ufshcd-crypto.c
@@ -103,6 +103,9 @@ static int ufshcd_clear_keyslot(struct ufs_hba *hba, int 
slot)
 * might not be sufficient, so just clear the entire cfg.
 */
union ufs_crypto_cfg_entry cfg = { 0 };
+   union ufs_crypto_cfg_entry cfg;
+
+   memset(&cfg, 0, sizeof(cfg));
 
return ufshcd_program_key(hba, &cfg, slot);
 }
-- 
2.18.1



Re: [REGRESSION] hwmon: (applesmc) avoid overlong udelay()

2020-10-01 Thread Guenter Roeck
On 10/1/20 3:22 PM, Andreas Kemnade wrote:
> On Wed, 30 Sep 2020 22:00:09 +0200
> Arnd Bergmann  wrote:
> 
>> On Wed, Sep 30, 2020 at 6:44 PM Guenter Roeck  wrote:
>>>
>>> On Wed, Sep 30, 2020 at 10:54:42AM +0200, Andreas Kemnade wrote:  
 Hi,

 after the $subject patch I get lots of errors like this:  
>>>
>>> For reference, this refers to commit fff2d0f701e6 ("hwmon: (applesmc)
>>> avoid overlong udelay()").
>>>  
 [  120.378614] applesmc: send_byte(0x00, 0x0300) fail: 0x40
 [  120.378621] applesmc: LKSB: write data fail
 [  120.512782] applesmc: send_byte(0x00, 0x0300) fail: 0x40
 [  120.512787] applesmc: LKSB: write data fail

 CPU sticks at low speed and no fan is turning on.
 Reverting this patch on top of 5.9-rc6 solves this problem.

 Some information from dmidecode:

 Base Board Information
 Manufacturer: Apple Inc.
 Product Name: Mac-7DF21CB3ED6977E5
 Version: MacBookAir6,2

 Handle 0x0020, DMI type 11, 5 bytes OEM Strings String 1: Apple 
 ROM Version.  Model:   …,
 Handle 0x0020, DMI type 11, 5 bytes
 OEM Strings
 String 1: Apple ROM Version.  Model:MBA61.  EFI Version:  
 122.0.0
 String 2: .0.0.  Built by: root@saumon.  Date: Wed Jun 
 10 18:
 String 3: 10:36 PDT 2020.  Revision: 122 (B).  ROM Version:  
 F000_B
 String 4: 00.  Build Type:   Official Build, Release.  Compiler:   
   Appl
 String 5: e clang version 3.0 (tags/Apple/clang-211.10.1) (based 
 on LLVM
 String 6: 3.0svn).

 Writing to things in /sys/devices/platform/applesmc.768 gives also the
 said errors.
 But writing 1 to fan1_manual and 5000 to fan1_output turns the fan on
 despite error messages.
  
>>> Not really sure what to do here. I could revert the patch, but then we'd 
>>> gain
>>> clang compile failures. Arnd, any idea ?  
>>
>> It seems that either I made a mistake in the conversion and it sleeps for
>> less time than before, or my assumption was wrong that converting a delay to
>> a sleep is safe here.
>>
>> The error message indicates that the write fails, not the read, so that
>> is what I'd look at first. Right away I can see that the maximum time to
>> retry is only half of what it used to be, as we used to wait for
>> 0x10, 0x20, 0x40, 0x80, ..., 0x20000 microseconds for a total of
>> 0x3fff0 microseconds (262ms), while my patch went with the 131ms
>> total delay based on the comment saying "/* wait up to 128 ms for a
>> status change. */".
>>
> Yes, that is also what I read from the code. I just thought there must
> be something simple, which just needs a short look from another pair of
> eyes.
> 
>> Since there is sleeping wait, I see no reason the timeout couldn't
>> be extended a lot, e.g. to a second, as in
>>
>> #define APPLESMC_MAX_WAIT 0x100000
>>
>> If that doesn't work, I'd try using mdelay() in place of
>> usleep_range(), such as
>>
>>mdelay(DIV_ROUND_UP(us, USEC_PER_MSEC));
>>
>> This adds back a really nasty latency, but it should avoid the
>> compile-time problem.
>>
>> Andreas, can you try those two things? (one at a time,
>> not both)
> 
> Ok, I tried. None of them works. I rechecked my work and created real
> git commits out of them and CONFIG_LOCALVERSION_AUTO is also set so
> the usual stupid things are ruled out.
> In detail:
> On top of 5.9-rc6 + *reverted* patch:
> diff --git a/drivers/hwmon/applesmc.c b/drivers/hwmon/applesmc.c
> index fd99c9df8a00..2a9bd7f2b71b 100644
> --- a/drivers/hwmon/applesmc.c
> +++ b/drivers/hwmon/applesmc.c
> @@ -45,7 +45,7 @@
>  /* wait up to 128 ms for a status change. */
>  #define APPLESMC_MIN_WAIT 0x0010
>  #define APPLESMC_RETRY_WAIT 0x0100
> -#define APPLESMC_MAX_WAIT 0x20000
> +#define APPLESMC_MAX_WAIT 0x8000
>  
>  #define APPLESMC_READ_CMD 0x10
>  #define APPLESMC_WRITE_CMD 0x11
> 

Oh man, that code is so badly broken.

send_byte() repeats sending the data if it was not immediately successful.
That is done for both data and commands. Effectively that happens if
the command is not immediately accepted. However, send_argument()
clearly assumes that each data byte is sent exactly once. Sending
it more than once will mess up the key that is supposed to be sent.
The Apple SMC emulation code in qemu confirms that data bytes can not
be written more than once.

Of course, theoretically it may be that the first data byte was not
accepted (after all, the ACK bit is not set), but the ACK bit is
not checked again after udelay(APPLESMC_RETRY_WAIT), so it may
well have been set in the 256 uS between its check and re-writing
the data.

In other words, this entire code only works accidentally to start with.

If you like, you could play around with the code and find out if and
when exactly bit 1 (busy) is set, if and when bit 2 (ack) is set, and
if and when any other bit is set. 

  1   2   3   4   5   6   7   8   9   10   >