[PATCH kernel v3 5/6] powerpc/powernv: Rework TCE level allocation

2018-07-04 Thread Alexey Kardashevskiy
This moves the actual page allocation to a separate function which is going
to be reused later for on-demand TCE allocation.

While we are at it, remove the unnecessary level size round-up as the caller
already does this.

Reviewed-by: David Gibson 
Signed-off-by: Alexey Kardashevskiy 
---
 arch/powerpc/platforms/powernv/pci-ioda-tce.c | 30 +--
 1 file changed, 19 insertions(+), 11 deletions(-)

diff --git a/arch/powerpc/platforms/powernv/pci-ioda-tce.c b/arch/powerpc/platforms/powernv/pci-ioda-tce.c
index f14b282..36c2eb0 100644
--- a/arch/powerpc/platforms/powernv/pci-ioda-tce.c
+++ b/arch/powerpc/platforms/powernv/pci-ioda-tce.c
@@ -31,6 +31,23 @@ void pnv_pci_setup_iommu_table(struct iommu_table *tbl,
tbl->it_type = TCE_PCI;
 }
 
+static __be64 *pnv_alloc_tce_level(int nid, unsigned int shift)
+{
+   struct page *tce_mem = NULL;
+   __be64 *addr;
+
+   tce_mem = alloc_pages_node(nid, GFP_KERNEL, shift - PAGE_SHIFT);
+   if (!tce_mem) {
+   pr_err("Failed to allocate a TCE memory, level shift=%d\n",
+   shift);
+   return NULL;
+   }
+   addr = page_address(tce_mem);
+   memset(addr, 0, 1UL << shift);
+
+   return addr;
+}
+
 static __be64 *pnv_tce(struct iommu_table *tbl, bool user, long idx)
 {
__be64 *tmp = user ? tbl->it_userspace : (__be64 *) tbl->it_base;
@@ -165,21 +182,12 @@ static __be64 *pnv_pci_ioda2_table_do_alloc_pages(int nid, unsigned int shift,
unsigned int levels, unsigned long limit,
unsigned long *current_offset, unsigned long *total_allocated)
 {
-   struct page *tce_mem = NULL;
__be64 *addr, *tmp;
-   unsigned int order = max_t(unsigned int, shift, PAGE_SHIFT) -
-   PAGE_SHIFT;
-   unsigned long allocated = 1UL << (order + PAGE_SHIFT);
+   unsigned long allocated = 1UL << shift;
unsigned int entries = 1UL << (shift - 3);
long i;
 
-   tce_mem = alloc_pages_node(nid, GFP_KERNEL, order);
-   if (!tce_mem) {
-   pr_err("Failed to allocate a TCE memory, order=%d\n", order);
-   return NULL;
-   }
-   addr = page_address(tce_mem);
-   memset(addr, 0, allocated);
+   addr = pnv_alloc_tce_level(nid, shift);
*total_allocated += allocated;
 
--levels;
-- 
2.11.0



[PATCH kernel v3 6/6] powerpc/powernv/ioda: Allocate indirect TCE levels on demand

2018-07-04 Thread Alexey Kardashevskiy
At the moment we allocate the entire TCE table, twice (the hardware part and
the userspace translation cache). This normally works as we usually have
contiguous memory and the guest will map the entire RAM for 64bit DMA.

However, if we have sparse RAM (one example is a memory device), then
we will allocate TCEs which will never be used as the guest only maps
actual memory for DMA. If it is a single level TCE table, there is nothing
we can really do, but if it is a multilevel table, we can skip allocating
TCEs we know we won't need.

This adds the ability to allocate only the first level, saving memory.

This changes iommu_table::free() to avoid allocating an extra level;
iommu_table::set() will do this when needed.

This adds an @alloc parameter to iommu_table::exchange() to tell the callback
whether it can allocate an extra level; the flag is set to "false" for
the realmode KVM handlers of H_PUT_TCE hcalls and the callback returns
H_TOO_HARD.

This still requires the entire table to be counted in mm::locked_vm.

To be conservative, this only does on-demand allocation when
the userspace cache table is requested, which is the case for VFIO.

The example math for a system replicating a powernv setup with NVLink2
in a guest:
16GB RAM mapped at 0x0
128GB GPU RAM window (16GB of actual RAM) mapped at 0x2440

the table to cover that all with 64K pages takes:
(((0x2440 + 0x20) >> 16)*8)>>20 = 4556MB

If we allocate only necessary TCE levels, we will only need:
(((0x4 + 0x4) >> 16)*8)>>20 = 4MB (plus some for indirect
levels).
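
To illustrate the approach (a simplified sketch only, with assumed names,
not the exact code added by this patch): when a table level is missing
during a lookup, it is allocated only if the caller passed alloc=true;
the realmode path passes alloc=false, gets NULL back and the KVM code
returns H_TOO_HARD so the hcall is retried in virtual mode:

    static __be64 *walk_one_level(__be64 *level, unsigned long n, int nid,
            unsigned int level_shift, bool alloc)
    {
        unsigned long next = be64_to_cpu(level[n]);

        if (!next) {
            __be64 *new;

            if (!alloc)
                return NULL;    /* realmode: let the caller fall back */

            new = pnv_alloc_tce_level(nid, level_shift); /* helper from 5/6 */
            if (!new)
                return NULL;

            /* assumption: bits 0/1 are the read/write permission bits */
            level[n] = cpu_to_be64(__pa(new) | 3);
            return new;
        }

        return __va(next & ~3UL);   /* strip the permission bits */
    }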

Signed-off-by: Alexey Kardashevskiy 
Reviewed-by: David Gibson 
---
Changes:
v2:
* fixed a bug in the cleanup path which forced the entire table to be
allocated right before destroying it
* added memory allocation error handling to pnv_tce()
---
 arch/powerpc/include/asm/iommu.h  |  7 ++-
 arch/powerpc/platforms/powernv/pci.h  |  6 ++-
 arch/powerpc/kvm/book3s_64_vio_hv.c   |  4 +-
 arch/powerpc/platforms/powernv/pci-ioda-tce.c | 73 +--
 arch/powerpc/platforms/powernv/pci-ioda.c |  8 +--
 drivers/vfio/vfio_iommu_spapr_tce.c   |  2 +-
 6 files changed, 73 insertions(+), 27 deletions(-)

diff --git a/arch/powerpc/include/asm/iommu.h b/arch/powerpc/include/asm/iommu.h
index 4bdcf22..daa3ee5 100644
--- a/arch/powerpc/include/asm/iommu.h
+++ b/arch/powerpc/include/asm/iommu.h
@@ -70,7 +70,7 @@ struct iommu_table_ops {
unsigned long *hpa,
enum dma_data_direction *direction);
 
-   __be64 *(*useraddrptr)(struct iommu_table *tbl, long index);
+   __be64 *(*useraddrptr)(struct iommu_table *tbl, long index, bool alloc);
 #endif
void (*clear)(struct iommu_table *tbl,
long index, long npages);
@@ -122,10 +122,13 @@ struct iommu_table {
__be64 *it_userspace; /* userspace view of the table */
struct iommu_table_ops *it_ops;
struct kref it_kref;
+   int it_nid;
 };
 
+#define IOMMU_TABLE_USERSPACE_ENTRY_RM(tbl, entry) \
+   ((tbl)->it_ops->useraddrptr((tbl), (entry), false))
 #define IOMMU_TABLE_USERSPACE_ENTRY(tbl, entry) \
-   ((tbl)->it_ops->useraddrptr((tbl), (entry)))
+   ((tbl)->it_ops->useraddrptr((tbl), (entry), true))
 
 /* Pure 2^n version of get_order */
 static inline __attribute_const__
diff --git a/arch/powerpc/platforms/powernv/pci.h b/arch/powerpc/platforms/powernv/pci.h
index 2962f6d..0020937 100644
--- a/arch/powerpc/platforms/powernv/pci.h
+++ b/arch/powerpc/platforms/powernv/pci.h
@@ -266,8 +266,10 @@ extern int pnv_tce_build(struct iommu_table *tbl, long index, long npages,
unsigned long attrs);
 extern void pnv_tce_free(struct iommu_table *tbl, long index, long npages);
 extern int pnv_tce_xchg(struct iommu_table *tbl, long index,
-   unsigned long *hpa, enum dma_data_direction *direction);
-extern __be64 *pnv_tce_useraddrptr(struct iommu_table *tbl, long index);
+   unsigned long *hpa, enum dma_data_direction *direction,
+   bool alloc);
+extern __be64 *pnv_tce_useraddrptr(struct iommu_table *tbl, long index,
+   bool alloc);
 extern unsigned long pnv_tce_get(struct iommu_table *tbl, long index);
 
 extern long pnv_pci_ioda2_table_alloc_pages(int nid, __u64 bus_offset,
diff --git a/arch/powerpc/kvm/book3s_64_vio_hv.c b/arch/powerpc/kvm/book3s_64_vio_hv.c
index 8cc1caf..efb90d8 100644
--- a/arch/powerpc/kvm/book3s_64_vio_hv.c
+++ b/arch/powerpc/kvm/book3s_64_vio_hv.c
@@ -200,7 +200,7 @@ static long kvmppc_rm_tce_iommu_mapped_dec(struct kvm *kvm,
 {
struct mm_iommu_table_group_mem_t *mem = NULL;
const unsigned long pgsize = 1ULL << tbl->it_page_shift;
-   __be64 *pua = IOMMU_TABLE_USERSPACE_ENTRY(tbl, entry);
+   __be64 *pua = IOMMU_TABLE_USERSPACE_ENTRY_RM(tbl, entry);
 
if (!pua)
/* it_userspace allocation might be delayed */
@@ -264,7 +264,7 @@ static long 

[PATCH kernel v3 3/6] KVM: PPC: Make iommu_table::it_userspace big endian

2018-07-04 Thread Alexey Kardashevskiy
We are going to reuse multilevel TCE code for the userspace copy of
the TCE table and since it is big endian, let's make the copy big endian
too.
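
In practice this just means converting at the access boundary; a minimal
sketch of the resulting convention (using the existing
IOMMU_TABLE_USERSPACE_ENTRY() accessor):

    __be64 *pua = IOMMU_TABLE_USERSPACE_ENTRY(tbl, entry);

    *pua = cpu_to_be64(ua);     /* store the userspace address big endian */
    ua = be64_to_cpu(*pua);     /* convert back when reading on the CPU side */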

Reviewed-by: David Gibson 
Signed-off-by: Alexey Kardashevskiy 
---
 arch/powerpc/include/asm/iommu.h|  2 +-
 arch/powerpc/kvm/book3s_64_vio.c| 11 ++-
 arch/powerpc/kvm/book3s_64_vio_hv.c | 10 +-
 drivers/vfio/vfio_iommu_spapr_tce.c | 19 +--
 4 files changed, 21 insertions(+), 21 deletions(-)

diff --git a/arch/powerpc/include/asm/iommu.h b/arch/powerpc/include/asm/iommu.h
index 20febe0..803ac70 100644
--- a/arch/powerpc/include/asm/iommu.h
+++ b/arch/powerpc/include/asm/iommu.h
@@ -117,7 +117,7 @@ struct iommu_table {
unsigned long *it_map;   /* A simple allocation bitmap for now */
unsigned long  it_page_shift;/* table iommu page size */
struct list_head it_group_list;/* List of iommu_table_group_link */
-   unsigned long *it_userspace; /* userspace view of the table */
+   __be64 *it_userspace; /* userspace view of the table */
struct iommu_table_ops *it_ops;
struct kref it_kref;
 };
diff --git a/arch/powerpc/kvm/book3s_64_vio.c b/arch/powerpc/kvm/book3s_64_vio.c
index 8167ce8..6f34edd 100644
--- a/arch/powerpc/kvm/book3s_64_vio.c
+++ b/arch/powerpc/kvm/book3s_64_vio.c
@@ -377,19 +377,19 @@ static long kvmppc_tce_iommu_mapped_dec(struct kvm *kvm,
 {
struct mm_iommu_table_group_mem_t *mem = NULL;
const unsigned long pgsize = 1ULL << tbl->it_page_shift;
-   unsigned long *pua = IOMMU_TABLE_USERSPACE_ENTRY(tbl, entry);
+   __be64 *pua = IOMMU_TABLE_USERSPACE_ENTRY(tbl, entry);
 
if (!pua)
/* it_userspace allocation might be delayed */
return H_TOO_HARD;
 
-   mem = mm_iommu_lookup(kvm->mm, *pua, pgsize);
+   mem = mm_iommu_lookup(kvm->mm, be64_to_cpu(*pua), pgsize);
if (!mem)
return H_TOO_HARD;
 
mm_iommu_mapped_dec(mem);
 
-   *pua = 0;
+   *pua = cpu_to_be64(0);
 
return H_SUCCESS;
 }
@@ -436,7 +436,8 @@ long kvmppc_tce_iommu_do_map(struct kvm *kvm, struct iommu_table *tbl,
enum dma_data_direction dir)
 {
long ret;
-   unsigned long hpa, *pua = IOMMU_TABLE_USERSPACE_ENTRY(tbl, entry);
+   unsigned long hpa;
+   __be64 *pua = IOMMU_TABLE_USERSPACE_ENTRY(tbl, entry);
struct mm_iommu_table_group_mem_t *mem;
 
if (!pua)
@@ -463,7 +464,7 @@ long kvmppc_tce_iommu_do_map(struct kvm *kvm, struct iommu_table *tbl,
if (dir != DMA_NONE)
kvmppc_tce_iommu_mapped_dec(kvm, tbl, entry);
 
-   *pua = ua;
+   *pua = cpu_to_be64(ua);
 
return 0;
 }
diff --git a/arch/powerpc/kvm/book3s_64_vio_hv.c b/arch/powerpc/kvm/book3s_64_vio_hv.c
index 5b298f5..841aef7 100644
--- a/arch/powerpc/kvm/book3s_64_vio_hv.c
+++ b/arch/powerpc/kvm/book3s_64_vio_hv.c
@@ -200,7 +200,7 @@ static long kvmppc_rm_tce_iommu_mapped_dec(struct kvm *kvm,
 {
struct mm_iommu_table_group_mem_t *mem = NULL;
const unsigned long pgsize = 1ULL << tbl->it_page_shift;
-   unsigned long *pua = IOMMU_TABLE_USERSPACE_ENTRY(tbl, entry);
+   __be64 *pua = IOMMU_TABLE_USERSPACE_ENTRY(tbl, entry);
 
if (!pua)
/* it_userspace allocation might be delayed */
@@ -210,13 +210,13 @@ static long kvmppc_rm_tce_iommu_mapped_dec(struct kvm *kvm,
if (WARN_ON_ONCE_RM(!pua))
return H_HARDWARE;
 
-   mem = mm_iommu_lookup_rm(kvm->mm, *pua, pgsize);
+   mem = mm_iommu_lookup_rm(kvm->mm, be64_to_cpu(*pua), pgsize);
if (!mem)
return H_TOO_HARD;
 
mm_iommu_mapped_dec(mem);
 
-   *pua = 0;
+   *pua = cpu_to_be64(0);
 
return H_SUCCESS;
 }
@@ -268,7 +268,7 @@ static long kvmppc_rm_tce_iommu_do_map(struct kvm *kvm, struct iommu_table *tbl,
 {
long ret;
unsigned long hpa = 0;
-   unsigned long *pua = IOMMU_TABLE_USERSPACE_ENTRY(tbl, entry);
+   __be64 *pua = IOMMU_TABLE_USERSPACE_ENTRY(tbl, entry);
struct mm_iommu_table_group_mem_t *mem;
 
if (!pua)
@@ -303,7 +303,7 @@ static long kvmppc_rm_tce_iommu_do_map(struct kvm *kvm, struct iommu_table *tbl,
if (dir != DMA_NONE)
kvmppc_rm_tce_iommu_mapped_dec(kvm, tbl, entry);
 
-   *pua = ua;
+   *pua = cpu_to_be64(ua);
 
return 0;
 }
diff --git a/drivers/vfio/vfio_iommu_spapr_tce.c b/drivers/vfio/vfio_iommu_spapr_tce.c
index 7cd63b0..17a418c 100644
--- a/drivers/vfio/vfio_iommu_spapr_tce.c
+++ b/drivers/vfio/vfio_iommu_spapr_tce.c
@@ -230,7 +230,7 @@ static long tce_iommu_userspace_view_alloc(struct iommu_table *tbl,
decrement_locked_vm(mm, cb >> PAGE_SHIFT);
return -ENOMEM;
}
-   tbl->it_userspace = uas;
+   tbl->it_userspace = (__be64 *) uas;
 
return 0;
 }
@@ -482,20 +482,20 @@ static void tce_iommu_unuse_page_v2(struct 

Re: [PATCH 2/3] powerpc/powernv: DMA operations for discontiguous allocation

2018-07-04 Thread Russell Currey
On Fri, 2018-06-29 at 17:34 +1000, Russell Currey wrote:



> + /*
> +  * The TCE isn't being used, so let's try and
> allocate it.
> +  * Bits 0 and 1 are read/write, and we use bit 2 as
> a "lock"
> +  * bit.  This is to prevent any race where the value
> is set in
> +  * the TCE table but the invalidate/mb() hasn't
> finished yet.
> +  */
> + entry = cpu_to_be64((addr - offset) | 7);
> + ret = cmpxchg(>tces[i], tce, entry);
> + if (ret != tce) {
> + /* conflict, start looking again just in
> case */
> + i--;
> + continue;
> + }
> + pnv_pci_phb3_tce_invalidate(pe, 0, 0, addr - offset,
> 1);

This is wrong and won't work outside of PHB3; I will make a generic
handler.

> + mb();
> + /* clear the lock bit now that we know it's active
> */
> + ret = cmpxchg(>tces[i], entry, cpu_to_be64((addr
> - offset) | 3));
> + if (ret != entry) {
> + /* conflict, start looking again just in
> case */
> + i--;
> + continue;
> + }
> +
> + return (i << phb->ioda.max_tce_order) | offset;
> + }
> + /* If we get here, the table must be full, so error out. */
> + return -1ULL;
> +}
> +
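
For reference, a minimal sketch of the lock-bit reservation pattern being
discussed above (hypothetical names, not the code from the patch):

    /*
     * Bits 0/1 are the read/write permission bits; bit 2 is used as a
     * transient "lock" so another allocator cannot treat the entry as
     * settled before the TCE invalidation and barrier have completed.
     */
    static bool reserve_tce_entry(__be64 *tces, unsigned long i, u64 addr)
    {
        __be64 locked = cpu_to_be64(addr | 7);  /* RW + lock bit */
        __be64 final = cpu_to_be64(addr | 3);   /* RW, lock cleared */

        if (cmpxchg(&tces[i], 0, locked) != 0)
            return false;       /* lost the race, try another entry */

        /* invalidate the TCE cache for this entry here (PHB specific) */
        mb();

        /* publish the entry by clearing the lock bit */
        return cmpxchg(&tces[i], locked, final) == locked;
    }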



[PATCH kernel v3 0/6] powerpc/powernv/iommu: Optimize memory use

2018-07-04 Thread Alexey Kardashevskiy


This patchset aims to reduce actual memory use for guests with
sparse memory. The pseries guest uses dynamic DMA windows to map
the entire guest RAM but it only actually maps onlined memory,
which may not be contiguous. I hit this when trying to pass
through NVLink2-connected GPU RAM of an NVIDIA V100: trying to
map this RAM at the same offset as in the real hardware
forced me to rework how I handle these windows.

This moves the userspace-to-host-physical translation table
(iommu_table::it_userspace) from the VFIO TCE IOMMU subdriver to
the platform code and reuses the already existing multilevel
TCE table code which we have for the hardware tables.
Finally, in 6/6 I switch to on-demand allocation so we do not
allocate huge chunks of the table if we do not have to;
there is some math in 6/6.

Changes:
v3:
* rebased on v4.18-rc3 and fixed compile error in 6/6

v2:
* bugfix and error handling in 6/6


This is based on sha1
021c917 Linus Torvalds "Linux 4.18-rc3".

Please comment. Thanks.



Alexey Kardashevskiy (6):
  powerpc/powernv: Remove useless wrapper
  powerpc/powernv: Move TCE manipulation code to its own file
  KVM: PPC: Make iommu_table::it_userspace big endian
  powerpc/powernv: Add indirect levels to it_userspace
  powerpc/powernv: Rework TCE level allocation
  powerpc/powernv/ioda: Allocate indirect TCE levels on demand

 arch/powerpc/platforms/powernv/Makefile   |   2 +-
 arch/powerpc/include/asm/iommu.h  |  11 +-
 arch/powerpc/platforms/powernv/pci.h  |  44 ++-
 arch/powerpc/kvm/book3s_64_vio.c  |  11 +-
 arch/powerpc/kvm/book3s_64_vio_hv.c   |  18 +-
 arch/powerpc/platforms/powernv/pci-ioda-tce.c | 399 ++
 arch/powerpc/platforms/powernv/pci-ioda.c | 184 ++--
 arch/powerpc/platforms/powernv/pci.c  | 158 --
 drivers/vfio/vfio_iommu_spapr_tce.c   |  65 +
 9 files changed, 478 insertions(+), 414 deletions(-)
 create mode 100644 arch/powerpc/platforms/powernv/pci-ioda-tce.c

-- 
2.11.0



[PATCH kernel v3 1/6] powerpc/powernv: Remove useless wrapper

2018-07-04 Thread Alexey Kardashevskiy
This gets rid of a useless wrapper around
pnv_pci_ioda2_table_free_pages().

Reviewed-by: David Gibson 
Signed-off-by: Alexey Kardashevskiy 
---
 arch/powerpc/platforms/powernv/pci-ioda.c | 7 +--
 1 file changed, 1 insertion(+), 6 deletions(-)

diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c
index cc5942d..02275a0 100644
--- a/arch/powerpc/platforms/powernv/pci-ioda.c
+++ b/arch/powerpc/platforms/powernv/pci-ioda.c
@@ -2199,11 +2199,6 @@ static void pnv_ioda2_tce_free(struct iommu_table *tbl, long index,
pnv_pci_ioda2_tce_invalidate(tbl, index, npages, false);
 }
 
-static void pnv_ioda2_table_free(struct iommu_table *tbl)
-{
-   pnv_pci_ioda2_table_free_pages(tbl);
-}
-
 static struct iommu_table_ops pnv_ioda2_iommu_ops = {
.set = pnv_ioda2_tce_build,
 #ifdef CONFIG_IOMMU_API
@@ -2212,7 +2207,7 @@ static struct iommu_table_ops pnv_ioda2_iommu_ops = {
 #endif
.clear = pnv_ioda2_tce_free,
.get = pnv_tce_get,
-   .free = pnv_ioda2_table_free,
+   .free = pnv_pci_ioda2_table_free_pages,
 };
 
 static int pnv_pci_ioda_dev_dma_weight(struct pci_dev *dev, void *data)
-- 
2.11.0



[PATCH kernel v3 4/6] powerpc/powernv: Add indirect levels to it_userspace

2018-07-04 Thread Alexey Kardashevskiy
We want to support sparse memory and therefore huge chunks of DMA windows
do not need to be mapped. If a DMA window is big enough to require 2 or more
indirect levels, and the DMA window is used to map all RAM (which is
the default case for the 64bit window), we can actually save some memory by
not allocating TCEs for regions which we are not going to map anyway.

The hardware tables already support indirect levels but we also keep
a host-physical-to-userspace translation array which is allocated by
vmalloc() and is a flat array which might use quite some memory.

This converts it_userspace from a vmalloc'ed array to a multilevel table.

As the format becomes platform dependent, this replaces the direct access
to it_userspace with an iommu_table_ops::useraddrptr hook which returns
a pointer to the userspace copy of a TCE; a future extension will return
NULL if the level was not allocated.

This should not change non-KVM handling of TCE tables and it_userspace
will not be allocated for non-KVM tables.
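
The way a flat index is split across levels can be sketched as follows
(illustrative only, with assumed names and an assumed 2-bit permission mask;
the real walk lives in pnv_tce(), and a missing level is what the on-demand
allocation in 6/6 later handles):

    static __be64 *multilevel_lookup(__be64 *table, unsigned int levels,
            unsigned int bits_per_level, unsigned long idx)
    {
        unsigned long mask = (1UL << bits_per_level) - 1;

        while (--levels) {
            unsigned long n = (idx >> (levels * bits_per_level)) & mask;

            /* an intermediate entry holds the next level's address */
            table = __va(be64_to_cpu(table[n]) & ~3UL);
        }

        return &table[idx & mask];  /* leaf slot for this index */
    }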

Reviewed-by: David Gibson 
Signed-off-by: Alexey Kardashevskiy 
---
Changes:
v2:
* fixed compile error by ditching one inline helper
---
 arch/powerpc/include/asm/iommu.h  |  6 +--
 arch/powerpc/platforms/powernv/pci.h  |  3 +-
 arch/powerpc/kvm/book3s_64_vio_hv.c   |  8 
 arch/powerpc/platforms/powernv/pci-ioda-tce.c | 65 +--
 arch/powerpc/platforms/powernv/pci-ioda.c | 23 +++---
 drivers/vfio/vfio_iommu_spapr_tce.c   | 46 ---
 6 files changed, 73 insertions(+), 78 deletions(-)

diff --git a/arch/powerpc/include/asm/iommu.h b/arch/powerpc/include/asm/iommu.h
index 803ac70..4bdcf22 100644
--- a/arch/powerpc/include/asm/iommu.h
+++ b/arch/powerpc/include/asm/iommu.h
@@ -69,6 +69,8 @@ struct iommu_table_ops {
long index,
unsigned long *hpa,
enum dma_data_direction *direction);
+
+   __be64 *(*useraddrptr)(struct iommu_table *tbl, long index);
 #endif
void (*clear)(struct iommu_table *tbl,
long index, long npages);
@@ -123,9 +125,7 @@ struct iommu_table {
 };
 
 #define IOMMU_TABLE_USERSPACE_ENTRY(tbl, entry) \
-   ((tbl)->it_userspace ? \
-   &((tbl)->it_userspace[(entry) - (tbl)->it_offset]) : \
-   NULL)
+   ((tbl)->it_ops->useraddrptr((tbl), (entry)))
 
 /* Pure 2^n version of get_order */
 static inline __attribute_const__
diff --git a/arch/powerpc/platforms/powernv/pci.h b/arch/powerpc/platforms/powernv/pci.h
index fa90f60..2962f6d 100644
--- a/arch/powerpc/platforms/powernv/pci.h
+++ b/arch/powerpc/platforms/powernv/pci.h
@@ -267,11 +267,12 @@ extern int pnv_tce_build(struct iommu_table *tbl, long index, long npages,
 extern void pnv_tce_free(struct iommu_table *tbl, long index, long npages);
 extern int pnv_tce_xchg(struct iommu_table *tbl, long index,
unsigned long *hpa, enum dma_data_direction *direction);
+extern __be64 *pnv_tce_useraddrptr(struct iommu_table *tbl, long index);
 extern unsigned long pnv_tce_get(struct iommu_table *tbl, long index);
 
 extern long pnv_pci_ioda2_table_alloc_pages(int nid, __u64 bus_offset,
__u32 page_shift, __u64 window_size, __u32 levels,
-   struct iommu_table *tbl);
+   bool alloc_userspace_copy, struct iommu_table *tbl);
 extern void pnv_pci_ioda2_table_free_pages(struct iommu_table *tbl);
 
 extern long pnv_pci_link_table_and_group(int node, int num,
diff --git a/arch/powerpc/kvm/book3s_64_vio_hv.c b/arch/powerpc/kvm/book3s_64_vio_hv.c
index 841aef7..8cc1caf 100644
--- a/arch/powerpc/kvm/book3s_64_vio_hv.c
+++ b/arch/powerpc/kvm/book3s_64_vio_hv.c
@@ -206,10 +206,6 @@ static long kvmppc_rm_tce_iommu_mapped_dec(struct kvm *kvm,
/* it_userspace allocation might be delayed */
return H_TOO_HARD;
 
-   pua = (void *) vmalloc_to_phys(pua);
-   if (WARN_ON_ONCE_RM(!pua))
-   return H_HARDWARE;
-
mem = mm_iommu_lookup_rm(kvm->mm, be64_to_cpu(*pua), pgsize);
if (!mem)
return H_TOO_HARD;
@@ -283,10 +279,6 @@ static long kvmppc_rm_tce_iommu_do_map(struct kvm *kvm, struct iommu_table *tbl,
)))
return H_HARDWARE;
 
-   pua = (void *) vmalloc_to_phys(pua);
-   if (WARN_ON_ONCE_RM(!pua))
-   return H_HARDWARE;
-
if (WARN_ON_ONCE_RM(mm_iommu_mapped_inc(mem)))
return H_CLOSED;
 
diff --git a/arch/powerpc/platforms/powernv/pci-ioda-tce.c b/arch/powerpc/platforms/powernv/pci-ioda-tce.c
index 700ceb1..f14b282 100644
--- a/arch/powerpc/platforms/powernv/pci-ioda-tce.c
+++ b/arch/powerpc/platforms/powernv/pci-ioda-tce.c
@@ -31,9 +31,9 @@ void pnv_pci_setup_iommu_table(struct iommu_table *tbl,
tbl->it_type = TCE_PCI;
 }
 
-static __be64 *pnv_tce(struct iommu_table *tbl, long idx)
+static __be64 *pnv_tce(struct 

[PATCH kernel v3 2/6] powerpc/powernv: Move TCE manipulation code to its own file

2018-07-04 Thread Alexey Kardashevskiy
Right now we have allocation code in pci-ioda.c and traversing code in
pci.c; let's keep them together. However, both files are big enough
already, so let's move this business to a new file.

While we are at it, move the code which links IOMMU table groups to
IOMMU tables as it is not specific to any PNV PHB model.

This puts the exported symbols from the new file together.

This fixes several warnings from checkpatch.pl like this:
"WARNING: Prefer 'unsigned int' to bare use of 'unsigned'".

As this is almost cut-and-paste, there should be no behavioral change.

Reviewed-by: David Gibson 
Signed-off-by: Alexey Kardashevskiy 
---
 arch/powerpc/platforms/powernv/Makefile   |   2 +-
 arch/powerpc/platforms/powernv/pci.h  |  41 ++--
 arch/powerpc/platforms/powernv/pci-ioda-tce.c | 313 ++
 arch/powerpc/platforms/powernv/pci-ioda.c | 146 
 arch/powerpc/platforms/powernv/pci.c  | 158 -
 5 files changed, 340 insertions(+), 320 deletions(-)
 create mode 100644 arch/powerpc/platforms/powernv/pci-ioda-tce.c

diff --git a/arch/powerpc/platforms/powernv/Makefile b/arch/powerpc/platforms/powernv/Makefile
index 703a350..b540ce8e 100644
--- a/arch/powerpc/platforms/powernv/Makefile
+++ b/arch/powerpc/platforms/powernv/Makefile
@@ -6,7 +6,7 @@ obj-y += opal-msglog.o opal-hmi.o opal-power.o opal-irqchip.o
 obj-y += opal-kmsg.o opal-powercap.o opal-psr.o opal-sensor-groups.o
 
 obj-$(CONFIG_SMP)  += smp.o subcore.o subcore-asm.o
-obj-$(CONFIG_PCI)  += pci.o pci-ioda.o npu-dma.o
+obj-$(CONFIG_PCI)  += pci.o pci-ioda.o npu-dma.o pci-ioda-tce.o
 obj-$(CONFIG_CXL_BASE) += pci-cxl.o
 obj-$(CONFIG_EEH)  += eeh-powernv.o
 obj-$(CONFIG_PPC_SCOM) += opal-xscom.o
diff --git a/arch/powerpc/platforms/powernv/pci.h b/arch/powerpc/platforms/powernv/pci.h
index eada4b6..fa90f60 100644
--- a/arch/powerpc/platforms/powernv/pci.h
+++ b/arch/powerpc/platforms/powernv/pci.h
@@ -201,13 +201,6 @@ struct pnv_phb {
 };
 
 extern struct pci_ops pnv_pci_ops;
-extern int pnv_tce_build(struct iommu_table *tbl, long index, long npages,
-   unsigned long uaddr, enum dma_data_direction direction,
-   unsigned long attrs);
-extern void pnv_tce_free(struct iommu_table *tbl, long index, long npages);
-extern int pnv_tce_xchg(struct iommu_table *tbl, long index,
-   unsigned long *hpa, enum dma_data_direction *direction);
-extern unsigned long pnv_tce_get(struct iommu_table *tbl, long index);
 
 void pnv_pci_dump_phb_diag_data(struct pci_controller *hose,
unsigned char *log_buff);
@@ -217,14 +210,6 @@ int pnv_pci_cfg_write(struct pci_dn *pdn,
  int where, int size, u32 val);
 extern struct iommu_table *pnv_pci_table_alloc(int nid);
 
-extern long pnv_pci_link_table_and_group(int node, int num,
-   struct iommu_table *tbl,
-   struct iommu_table_group *table_group);
-extern void pnv_pci_unlink_table_and_group(struct iommu_table *tbl,
-   struct iommu_table_group *table_group);
-extern void pnv_pci_setup_iommu_table(struct iommu_table *tbl,
- void *tce_mem, u64 tce_size,
- u64 dma_offset, unsigned page_shift);
 extern void pnv_pci_init_ioda_hub(struct device_node *np);
 extern void pnv_pci_init_ioda2_phb(struct device_node *np);
 extern void pnv_pci_init_npu_phb(struct device_node *np);
@@ -272,4 +257,30 @@ extern void pnv_cxl_cx4_teardown_msi_irqs(struct pci_dev *pdev);
 /* phb ops (cxl switches these when enabling the kernel api on the phb) */
 extern const struct pci_controller_ops pnv_cxl_cx4_ioda_controller_ops;
 
+/* pci-ioda-tce.c */
+#define POWERNV_IOMMU_DEFAULT_LEVELS   1
+#define POWERNV_IOMMU_MAX_LEVELS   5
+
+extern int pnv_tce_build(struct iommu_table *tbl, long index, long npages,
+   unsigned long uaddr, enum dma_data_direction direction,
+   unsigned long attrs);
+extern void pnv_tce_free(struct iommu_table *tbl, long index, long npages);
+extern int pnv_tce_xchg(struct iommu_table *tbl, long index,
+   unsigned long *hpa, enum dma_data_direction *direction);
+extern unsigned long pnv_tce_get(struct iommu_table *tbl, long index);
+
+extern long pnv_pci_ioda2_table_alloc_pages(int nid, __u64 bus_offset,
+   __u32 page_shift, __u64 window_size, __u32 levels,
+   struct iommu_table *tbl);
+extern void pnv_pci_ioda2_table_free_pages(struct iommu_table *tbl);
+
+extern long pnv_pci_link_table_and_group(int node, int num,
+   struct iommu_table *tbl,
+   struct iommu_table_group *table_group);
+extern void pnv_pci_unlink_table_and_group(struct iommu_table *tbl,
+   struct iommu_table_group *table_group);
+extern void pnv_pci_setup_iommu_table(struct iommu_table *tbl,
+   void *tce_mem, u64 tce_size,
+   u64 dma_offset, 

Re: [PATCH v2 2/2] hwmon: ibmpowernv: Add attributes to enable/disable sensor groups

2018-07-04 Thread Guenter Roeck

On 07/04/2018 09:53 AM, Shilpasri G Bhat wrote:

Hi Guenter,

Thanks for reviewing the patch.
On 07/04/2018 08:16 PM, Guenter Roeck wrote:

+/* Disable if last sensor in the group */
+send_command = true;
+for (i = 0; i < sg->nr_sensor; i++) {
+struct sensor_data *sd = sg->sensors[i];
+
+if (sd->enable) {
+send_command = false;
+break;
+}


This is weird. So there are situations where a request to disable
a sensor is accepted, but effectively ignored ? Shouldn't that
return, say, -EBUSY ?


This is because we do not support per-sensor enable/disable. We can only
enable/disable at the sensor-group level.

This patch follows the semantics of disabling a sensor group iff all the
sensors belonging to that group have been disabled. Otherwise the sensor alone
is marked as disabled and returns -ENODATA when read.

And a sensor group will be enabled if any of the sensors in that group is
enabled.



In similar situations, where setting one attribute affects others, a common 
solution
is to make only the first attribute writable and have it affect all the others.
I think that would make sense here as well, and it would be much simpler to 
implement.

Guenter


I will make changes to the remaining code according to your suggestion.

Thanks and Regards,
Shilpa






Re: [PATCHv3 3/4] drivers/base: clean up the usage of devices_kset_move_last()

2018-07-04 Thread Pingfan Liu
On Wed, Jul 4, 2018 at 6:18 PM Rafael J. Wysocki  wrote:
>
> On Wednesday, July 4, 2018 6:40:09 AM CEST Pingfan Liu wrote:
> > On Tue, Jul 3, 2018 at 10:28 PM Rafael J. Wysocki  
> > wrote:
> > >
> > > On Tuesday, July 3, 2018 8:50:41 AM CEST Pingfan Liu wrote:
> > > > Clean up the referring to the code in commit 52cdbdd49853 ("driver core:
> > > > correct device's shutdown order"). So later we can revert it safely.
> > > >
> > > > Cc: Greg Kroah-Hartman 
> > > > Cc: Rafael J. Wysocki 
> > > > Cc: Grygorii Strashko 
> > > > Cc: Christoph Hellwig 
> > > > Cc: Bjorn Helgaas 
> > > > Cc: Dave Young 
> > > > Cc: linux-...@vger.kernel.org
> > > > Cc: linuxppc-dev@lists.ozlabs.org
> > > > Signed-off-by: Pingfan Liu 
> > > > ---
> > > >  drivers/base/core.c | 7 ---
> > > >  1 file changed, 7 deletions(-)
> > > >
> > > > diff --git a/drivers/base/core.c b/drivers/base/core.c
> > > > index 684b994..db3deb8 100644
> > > > --- a/drivers/base/core.c
> > > > +++ b/drivers/base/core.c
> > > > @@ -127,13 +127,6 @@ static int device_reorder_to_tail(struct device 
> > > > *dev, void *not_used)
> > > >  {
> > > >   struct device_link *link;
> > > >
> > > > - /*
> > > > -  * Devices that have not been registered yet will be put to the 
> > > > ends
> > > > -  * of the lists during the registration, so skip them here.
> > > > -  */
> > > > - if (device_is_registered(dev))
> > > > - devices_kset_move_last(dev);
> > > > -
> > > >   if (device_pm_initialized(dev))
> > > >   device_pm_move_last(dev);
> > >
> > > You can't do this.
> > >
> > > If you do it, that will break power management in some situations.
> > >
> > Could you shed light on it? I had a quick browsing of pm code, but it
> > is a big function, and I got lost in it.
> > If the above code causes failure, then does it imply that the seq in
> > devices_kset should be the same as dpm_list?
>
> Generally, yes it should.
>
> > But in device_shutdown(), it only intersect with pm by
> > pm_runtime_get_noresume(dev) and pm_runtime_barrier(dev). How do these
> > function affect the seq in dpm_list?
>
> They are not related to dpm_list directly.
>
> However, if you shut down a supplier device before its consumer and that
> involves power management, then the consumer shutdown may fail and lock up
> the system
>
Ah, I get your point. The patch in this series "[PATCHv3 2/4]
drivers/base: utilize device tree info to shutdown devices" still obeys
the shutdown order "parent<-child" and "supplier<-consumer". It just
utilizes device-tree info to achieve this, since it turns out to be not easy
to maintain such an order in devices_kset. As I described in the commit
log of [2/4], it needs two nested recursions, and has to consider the
breakage of devices_kset's spinlock.

> I asked you elsewhere to clearly describe the problem you are trying to
> address.  Please do that in the first place.
>
OK, I will reply your question in [0/4]

Thanks,
Pingfan


Re: [PATCHv3 0/4] drivers/base: bugfix for supplier<-consumer ordering in device_kset

2018-07-04 Thread Pingfan Liu
On Wed, Jul 4, 2018 at 6:23 PM Rafael J. Wysocki  wrote:
>
> On Wednesday, July 4, 2018 4:47:07 AM CEST Pingfan Liu wrote:
> > On Tue, Jul 3, 2018 at 10:36 PM Rafael J. Wysocki  
> > wrote:
> > >
> > > On Tuesday, July 3, 2018 8:50:38 AM CEST Pingfan Liu wrote:
> > > > commit 52cdbdd49853 ("driver core: correct device's shutdown order")
> > > > places an assumption of supplier<-consumer order on the process of 
> > > > probe.
> > > > But it turns out to break down the parent <- child order in some scene.
> > > > E.g in pci, a bridge is enabled by pci core, and behind it, the devices
> > > > have been probed. Then comes the bridge's module, which enables extra
> > > > feature(such as hotplug) on this bridge.
> > >
> > > So what *exactly* does happen in that case?
> > >
I saw that shpc_probe() is called on the bridge, although the probing
failed on that bare-metal system. But if it succeeds, then it will enable the
hotplug feature on the bridge.
>
> I don't understand what you are saying here, sorry.
>
On the system, I observe the following:
[2.114986] devices_kset: Moving 0004:00:00.0 to end of list
<--- PCIe port driver's probe, but it failed
[2.115192] devices_kset: Moving 0004:01:00.0 to end of list
[2.115591] devices_kset: Moving 0004:02:02.0 to end of list
[2.115923] devices_kset: Moving 0004:02:0a.0 to end of list
[2.116141] devices_kset: Moving 0004:02:0b.0 to end of list
[2.116358] devices_kset: Moving 0004:02:0c.0 to end of list
[3.181860] devices_kset: Moving 0004:03:00.0 to end of list
<---the ata disk controller which sits behind the bridge
[   10.267081] devices_kset: Moving 0004:00:00.0 to end of list
 <---shpc_probe() on this bridge, failed too.

As you can see, the parent device "0004:00:00.0" is moved twice, and
finally it ends up after "0004:03:00.0"; this breaks the
"parent<-child" order in devices_kset. This is caused by the code path
really_probe()->devices_kset_move_last(). Apparently, it makes the
assumption that a child device's probing comes after its parent's. But
that does not hold in this case.

> device_reorder_to_tail() walks the entire device hierarchy below the target
> and moves all of the children in there *after* their parents.
>
As described, the bug is not related to device_reorder_to_tail(); it
is related to really_probe()->devices_kset_move_last(). So [2/4]
uses a different method to achieve the "parent<-child" and
"supplier<-consumer" order. [3/4] cleans up some code in
device_reorder_to_tail(), since I need to revert the commit.

> How can it break "the parent <- child order" then?
>
As described, it does not break it; it is just no longer used.

Thanks and regards,
Pingfan


Re: [PATCH kernel v3 2/2] KVM: PPC: Check if IOMMU page is contained in the pinned physical page

2018-07-04 Thread David Gibson
On Wed, Jul 04, 2018 at 03:00:52PM +1000, Alexey Kardashevskiy wrote:
> A VM which has:
>  - a DMA capable device passed through to it (eg. network card);
>  - running a malicious kernel that ignores H_PUT_TCE failure;
>  - capability of using IOMMU pages bigger that physical pages
> can create an IOMMU mapping that exposes (for example) 16MB of
> the host physical memory to the device when only 64K was allocated to the VM.
> 
> The remaining 16MB - 64K will be some other content of host memory, possibly
> including pages of the VM, but also pages of host kernel memory, host
> programs or other VMs.
> 
> The attacking VM does not control the location of the page it can map,
> and is only allowed to map as many pages as it has pages of RAM.
> 
> We already have a check in drivers/vfio/vfio_iommu_spapr_tce.c that
> an IOMMU page is contained in the physical page so the PCI hardware won't
> get access to unassigned host memory; however this check is missing in
> the KVM fastpath (H_PUT_TCE accelerated code). We were lucky so far and
> did not hit this yet as the very first time when the mapping happens
> we do not have tbl::it_userspace allocated yet and fall back to
> the userspace which in turn calls VFIO IOMMU driver, this fails and
> the guest does not retry,
> 
> This stores the smallest preregistered page size in the preregistered
> region descriptor and changes the mm_iommu_xxx API to check this against
> the IOMMU page size. This only allows huge pages use if the entire
> preregistered block is backed with huge pages which are completely
> contained the preregistered chunk; otherwise this defaults to PAGE_SIZE.
> 
> Signed-off-by: Alexey Kardashevskiy 

Reviewed-by: David Gibson 

On the grounds that I think this version is safe, which the old one
wasn't.  However it still has some flaws..

[snip]
> @@ -125,7 +126,8 @@ long mm_iommu_get(struct mm_struct *mm, unsigned long ua, 
> unsigned long entries,
>  {
>   struct mm_iommu_table_group_mem_t *mem;
>   long i, j, ret = 0, locked_entries = 0;
> - struct page *page = NULL;
> + unsigned int pageshift;
> + struct page *page = NULL, *head = NULL;
>  
>   mutex_lock(&mem_list_mutex);
>  
> @@ -159,6 +161,7 @@ long mm_iommu_get(struct mm_struct *mm, unsigned long ua, 
> unsigned long entries,
>   goto unlock_exit;
>   }
>  
> + mem->pageshift = 64;
>   mem->hpas = vzalloc(array_size(entries, sizeof(mem->hpas[0])));
>   if (!mem->hpas) {
>   kfree(mem);
> @@ -199,9 +202,35 @@ long mm_iommu_get(struct mm_struct *mm, unsigned long 
> ua, unsigned long entries,
>   }
>   }
>  populate:
> + pageshift = PAGE_SHIFT;
> + if (PageCompound(page)) {
> + /* Make sure huge page is contained completely */
> + struct page *tmphead = compound_head(page);
> + unsigned int n = compound_order(tmphead);
> +
> + if (!head) {
> + /* Is it a head of a huge page? */
> + if (page == tmphead) {
> + head = tmphead;
> + pageshift += n;
> + }
> + } else if (head == tmphead) {
> + /* Still same huge page, good */
> + pageshift += n;
> +
> + /* End of the huge page */
> + if (page - head == (1UL << n) - 1)
> + head = NULL;
> + }
> + }
> + mem->pageshift = min(mem->pageshift, pageshift);
>   mem->hpas[i] = page_to_pfn(page) << PAGE_SHIFT;
>   }
>  
> + /* We have an incomplete huge page, default to PAGE_SHIFT */
> + if (head)
> + mem->pageshift = PAGE_SHIFT;
> +

So, if the user attempts to prereg a region which starts or ends in
the middle of a hugepage, this logic will clamp the region's max page
shift down to PAGE_SHIFT.  That's safe, but not optimal.

Suppose userspace had an area backed with 16MiB hugepages, and wanted
to pre-reg a window that was 2MiB aligned, but not 16MiB aligned.  It
would still be safe to allow 2MiB TCEs, but the code above would clamp
it down to 64kiB (or 4kiB).

The code to do it is also pretty convoluted.

I think you'd be better off initializing mem->pageshift to the largest
possible natural alignment of the region:
mem->pageshift = ctz64(ua | (entries << PAGE_SHIFT));

Then it should just be sufficient to clamp pageshift down to
compound_order() + PAGE_SHIFT for each entry.
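
A rough sketch of that suggestion (illustrative only, not the code that was
merged; __ffs() gives the trailing-zero count referred to as ctz64 above):

    /* largest natural alignment of the whole region */
    mem->pageshift = __ffs(ua | (entries << PAGE_SHIFT));

    for (i = 0; i < entries; ++i) {
        unsigned int pageshift = PAGE_SHIFT;

        /* 'page' for entry i comes from the GUP loop above (elided) */
        if (PageCompound(page))
            pageshift += compound_order(compound_head(page));

        /* clamp to what the backing page actually guarantees */
        mem->pageshift = min(mem->pageshift, pageshift);
        mem->hpas[i] = page_to_pfn(page) << PAGE_SHIFT;
    }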

-- 
David Gibson| I'll have my music baroque, and my code
david AT gibson.dropbear.id.au  | minimalist, thank you.  NOT _the_ _other_
| _way_ _around_!
http://www.ozlabs.org/~dgibson




Re: [PATCH kernel v3 2/2] KVM: PPC: Check if IOMMU page is contained in the pinned physical page

2018-07-04 Thread Alexey Kardashevskiy
On Thu, 5 Jul 2018 12:42:20 +1000
David Gibson  wrote:

> On Wed, Jul 04, 2018 at 03:00:52PM +1000, Alexey Kardashevskiy wrote:
> > A VM which has:
> >  - a DMA capable device passed through to it (eg. network card);
> >  - running a malicious kernel that ignores H_PUT_TCE failure;
> >  - capability of using IOMMU pages bigger that physical pages
> > can create an IOMMU mapping that exposes (for example) 16MB of
> > the host physical memory to the device when only 64K was allocated to the 
> > VM.
> > 
> > The remaining 16MB - 64K will be some other content of host memory, possibly
> > including pages of the VM, but also pages of host kernel memory, host
> > programs or other VMs.
> > 
> > The attacking VM does not control the location of the page it can map,
> > and is only allowed to map as many pages as it has pages of RAM.
> > 
> > We already have a check in drivers/vfio/vfio_iommu_spapr_tce.c that
> > an IOMMU page is contained in the physical page so the PCI hardware won't
> > get access to unassigned host memory; however this check is missing in
> > the KVM fastpath (H_PUT_TCE accelerated code). We were lucky so far and
> > did not hit this yet as the very first time when the mapping happens
> > we do not have tbl::it_userspace allocated yet and fall back to
> > the userspace which in turn calls VFIO IOMMU driver, this fails and
> > the guest does not retry,
> > 
> > This stores the smallest preregistered page size in the preregistered
> > region descriptor and changes the mm_iommu_xxx API to check this against
> > the IOMMU page size. This only allows huge pages use if the entire
> > preregistered block is backed with huge pages which are completely
> > contained the preregistered chunk; otherwise this defaults to PAGE_SIZE.
> > 
> > Signed-off-by: Alexey Kardashevskiy   
> 
> Reviewed-by: David Gibson 
> 
> On the grounds that I think this version is safe, which the old one
> wasn't.  However it still has some flaws..
> 
> [snip]
> > @@ -125,7 +126,8 @@ long mm_iommu_get(struct mm_struct *mm, unsigned long 
> > ua, unsigned long entries,
> >  {
> > struct mm_iommu_table_group_mem_t *mem;
> > long i, j, ret = 0, locked_entries = 0;
> > -   struct page *page = NULL;
> > +   unsigned int pageshift;
> > +   struct page *page = NULL, *head = NULL;
> >  
> >     mutex_lock(&mem_list_mutex);
> >  
> > @@ -159,6 +161,7 @@ long mm_iommu_get(struct mm_struct *mm, unsigned long 
> > ua, unsigned long entries,
> > goto unlock_exit;
> > }
> >  
> > +   mem->pageshift = 64;
> > mem->hpas = vzalloc(array_size(entries, sizeof(mem->hpas[0])));
> > if (!mem->hpas) {
> > kfree(mem);
> > @@ -199,9 +202,35 @@ long mm_iommu_get(struct mm_struct *mm, unsigned long 
> > ua, unsigned long entries,
> > }
> > }
> >  populate:
> > +   pageshift = PAGE_SHIFT;
> > +   if (PageCompound(page)) {
> > +   /* Make sure huge page is contained completely */
> > +   struct page *tmphead = compound_head(page);
> > +   unsigned int n = compound_order(tmphead);
> > +
> > +   if (!head) {
> > +   /* Is it a head of a huge page? */
> > +   if (page == tmphead) {
> > +   head = tmphead;
> > +   pageshift += n;
> > +   }
> > +   } else if (head == tmphead) {
> > +   /* Still same huge page, good */
> > +   pageshift += n;
> > +
> > +   /* End of the huge page */
> > +   if (page - head == (1UL << n) - 1)
> > +   head = NULL;
> > +   }
> > +   }
> > +   mem->pageshift = min(mem->pageshift, pageshift);
> > mem->hpas[i] = page_to_pfn(page) << PAGE_SHIFT;
> > }
> >  
> > +   /* We have an incomplete huge page, default to PAGE_SHIFT */
> > +   if (head)
> > +   mem->pageshift = PAGE_SHIFT;
> > +  
> 
> So, if the user attempts to prereg a region which starts or ends in
> the middle of a hugepage, this logic will clamp the region's max page
> shift down to PAGE_SHIFT.  That's safe, but not optimal.
> 
> Suppose userspace had an area backed with 16MiB hugepages, and wanted
> to pre-reg a window that was 2MiB aligned, but not 16MiB aligned.  It
> would still be safe to allow 2MiB TCEs, but the code above would clamp
> it down to 64kiB (or 4kiB).
> 
> The code to do it is also pretty convoluted.
> 
> I think you'd be better off initializing mem->pageshift to the largest
> possible natural alignment of the region:
>   mem->pageshift = ctz64(ua | (entries << PAGE_SHIFT));
> 
> Then it should just be sufficient to clamp pageshift down to
> compound_order() + PAGE_SHIFT for each entry.


I like this better, just one question - does hugetlbfs guarantee the @ua
alignment 

Re: [PATCH] cxl: Fix wrong comparison in cxl_adapter_context_get()

2018-07-04 Thread Andrew Donnellan

On 05/07/18 01:28, Vaibhav Jain wrote:

Function atomic_inc_unless_negative() returns a bool to indicate
success/failure. However cxl_adapter_context_get() wrongly compares
the return value against '>=0' which will always be true. The patch
fixes this comparison to '==0', thereby also fixing this compile-time
warning:

drivers/misc/cxl/main.c:290 cxl_adapter_context_get()
warn: 'atomic_inc_unless_negative(&adapter->contexts_num)' is unsigned

Cc: sta...@vger.kernel.org
Fixes: 70b565bbdb91 ("cxl: Prevent adapter reset if an active context exists")
Reported-by: Dan Carpenter 
Signed-off-by: Vaibhav Jain 


Acked-by: Andrew Donnellan 

--
Andrew Donnellan  OzLabs, ADL Canberra
andrew.donnel...@au1.ibm.com  IBM Australia Limited



[PATCH 02/11] hugetlb: Introduce generic version of hugetlb_free_pgd_range

2018-07-04 Thread Alexandre Ghiti
arm, arm64, mips, parisc, sh, x86 architectures use the
same version of hugetlb_free_pgd_range, so move this generic
implementation into asm-generic/hugetlb.h.
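
The generic version ends up guarded so an architecture can still override it;
roughly (a sketch of the asm-generic/hugetlb.h fallback, matching the
__HAVE_ARCH_HUGETLB_FREE_PGD_RANGE define visible in the ia64 hunk below):

    #ifndef __HAVE_ARCH_HUGETLB_FREE_PGD_RANGE
    static inline void hugetlb_free_pgd_range(struct mmu_gather *tlb,
            unsigned long addr, unsigned long end,
            unsigned long floor, unsigned long ceiling)
    {
        free_pgd_range(tlb, addr, end, floor, ceiling);
    }
    #endif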

Signed-off-by: Alexandre Ghiti 
---
 arch/arm/include/asm/hugetlb.h | 12 ++--
 arch/arm64/include/asm/hugetlb.h   | 10 --
 arch/ia64/include/asm/hugetlb.h|  5 +++--
 arch/mips/include/asm/hugetlb.h| 13 ++---
 arch/parisc/include/asm/hugetlb.h  | 12 ++--
 arch/powerpc/include/asm/hugetlb.h |  4 +++-
 arch/sh/include/asm/hugetlb.h  | 12 ++--
 arch/sparc/include/asm/hugetlb.h   |  4 +++-
 arch/x86/include/asm/hugetlb.h | 11 ++-
 include/asm-generic/hugetlb.h  | 11 +++
 10 files changed, 30 insertions(+), 64 deletions(-)

diff --git a/arch/arm/include/asm/hugetlb.h b/arch/arm/include/asm/hugetlb.h
index 7d26f6c4f0f5..047b893ef95d 100644
--- a/arch/arm/include/asm/hugetlb.h
+++ b/arch/arm/include/asm/hugetlb.h
@@ -23,19 +23,9 @@
 #define _ASM_ARM_HUGETLB_H
 
 #include 
-#include 
 
 #include 
 
-static inline void hugetlb_free_pgd_range(struct mmu_gather *tlb,
- unsigned long addr, unsigned long end,
- unsigned long floor,
- unsigned long ceiling)
-{
-   free_pgd_range(tlb, addr, end, floor, ceiling);
-}
-
-
 static inline int is_hugepage_only_range(struct mm_struct *mm,
 unsigned long addr, unsigned long len)
 {
@@ -68,4 +58,6 @@ static inline void arch_clear_hugepage_flags(struct page *page)
clear_bit(PG_dcache_clean, &page->flags);
 }
 
+#include 
+
 #endif /* _ASM_ARM_HUGETLB_H */
diff --git a/arch/arm64/include/asm/hugetlb.h b/arch/arm64/include/asm/hugetlb.h
index 3fcf14663dfa..4af1a800a900 100644
--- a/arch/arm64/include/asm/hugetlb.h
+++ b/arch/arm64/include/asm/hugetlb.h
@@ -25,16 +25,6 @@ static inline pte_t huge_ptep_get(pte_t *ptep)
return READ_ONCE(*ptep);
 }
 
-
-
-static inline void hugetlb_free_pgd_range(struct mmu_gather *tlb,
- unsigned long addr, unsigned long end,
- unsigned long floor,
- unsigned long ceiling)
-{
-   free_pgd_range(tlb, addr, end, floor, ceiling);
-}
-
 static inline int is_hugepage_only_range(struct mm_struct *mm,
 unsigned long addr, unsigned long len)
 {
diff --git a/arch/ia64/include/asm/hugetlb.h b/arch/ia64/include/asm/hugetlb.h
index 74d2a5540aaf..afe9fa4d969b 100644
--- a/arch/ia64/include/asm/hugetlb.h
+++ b/arch/ia64/include/asm/hugetlb.h
@@ -3,9 +3,8 @@
 #define _ASM_IA64_HUGETLB_H
 
 #include 
-#include 
-
 
+#define __HAVE_ARCH_HUGETLB_FREE_PGD_RANGE
 void hugetlb_free_pgd_range(struct mmu_gather *tlb, unsigned long addr,
unsigned long end, unsigned long floor,
unsigned long ceiling);
@@ -70,4 +69,6 @@ static inline void arch_clear_hugepage_flags(struct page *page)
 {
 }
 
+#include 
+
 #endif /* _ASM_IA64_HUGETLB_H */
diff --git a/arch/mips/include/asm/hugetlb.h b/arch/mips/include/asm/hugetlb.h
index 982bc0685330..53764050243e 100644
--- a/arch/mips/include/asm/hugetlb.h
+++ b/arch/mips/include/asm/hugetlb.h
@@ -10,8 +10,6 @@
 #define __ASM_HUGETLB_H
 
 #include 
-#include 
-
 
 static inline int is_hugepage_only_range(struct mm_struct *mm,
 unsigned long addr,
@@ -38,15 +36,6 @@ static inline int prepare_hugepage_range(struct file *file,
return 0;
 }
 
-static inline void hugetlb_free_pgd_range(struct mmu_gather *tlb,
- unsigned long addr,
- unsigned long end,
- unsigned long floor,
- unsigned long ceiling)
-{
-   free_pgd_range(tlb, addr, end, floor, ceiling);
-}
-
 static inline void set_huge_pte_at(struct mm_struct *mm, unsigned long addr,
   pte_t *ptep, pte_t pte)
 {
@@ -114,4 +103,6 @@ static inline void arch_clear_hugepage_flags(struct page *page)
 {
 }
 
+#include 
+
 #endif /* __ASM_HUGETLB_H */
diff --git a/arch/parisc/include/asm/hugetlb.h b/arch/parisc/include/asm/hugetlb.h
index 58e0f4620426..28c23b68d38d 100644
--- a/arch/parisc/include/asm/hugetlb.h
+++ b/arch/parisc/include/asm/hugetlb.h
@@ -3,8 +3,6 @@
 #define _ASM_PARISC64_HUGETLB_H
 
 #include 
-#include 
-
 
 void set_huge_pte_at(struct mm_struct *mm, unsigned long addr,
 pte_t *ptep, pte_t pte);
@@ -32,14 +30,6 @@ static inline int prepare_hugepage_range(struct file *file,
return 0;
 }
 
-static inline void hugetlb_free_pgd_range(struct mmu_gather *tlb,
- unsigned long addr, unsigned long end,
- unsigned long 

Re: [PATCH 05/11] hugetlb: Introduce generic version of huge_ptep_clear_flush

2018-07-04 Thread Alexandre Ghiti
I just discovered my email provider's limit of mails per minute; please drop
this series, I'll send a v2 using the --batch-size option of git send-email.
Sorry about that.



On 07/04/2018 07:52 AM, Alexandre Ghiti wrote:

arm, x86 architectures use the same version of
huge_ptep_clear_flush, so move this generic implementation into
asm-generic/hugetlb.h.

Signed-off-by: Alexandre Ghiti 
---
  arch/arm/include/asm/hugetlb-3level.h | 6 --
  arch/arm64/include/asm/hugetlb.h  | 1 +
  arch/ia64/include/asm/hugetlb.h   | 1 +
  arch/mips/include/asm/hugetlb.h   | 1 +
  arch/parisc/include/asm/hugetlb.h | 1 +
  arch/powerpc/include/asm/hugetlb.h| 1 +
  arch/sh/include/asm/hugetlb.h | 1 +
  arch/sparc/include/asm/hugetlb.h  | 1 +
  arch/x86/include/asm/hugetlb.h| 6 --
  include/asm-generic/hugetlb.h | 8 
  10 files changed, 15 insertions(+), 12 deletions(-)

diff --git a/arch/arm/include/asm/hugetlb-3level.h b/arch/arm/include/asm/hugetlb-3level.h
index ad36e84b819a..b897541520ef 100644
--- a/arch/arm/include/asm/hugetlb-3level.h
+++ b/arch/arm/include/asm/hugetlb-3level.h
@@ -37,12 +37,6 @@ static inline pte_t huge_ptep_get(pte_t *ptep)
return retval;
  }
  
-static inline void huge_ptep_clear_flush(struct vm_area_struct *vma,

-unsigned long addr, pte_t *ptep)
-{
-   ptep_clear_flush(vma, addr, ptep);
-}
-
  static inline void huge_ptep_set_wrprotect(struct mm_struct *mm,
   unsigned long addr, pte_t *ptep)
  {
diff --git a/arch/arm64/include/asm/hugetlb.h b/arch/arm64/include/asm/hugetlb.h
index 6ae0bcafe162..4c8dd488554d 100644
--- a/arch/arm64/include/asm/hugetlb.h
+++ b/arch/arm64/include/asm/hugetlb.h
@@ -71,6 +71,7 @@ extern pte_t huge_ptep_get_and_clear(struct mm_struct *mm,
 unsigned long addr, pte_t *ptep);
  extern void huge_ptep_set_wrprotect(struct mm_struct *mm,
unsigned long addr, pte_t *ptep);
+#define __HAVE_ARCH_HUGE_PTEP_CLEAR_FLUSH
  extern void huge_ptep_clear_flush(struct vm_area_struct *vma,
  unsigned long addr, pte_t *ptep);
  #define __HAVE_ARCH_HUGE_PTE_CLEAR
diff --git a/arch/ia64/include/asm/hugetlb.h b/arch/ia64/include/asm/hugetlb.h
index 6719c74da0de..41b5f6adeee4 100644
--- a/arch/ia64/include/asm/hugetlb.h
+++ b/arch/ia64/include/asm/hugetlb.h
@@ -20,6 +20,7 @@ static inline int is_hugepage_only_range(struct mm_struct *mm,
REGION_NUMBER((addr)+(len)-1) == RGN_HPAGE);
  }
  
+#define __HAVE_ARCH_HUGE_PTEP_CLEAR_FLUSH

  static inline void huge_ptep_clear_flush(struct vm_area_struct *vma,
 unsigned long addr, pte_t *ptep)
  {
diff --git a/arch/mips/include/asm/hugetlb.h b/arch/mips/include/asm/hugetlb.h
index 0959cc5a41fa..7df1f116a3cc 100644
--- a/arch/mips/include/asm/hugetlb.h
+++ b/arch/mips/include/asm/hugetlb.h
@@ -48,6 +48,7 @@ static inline pte_t huge_ptep_get_and_clear(struct mm_struct *mm,
return pte;
  }
  
+#define __HAVE_ARCH_HUGE_PTEP_CLEAR_FLUSH

  static inline void huge_ptep_clear_flush(struct vm_area_struct *vma,
 unsigned long addr, pte_t *ptep)
  {
diff --git a/arch/parisc/include/asm/hugetlb.h b/arch/parisc/include/asm/hugetlb.h
index 6e281e1bb336..9afff26747a1 100644
--- a/arch/parisc/include/asm/hugetlb.h
+++ b/arch/parisc/include/asm/hugetlb.h
@@ -32,6 +32,7 @@ static inline int prepare_hugepage_range(struct file *file,
return 0;
  }
  
+#define __HAVE_ARCH_HUGE_PTEP_CLEAR_FLUSH

  static inline void huge_ptep_clear_flush(struct vm_area_struct *vma,
 unsigned long addr, pte_t *ptep)
  {
diff --git a/arch/powerpc/include/asm/hugetlb.h b/arch/powerpc/include/asm/hugetlb.h
index ec3e0c2e78f8..de0769f0b5b2 100644
--- a/arch/powerpc/include/asm/hugetlb.h
+++ b/arch/powerpc/include/asm/hugetlb.h
@@ -143,6 +143,7 @@ static inline pte_t huge_ptep_get_and_clear(struct mm_struct *mm,
  #endif
  }
  
+#define __HAVE_ARCH_HUGE_PTEP_CLEAR_FLUSH

  static inline void huge_ptep_clear_flush(struct vm_area_struct *vma,
 unsigned long addr, pte_t *ptep)
  {
diff --git a/arch/sh/include/asm/hugetlb.h b/arch/sh/include/asm/hugetlb.h
index 08ee6c00b5e9..9abf9c86b769 100644
--- a/arch/sh/include/asm/hugetlb.h
+++ b/arch/sh/include/asm/hugetlb.h
@@ -25,6 +25,7 @@ static inline int prepare_hugepage_range(struct file *file,
return 0;
  }
  
+#define __HAVE_ARCH_HUGE_PTEP_CLEAR_FLUSH

  static inline void huge_ptep_clear_flush(struct vm_area_struct *vma,
 unsigned long addr, pte_t *ptep)
  {
diff --git a/arch/sparc/include/asm/hugetlb.h b/arch/sparc/include/asm/hugetlb.h
index 944e3a4bfaff..651a9593fcee 100644
--- a/arch/sparc/include/asm/hugetlb.h
+++ b/arch/sparc/include/asm/hugetlb.h
@@ -42,6 

[PATCH] powerpc: icp-hv: fix missing of_node_put in success path

2018-07-04 Thread Nicholas Mc Guire
Both of_find_compatible_node() and of_find_node_by_type() will
return a refcounted node on success - thus for the success path
the node must be explicitly released with an of_node_put().
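
The pattern the fix enforces looks like this (a minimal sketch with a
made-up compatible string):

    struct device_node *np;

    np = of_find_compatible_node(NULL, NULL, "example,compatible");
    if (!np)
        return -ENODEV;

    /* ... use np ... */

    of_node_put(np);    /* drop the reference taken by the lookup */
    return 0;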

Signed-off-by: Nicholas Mc Guire 
Fixes: commit 0b05ac6e2480 ("powerpc/xics: Rewrite XICS driver")
---
Problem found by an experimental coccinelle script

Patch was compile-tested with: ppc64_defconfig (implies
CONFIG_PPC_ICP_HV=y),
with sparse warnings, though not related to the proposed change

Patch is against 4.18-rc3 (localversion-next is next-20180704)

 arch/powerpc/sysdev/xics/icp-hv.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/arch/powerpc/sysdev/xics/icp-hv.c b/arch/powerpc/sysdev/xics/icp-hv.c
index bbc839a..003deaa 100644
--- a/arch/powerpc/sysdev/xics/icp-hv.c
+++ b/arch/powerpc/sysdev/xics/icp-hv.c
@@ -179,6 +179,7 @@ int icp_hv_init(void)
 
icp_ops = &icp_hv_ops;
 
+   of_node_put(np);
return 0;
 }
 
-- 
2.1.4



[PATCH v10 4/6] init: allow initcall tables to be emitted using relative references

2018-07-04 Thread Ard Biesheuvel
Allow the initcall tables to be emitted using relative references that
are only half the size on 64-bit architectures and don't require fixups
at runtime on relocatable kernels.
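
The core trick is that each table entry stores a 32-bit offset from the
entry's own address to the function ("fn - ." in the asm below), so the
pointer is recovered at runtime roughly like this (a simplified sketch of
what offset_to_ptr() does):

    typedef int (*initcall_t)(void);

    static inline initcall_t initcall_from_rel_entry(const int *entry)
    {
        /* entry address + stored signed offset = address of fn */
        return (initcall_t)((unsigned long)entry + *entry);
    }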

Acked-by: James Morris 
Acked-by: Sergey Senozhatsky 
Acked-by: Petr Mladek 
Acked-by: Michael Ellerman 
Acked-by: Ingo Molnar 
Signed-off-by: Ard Biesheuvel 
---
 include/linux/init.h   | 44 +++-
 init/main.c| 32 +++---
 kernel/printk/printk.c | 16 +++
 security/security.c| 17 
 4 files changed, 68 insertions(+), 41 deletions(-)

diff --git a/include/linux/init.h b/include/linux/init.h
index bc27cf03c41e..2538d176dd1f 100644
--- a/include/linux/init.h
+++ b/include/linux/init.h
@@ -116,8 +116,24 @@
 typedef int (*initcall_t)(void);
 typedef void (*exitcall_t)(void);
 
-extern initcall_t __con_initcall_start[], __con_initcall_end[];
-extern initcall_t __security_initcall_start[], __security_initcall_end[];
+#ifdef CONFIG_HAVE_ARCH_PREL32_RELOCATIONS
+typedef int initcall_entry_t;
+
+static inline initcall_t initcall_from_entry(initcall_entry_t *entry)
+{
+   return offset_to_ptr(entry);
+}
+#else
+typedef initcall_t initcall_entry_t;
+
+static inline initcall_t initcall_from_entry(initcall_entry_t *entry)
+{
+   return *entry;
+}
+#endif
+
+extern initcall_entry_t __con_initcall_start[], __con_initcall_end[];
+extern initcall_entry_t __security_initcall_start[], __security_initcall_end[];
 
 /* Used for contructor calls. */
 typedef void (*ctor_fn_t)(void);
@@ -167,9 +183,20 @@ extern bool initcall_debug;
  * as KEEP() in the linker script.
  */
 
-#define __define_initcall(fn, id) \
+#ifdef CONFIG_HAVE_ARCH_PREL32_RELOCATIONS
+#define ___define_initcall(fn, id, __sec)  \
+   __ADDRESSABLE(fn)   \
+   asm(".section   \"" #__sec ".init\", \"a\"  \n" \
+   "__initcall_" #fn #id ":\n" \
+   ".long  " #fn " - . \n" \
+   ".previous  \n");
+#else
+#define ___define_initcall(fn, id, __sec) \
static initcall_t __initcall_##fn##id __used \
-   __attribute__((__section__(".initcall" #id ".init"))) = fn;
+   __attribute__((__section__(#__sec ".init"))) = fn;
+#endif
+
+#define __define_initcall(fn, id) ___define_initcall(fn, id, .initcall##id)
 
 /*
  * Early initcalls run before initializing SMP.
@@ -208,13 +235,8 @@ extern bool initcall_debug;
 #define __exitcall(fn) \
static exitcall_t __exitcall_##fn __exit_call = fn
 
-#define console_initcall(fn)   \
-   static initcall_t __initcall_##fn   \
-   __used __section(.con_initcall.init) = fn
-
-#define security_initcall(fn)  \
-   static initcall_t __initcall_##fn   \
-   __used __section(.security_initcall.init) = fn
+#define console_initcall(fn)   ___define_initcall(fn,, .con_initcall)
+#define security_initcall(fn)  ___define_initcall(fn,, .security_initcall)
 
 struct obs_kernel_param {
const char *str;
diff --git a/init/main.c b/init/main.c
index 3b4ada11ed52..e59a01f163d6 100644
--- a/init/main.c
+++ b/init/main.c
@@ -901,18 +901,18 @@ int __init_or_module do_one_initcall(initcall_t fn)
 }
 
 
-extern initcall_t __initcall_start[];
-extern initcall_t __initcall0_start[];
-extern initcall_t __initcall1_start[];
-extern initcall_t __initcall2_start[];
-extern initcall_t __initcall3_start[];
-extern initcall_t __initcall4_start[];
-extern initcall_t __initcall5_start[];
-extern initcall_t __initcall6_start[];
-extern initcall_t __initcall7_start[];
-extern initcall_t __initcall_end[];
-
-static initcall_t *initcall_levels[] __initdata = {
+extern initcall_entry_t __initcall_start[];
+extern initcall_entry_t __initcall0_start[];
+extern initcall_entry_t __initcall1_start[];
+extern initcall_entry_t __initcall2_start[];
+extern initcall_entry_t __initcall3_start[];
+extern initcall_entry_t __initcall4_start[];
+extern initcall_entry_t __initcall5_start[];
+extern initcall_entry_t __initcall6_start[];
+extern initcall_entry_t __initcall7_start[];
+extern initcall_entry_t __initcall_end[];
+
+static initcall_entry_t *initcall_levels[] __initdata = {
__initcall0_start,
__initcall1_start,
__initcall2_start,
@@ -938,7 +938,7 @@ static char *initcall_level_names[] __initdata = {
 
 static void __init do_initcall_level(int level)
 {
-   initcall_t *fn;
+   initcall_entry_t *fn;
 
strcpy(initcall_command_line, saved_command_line);
parse_args(initcall_level_names[level],
@@ -949,7 +949,7 @@ static void __init do_initcall_level(int level)
 
trace_initcall_level(initcall_level_names[level]);
for (fn = initcall_levels[level]; fn < initcall_levels[level+1]; fn++)
-   do_one_initcall(*fn);
+   do_one_initcall(initcall_from_entry(fn));

[PATCH v2 0/2] hwmon: Add attributes to enable/disable sensors

2018-07-04 Thread Shilpasri G Bhat
This patch series adds a new attribute to enable or disable a sensor at
runtime.

v1 : https://lkml.org/lkml/2018/3/22/214

Shilpasri G Bhat (2):
  powernv:opal-sensor-groups: Add support to enable sensor groups
  hwmon: ibmpowernv: Add attributes to enable/disable sensor groups

 Documentation/hwmon/sysfs-interface|  22 ++
 arch/powerpc/include/asm/opal-api.h|   1 +
 arch/powerpc/include/asm/opal.h|   2 +
 .../powerpc/platforms/powernv/opal-sensor-groups.c |  28 ++
 arch/powerpc/platforms/powernv/opal-wrappers.S |   1 +
 drivers/hwmon/ibmpowernv.c | 281 ++---
 6 files changed, 296 insertions(+), 39 deletions(-)

-- 
1.8.3.1



[PATCH v2 2/2] hwmon: ibmpowernv: Add attributes to enable/disable sensor groups

2018-07-04 Thread Shilpasri G Bhat
The On-Chip-Controller (OCC) is an embedded micro-processor in the POWER9
chip which measures various system and chip level sensors. These sensors
comprise environmental sensors (like power, temperature, current and
voltage) and performance sensors (like utilization and frequency).
All these sensors are copied to main memory at a regular interval of
100ms. OCC provides a way to select which sensor groups are copied to
main memory, so that the update frequency of the selected groups can be
increased. When a sensor group is disabled, OCC does not copy it to main
memory and those sensors read back as 0.

This patch adds support for enabling/disabling the sensor groups for
power, temperature, current and voltage. It adds a new per-sensor sysfs
attribute to enable and disable them.
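
As a rough userspace sketch of how the new attribute is meant to be used
(the hwmon index and sensor label below are assumptions, not taken from
this patch):

/* Toy example: disable one sensor and show that reading it then fails.
 * The sysfs paths are hypothetical and will differ between systems. */
#include <errno.h>
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
	const char *enable = "/sys/class/hwmon/hwmon0/power1_enable"; /* assumed path */
	const char *input  = "/sys/class/hwmon/hwmon0/power1_input";  /* assumed path */
	char buf[32];
	int fd = open(enable, O_WRONLY);

	if (fd < 0)
		return 1;
	write(fd, "0", 1);		/* disable the sensor */
	close(fd);

	fd = open(input, O_RDONLY);
	if (fd >= 0) {
		if (read(fd, buf, sizeof(buf)) < 0)
			printf("read failed: %d (ENODATA expected while disabled)\n", errno);
		close(fd);
	}
	return 0;
}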

Signed-off-by: Shilpasri G Bhat 
---
Changes from v1:
- Add per-sensor 'enable' attribute
- Return -ENODATA when sensor is disabled

 Documentation/hwmon/sysfs-interface |  22 +++
 drivers/hwmon/ibmpowernv.c  | 281 +++-
 2 files changed, 264 insertions(+), 39 deletions(-)

diff --git a/Documentation/hwmon/sysfs-interface 
b/Documentation/hwmon/sysfs-interface
index fc337c3..38ab05c 100644
--- a/Documentation/hwmon/sysfs-interface
+++ b/Documentation/hwmon/sysfs-interface
@@ -184,6 +184,11 @@ vrm		Voltage Regulator Module version number.
Affects the way the driver calculates the CPU core reference
voltage from the vid pins.
 
+in[0-*]_enable Enable or disable the sensor
+   1 : Enable
+   0 : Disable
+   RW
+
 Also see the Alarms section for status flags associated with voltages.
 
 
@@ -409,6 +414,12 @@ temp_reset_history
Reset temp_lowest and temp_highest for all sensors
WO
 
+temp[1-*]_enable
+   Enable or disable the sensor
+   1 : Enable
+   0 : Disable
+   RW
+
 Some chips measure temperature using external thermistors and an ADC, and
 report the temperature measurement as a voltage. Converting this voltage
 back to a temperature (or the other way around for limits) requires
@@ -468,6 +479,12 @@ curr_reset_history
Reset currX_lowest and currX_highest for all sensors
WO
 
+curr[1-*]_enable
+   Enable or disable the sensor
+   1 : Enable
+   0 : Disable
+   RW
+
 Also see the Alarms section for status flags associated with currents.
 
 *
@@ -566,6 +583,11 @@ power[1-*]_crit	Critical maximum power.
Unit: microWatt
RW
 
+power[1-*]_enable  Enable or disable the sensor
+   1 : Enable
+   0 : Disable
+   RW
+
 Also see the Alarms section for status flags associated with power readings.
 
 **
diff --git a/drivers/hwmon/ibmpowernv.c b/drivers/hwmon/ibmpowernv.c
index f829dad..61e04cf 100644
--- a/drivers/hwmon/ibmpowernv.c
+++ b/drivers/hwmon/ibmpowernv.c
@@ -90,8 +90,28 @@ struct sensor_data {
char label[MAX_LABEL_LEN];
char name[MAX_ATTR_LEN];
struct device_attribute dev_attr;
+   struct sensor_group_data *sgdata;
+   struct sensor_data *sdata[3];
+   bool enable;
 };
 
+static struct sensor_group_data {
+   u32 gid;
+   u32 nr_phandle;
+   u32 nr_sensor;
+   enum sensors type;
+   const __be32 *phandles;
+   struct sensor_data **sensors;
+   bool enable;
+} *sg_data;
+
+/*
+ * To synchronise writes to struct sensor_data.enable and
+ * struct sensor_group_data.enable
+ */
+DEFINE_MUTEX(sensor_groups_mutex);
+static int nr_sensor_groups;
+
 struct platform_data {
const struct attribute_group *attr_groups[MAX_SENSOR_TYPE + 1];
u32 sensors_count; /* Total count of sensors from each group */
@@ -105,6 +125,9 @@ static ssize_t show_sensor(struct device *dev, struct 
device_attribute *devattr,
ssize_t ret;
u64 x;
 
+   if (sdata->sgdata && !sdata->enable)
+   return -ENODATA;
+
	ret = opal_get_sensor_data_u64(sdata->id, &x);
 
if (ret)
@@ -120,6 +143,74 @@ static ssize_t show_sensor(struct device *dev, struct 
device_attribute *devattr,
return sprintf(buf, "%llu\n", x);
 }
 
+static ssize_t show_enable(struct device *dev,
+  struct device_attribute *devattr, char *buf)
+{
+   struct sensor_data *sdata = container_of(devattr, struct sensor_data,
+dev_attr);
+
+   return sprintf(buf, "%u\n", sdata->enable);
+}
+
+static ssize_t store_enable(struct device *dev,
+   struct device_attribute *devattr,
+   const char *buf, size_t count)
+{
+   struct sensor_data *sdata = container_of(devattr, struct sensor_data,
+   

[PATCH 01/11] hugetlb: Harmonize hugetlb.h arch specific defines with pgtable.h

2018-07-04 Thread Alexandre Ghiti
asm-generic/hugetlb.h provides generic implementations of hugetlb-related
functions: use __HAVE_ARCH_HUGE* defines in order to make the arch-specific
implementations of hugetlb functions consistent with the pgtable.h scheme.

Signed-off-by: Alexandre Ghiti 
---
 arch/arm64/include/asm/hugetlb.h | 2 +-
 include/asm-generic/hugetlb.h| 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/arch/arm64/include/asm/hugetlb.h b/arch/arm64/include/asm/hugetlb.h
index e73f68569624..3fcf14663dfa 100644
--- a/arch/arm64/include/asm/hugetlb.h
+++ b/arch/arm64/include/asm/hugetlb.h
@@ -81,9 +81,9 @@ extern void huge_ptep_set_wrprotect(struct mm_struct *mm,
unsigned long addr, pte_t *ptep);
 extern void huge_ptep_clear_flush(struct vm_area_struct *vma,
  unsigned long addr, pte_t *ptep);
+#define __HAVE_ARCH_HUGE_PTE_CLEAR
 extern void huge_pte_clear(struct mm_struct *mm, unsigned long addr,
   pte_t *ptep, unsigned long sz);
-#define huge_pte_clear huge_pte_clear
 extern void set_huge_swap_pte_at(struct mm_struct *mm, unsigned long addr,
 pte_t *ptep, pte_t pte, unsigned long sz);
 #define set_huge_swap_pte_at set_huge_swap_pte_at
diff --git a/include/asm-generic/hugetlb.h b/include/asm-generic/hugetlb.h
index 9d0cde8ab716..3da7cff52360 100644
--- a/include/asm-generic/hugetlb.h
+++ b/include/asm-generic/hugetlb.h
@@ -32,7 +32,7 @@ static inline pte_t huge_pte_modify(pte_t pte, pgprot_t 
newprot)
return pte_modify(pte, newprot);
 }
 
-#ifndef huge_pte_clear
+#ifndef __HAVE_ARCH_HUGE_PTE_CLEAR
 static inline void huge_pte_clear(struct mm_struct *mm, unsigned long addr,
pte_t *ptep, unsigned long sz)
 {
-- 
2.16.2



[PATCH 00/11] hugetlb: Factorize architecture hugetlb primitives

2018-07-04 Thread Alexandre Ghiti
In order to reduce the copy/paste of functions across architectures and to
make the riscv hugetlb port simpler and smaller, this patchset factorizes
the numerous hugetlb primitives that are defined across all the
architectures.

Except for prepare_hugepage_range, this patchset moves the versions that
are just pass-throughs to the standard pte primitives into
asm-generic/hugetlb.h by using the same #ifdef semantics that can be
found in asm-generic/pgtable.h, i.e. __HAVE_ARCH_***.
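
As a rough sketch of that scheme (hypothetical architecture, not code from
this series): an arch header defines the __HAVE_ARCH_* symbol next to its
own helper, and the generic header only supplies the fallback when the
symbol is absent:

/* arch/foo/include/asm/hugetlb.h (hypothetical arch) */
#define __HAVE_ARCH_HUGE_PTE_CLEAR
static inline void huge_pte_clear(struct mm_struct *mm, unsigned long addr,
				  pte_t *ptep, unsigned long sz)
{
	/* arch-specific implementation goes here */
}

/* include/asm-generic/hugetlb.h then only provides the pass-through: */
#ifndef __HAVE_ARCH_HUGE_PTE_CLEAR
static inline void huge_pte_clear(struct mm_struct *mm, unsigned long addr,
				  pte_t *ptep, unsigned long sz)
{
	pte_clear(mm, addr, ptep);
}
#endif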

The s390 architecture has not been tackled in this series since it does not
use asm-generic/hugetlb.h at all.
powerpc could be factorized a bit more (cf. huge_ptep_set_wrprotect).

This patchset has been compiled on x86 only.

Alexandre Ghiti (11):
  hugetlb: Harmonize hugetlb.h arch specific defines with pgtable.h
  hugetlb: Introduce generic version of hugetlb_free_pgd_range
  hugetlb: Introduce generic version of set_huge_pte_at
  hugetlb: Introduce generic version of huge_ptep_get_and_clear
  hugetlb: Introduce generic version of huge_ptep_clear_flush
  hugetlb: Introduce generic version of huge_pte_none
  hugetlb: Introduce generic version of huge_pte_wrprotect
  hugetlb: Introduce generic version of prepare_hugepage_range
  hugetlb: Introduce generic version of huge_ptep_set_wrprotect
  hugetlb: Introduce generic version of huge_ptep_set_access_flags
  hugetlb: Introduce generic version of huge_ptep_get

 arch/arm/include/asm/hugetlb-3level.h| 32 +-
 arch/arm/include/asm/hugetlb.h   | 33 +--
 arch/arm64/include/asm/hugetlb.h | 39 +++-
 arch/ia64/include/asm/hugetlb.h  | 47 ++-
 arch/mips/include/asm/hugetlb.h  | 40 +++--
 arch/parisc/include/asm/hugetlb.h| 33 +++
 arch/powerpc/include/asm/book3s/32/pgtable.h |  2 +
 arch/powerpc/include/asm/book3s/64/pgtable.h |  1 +
 arch/powerpc/include/asm/hugetlb.h   | 43 ++
 arch/powerpc/include/asm/nohash/32/pgtable.h |  2 +
 arch/powerpc/include/asm/nohash/64/pgtable.h |  1 +
 arch/sh/include/asm/hugetlb.h| 54 ++---
 arch/sparc/include/asm/hugetlb.h | 40 +++--
 arch/x86/include/asm/hugetlb.h   | 72 +--
 include/asm-generic/hugetlb.h| 88 +++-
 15 files changed, 143 insertions(+), 384 deletions(-)

-- 
2.16.2



[PATCH 04/11] hugetlb: Introduce generic version of huge_ptep_get_and_clear

2018-07-04 Thread Alexandre Ghiti
arm, ia64, sh, x86 architectures use the
same version of huge_ptep_get_and_clear, so move this generic
implementation into asm-generic/hugetlb.h.

Signed-off-by: Alexandre Ghiti 
---
 arch/arm/include/asm/hugetlb-3level.h | 6 --
 arch/arm64/include/asm/hugetlb.h  | 1 +
 arch/ia64/include/asm/hugetlb.h   | 6 --
 arch/mips/include/asm/hugetlb.h   | 1 +
 arch/parisc/include/asm/hugetlb.h | 1 +
 arch/powerpc/include/asm/hugetlb.h| 1 +
 arch/sh/include/asm/hugetlb.h | 6 --
 arch/sparc/include/asm/hugetlb.h  | 1 +
 arch/x86/include/asm/hugetlb.h| 6 --
 include/asm-generic/hugetlb.h | 8 
 10 files changed, 13 insertions(+), 24 deletions(-)

diff --git a/arch/arm/include/asm/hugetlb-3level.h 
b/arch/arm/include/asm/hugetlb-3level.h
index 398fb06e8207..ad36e84b819a 100644
--- a/arch/arm/include/asm/hugetlb-3level.h
+++ b/arch/arm/include/asm/hugetlb-3level.h
@@ -49,12 +49,6 @@ static inline void huge_ptep_set_wrprotect(struct mm_struct 
*mm,
ptep_set_wrprotect(mm, addr, ptep);
 }
 
-static inline pte_t huge_ptep_get_and_clear(struct mm_struct *mm,
-   unsigned long addr, pte_t *ptep)
-{
-   return ptep_get_and_clear(mm, addr, ptep);
-}
-
 static inline int huge_ptep_set_access_flags(struct vm_area_struct *vma,
 unsigned long addr, pte_t *ptep,
 pte_t pte, int dirty)
diff --git a/arch/arm64/include/asm/hugetlb.h b/arch/arm64/include/asm/hugetlb.h
index 874661a1dff1..6ae0bcafe162 100644
--- a/arch/arm64/include/asm/hugetlb.h
+++ b/arch/arm64/include/asm/hugetlb.h
@@ -66,6 +66,7 @@ extern void set_huge_pte_at(struct mm_struct *mm, unsigned 
long addr,
 extern int huge_ptep_set_access_flags(struct vm_area_struct *vma,
  unsigned long addr, pte_t *ptep,
  pte_t pte, int dirty);
+#define __HAVE_ARCH_HUGE_PTEP_GET_AND_CLEAR
 extern pte_t huge_ptep_get_and_clear(struct mm_struct *mm,
 unsigned long addr, pte_t *ptep);
 extern void huge_ptep_set_wrprotect(struct mm_struct *mm,
diff --git a/arch/ia64/include/asm/hugetlb.h b/arch/ia64/include/asm/hugetlb.h
index a235d6f60fb3..6719c74da0de 100644
--- a/arch/ia64/include/asm/hugetlb.h
+++ b/arch/ia64/include/asm/hugetlb.h
@@ -20,12 +20,6 @@ static inline int is_hugepage_only_range(struct mm_struct 
*mm,
REGION_NUMBER((addr)+(len)-1) == RGN_HPAGE);
 }
 
-static inline pte_t huge_ptep_get_and_clear(struct mm_struct *mm,
-   unsigned long addr, pte_t *ptep)
-{
-   return ptep_get_and_clear(mm, addr, ptep);
-}
-
 static inline void huge_ptep_clear_flush(struct vm_area_struct *vma,
 unsigned long addr, pte_t *ptep)
 {
diff --git a/arch/mips/include/asm/hugetlb.h b/arch/mips/include/asm/hugetlb.h
index 8ea439041d5d..0959cc5a41fa 100644
--- a/arch/mips/include/asm/hugetlb.h
+++ b/arch/mips/include/asm/hugetlb.h
@@ -36,6 +36,7 @@ static inline int prepare_hugepage_range(struct file *file,
return 0;
 }
 
+#define __HAVE_ARCH_HUGE_PTEP_GET_AND_CLEAR
 static inline pte_t huge_ptep_get_and_clear(struct mm_struct *mm,
unsigned long addr, pte_t *ptep)
 {
diff --git a/arch/parisc/include/asm/hugetlb.h 
b/arch/parisc/include/asm/hugetlb.h
index 77c8adbac7c3..6e281e1bb336 100644
--- a/arch/parisc/include/asm/hugetlb.h
+++ b/arch/parisc/include/asm/hugetlb.h
@@ -8,6 +8,7 @@
 void set_huge_pte_at(struct mm_struct *mm, unsigned long addr,
 pte_t *ptep, pte_t pte);
 
+#define __HAVE_ARCH_HUGE_PTEP_GET_AND_CLEAR
 pte_t huge_ptep_get_and_clear(struct mm_struct *mm, unsigned long addr,
  pte_t *ptep);
 
diff --git a/arch/powerpc/include/asm/hugetlb.h 
b/arch/powerpc/include/asm/hugetlb.h
index ba7d5d8b543f..ec3e0c2e78f8 100644
--- a/arch/powerpc/include/asm/hugetlb.h
+++ b/arch/powerpc/include/asm/hugetlb.h
@@ -132,6 +132,7 @@ static inline int prepare_hugepage_range(struct file *file,
return 0;
 }
 
+#define __HAVE_ARCH_HUGE_PTEP_GET_AND_CLEAR
 static inline pte_t huge_ptep_get_and_clear(struct mm_struct *mm,
unsigned long addr, pte_t *ptep)
 {
diff --git a/arch/sh/include/asm/hugetlb.h b/arch/sh/include/asm/hugetlb.h
index bc552e37c1c9..08ee6c00b5e9 100644
--- a/arch/sh/include/asm/hugetlb.h
+++ b/arch/sh/include/asm/hugetlb.h
@@ -25,12 +25,6 @@ static inline int prepare_hugepage_range(struct file *file,
return 0;
 }
 
-static inline pte_t huge_ptep_get_and_clear(struct mm_struct *mm,
-   unsigned long addr, pte_t *ptep)
-{
-   return ptep_get_and_clear(mm, addr, ptep);
-}
-
 static inline void huge_ptep_clear_flush(struct vm_area_struct *vma,
 

[PATCH 03/11] hugetlb: Introduce generic version of set_huge_pte_at

2018-07-04 Thread Alexandre Ghiti
arm, ia64, mips, powerpc, sh, x86 architectures use the
same version of set_huge_pte_at, so move this generic
implementation into asm-generic/hugetlb.h.

Signed-off-by: Alexandre Ghiti 
---
 arch/arm/include/asm/hugetlb-3level.h | 6 --
 arch/arm64/include/asm/hugetlb.h  | 1 +
 arch/ia64/include/asm/hugetlb.h   | 6 --
 arch/mips/include/asm/hugetlb.h   | 6 --
 arch/parisc/include/asm/hugetlb.h | 1 +
 arch/powerpc/include/asm/hugetlb.h| 6 --
 arch/sh/include/asm/hugetlb.h | 6 --
 arch/sparc/include/asm/hugetlb.h  | 1 +
 arch/x86/include/asm/hugetlb.h| 6 --
 include/asm-generic/hugetlb.h | 8 +++-
 10 files changed, 10 insertions(+), 37 deletions(-)

diff --git a/arch/arm/include/asm/hugetlb-3level.h 
b/arch/arm/include/asm/hugetlb-3level.h
index d4014fbe5ea3..398fb06e8207 100644
--- a/arch/arm/include/asm/hugetlb-3level.h
+++ b/arch/arm/include/asm/hugetlb-3level.h
@@ -37,12 +37,6 @@ static inline pte_t huge_ptep_get(pte_t *ptep)
return retval;
 }
 
-static inline void set_huge_pte_at(struct mm_struct *mm, unsigned long addr,
-  pte_t *ptep, pte_t pte)
-{
-   set_pte_at(mm, addr, ptep, pte);
-}
-
 static inline void huge_ptep_clear_flush(struct vm_area_struct *vma,
 unsigned long addr, pte_t *ptep)
 {
diff --git a/arch/arm64/include/asm/hugetlb.h b/arch/arm64/include/asm/hugetlb.h
index 4af1a800a900..874661a1dff1 100644
--- a/arch/arm64/include/asm/hugetlb.h
+++ b/arch/arm64/include/asm/hugetlb.h
@@ -60,6 +60,7 @@ static inline void arch_clear_hugepage_flags(struct page 
*page)
 extern pte_t arch_make_huge_pte(pte_t entry, struct vm_area_struct *vma,
struct page *page, int writable);
 #define arch_make_huge_pte arch_make_huge_pte
+#define __HAVE_ARCH_HUGE_SET_HUGE_PTE_AT
 extern void set_huge_pte_at(struct mm_struct *mm, unsigned long addr,
pte_t *ptep, pte_t pte);
 extern int huge_ptep_set_access_flags(struct vm_area_struct *vma,
diff --git a/arch/ia64/include/asm/hugetlb.h b/arch/ia64/include/asm/hugetlb.h
index afe9fa4d969b..a235d6f60fb3 100644
--- a/arch/ia64/include/asm/hugetlb.h
+++ b/arch/ia64/include/asm/hugetlb.h
@@ -20,12 +20,6 @@ static inline int is_hugepage_only_range(struct mm_struct 
*mm,
REGION_NUMBER((addr)+(len)-1) == RGN_HPAGE);
 }
 
-static inline void set_huge_pte_at(struct mm_struct *mm, unsigned long addr,
-  pte_t *ptep, pte_t pte)
-{
-   set_pte_at(mm, addr, ptep, pte);
-}
-
 static inline pte_t huge_ptep_get_and_clear(struct mm_struct *mm,
unsigned long addr, pte_t *ptep)
 {
diff --git a/arch/mips/include/asm/hugetlb.h b/arch/mips/include/asm/hugetlb.h
index 53764050243e..8ea439041d5d 100644
--- a/arch/mips/include/asm/hugetlb.h
+++ b/arch/mips/include/asm/hugetlb.h
@@ -36,12 +36,6 @@ static inline int prepare_hugepage_range(struct file *file,
return 0;
 }
 
-static inline void set_huge_pte_at(struct mm_struct *mm, unsigned long addr,
-  pte_t *ptep, pte_t pte)
-{
-   set_pte_at(mm, addr, ptep, pte);
-}
-
 static inline pte_t huge_ptep_get_and_clear(struct mm_struct *mm,
unsigned long addr, pte_t *ptep)
 {
diff --git a/arch/parisc/include/asm/hugetlb.h 
b/arch/parisc/include/asm/hugetlb.h
index 28c23b68d38d..77c8adbac7c3 100644
--- a/arch/parisc/include/asm/hugetlb.h
+++ b/arch/parisc/include/asm/hugetlb.h
@@ -4,6 +4,7 @@
 
 #include 
 
+#define __HAVE_ARCH_HUGE_SET_HUGE_PTE_AT
 void set_huge_pte_at(struct mm_struct *mm, unsigned long addr,
 pte_t *ptep, pte_t pte);
 
diff --git a/arch/powerpc/include/asm/hugetlb.h 
b/arch/powerpc/include/asm/hugetlb.h
index de46ee16b615..ba7d5d8b543f 100644
--- a/arch/powerpc/include/asm/hugetlb.h
+++ b/arch/powerpc/include/asm/hugetlb.h
@@ -132,12 +132,6 @@ static inline int prepare_hugepage_range(struct file *file,
return 0;
 }
 
-static inline void set_huge_pte_at(struct mm_struct *mm, unsigned long addr,
-  pte_t *ptep, pte_t pte)
-{
-   set_pte_at(mm, addr, ptep, pte);
-}
-
 static inline pte_t huge_ptep_get_and_clear(struct mm_struct *mm,
unsigned long addr, pte_t *ptep)
 {
diff --git a/arch/sh/include/asm/hugetlb.h b/arch/sh/include/asm/hugetlb.h
index f6a51b609409..bc552e37c1c9 100644
--- a/arch/sh/include/asm/hugetlb.h
+++ b/arch/sh/include/asm/hugetlb.h
@@ -25,12 +25,6 @@ static inline int prepare_hugepage_range(struct file *file,
return 0;
 }
 
-static inline void set_huge_pte_at(struct mm_struct *mm, unsigned long addr,
-  pte_t *ptep, pte_t pte)
-{
-   set_pte_at(mm, addr, ptep, pte);
-}
-
 static inline pte_t huge_ptep_get_and_clear(struct mm_struct *mm,

[PATCH 05/11] hugetlb: Introduce generic version of huge_ptep_clear_flush

2018-07-04 Thread Alexandre Ghiti
arm, x86 architectures use the same version of
huge_ptep_clear_flush, so move this generic implementation into
asm-generic/hugetlb.h.

Signed-off-by: Alexandre Ghiti 
---
 arch/arm/include/asm/hugetlb-3level.h | 6 --
 arch/arm64/include/asm/hugetlb.h  | 1 +
 arch/ia64/include/asm/hugetlb.h   | 1 +
 arch/mips/include/asm/hugetlb.h   | 1 +
 arch/parisc/include/asm/hugetlb.h | 1 +
 arch/powerpc/include/asm/hugetlb.h| 1 +
 arch/sh/include/asm/hugetlb.h | 1 +
 arch/sparc/include/asm/hugetlb.h  | 1 +
 arch/x86/include/asm/hugetlb.h| 6 --
 include/asm-generic/hugetlb.h | 8 
 10 files changed, 15 insertions(+), 12 deletions(-)

diff --git a/arch/arm/include/asm/hugetlb-3level.h 
b/arch/arm/include/asm/hugetlb-3level.h
index ad36e84b819a..b897541520ef 100644
--- a/arch/arm/include/asm/hugetlb-3level.h
+++ b/arch/arm/include/asm/hugetlb-3level.h
@@ -37,12 +37,6 @@ static inline pte_t huge_ptep_get(pte_t *ptep)
return retval;
 }
 
-static inline void huge_ptep_clear_flush(struct vm_area_struct *vma,
-unsigned long addr, pte_t *ptep)
-{
-   ptep_clear_flush(vma, addr, ptep);
-}
-
 static inline void huge_ptep_set_wrprotect(struct mm_struct *mm,
   unsigned long addr, pte_t *ptep)
 {
diff --git a/arch/arm64/include/asm/hugetlb.h b/arch/arm64/include/asm/hugetlb.h
index 6ae0bcafe162..4c8dd488554d 100644
--- a/arch/arm64/include/asm/hugetlb.h
+++ b/arch/arm64/include/asm/hugetlb.h
@@ -71,6 +71,7 @@ extern pte_t huge_ptep_get_and_clear(struct mm_struct *mm,
 unsigned long addr, pte_t *ptep);
 extern void huge_ptep_set_wrprotect(struct mm_struct *mm,
unsigned long addr, pte_t *ptep);
+#define __HAVE_ARCH_HUGE_PTEP_CLEAR_FLUSH
 extern void huge_ptep_clear_flush(struct vm_area_struct *vma,
  unsigned long addr, pte_t *ptep);
 #define __HAVE_ARCH_HUGE_PTE_CLEAR
diff --git a/arch/ia64/include/asm/hugetlb.h b/arch/ia64/include/asm/hugetlb.h
index 6719c74da0de..41b5f6adeee4 100644
--- a/arch/ia64/include/asm/hugetlb.h
+++ b/arch/ia64/include/asm/hugetlb.h
@@ -20,6 +20,7 @@ static inline int is_hugepage_only_range(struct mm_struct *mm,
REGION_NUMBER((addr)+(len)-1) == RGN_HPAGE);
 }
 
+#define __HAVE_ARCH_HUGE_PTEP_CLEAR_FLUSH
 static inline void huge_ptep_clear_flush(struct vm_area_struct *vma,
 unsigned long addr, pte_t *ptep)
 {
diff --git a/arch/mips/include/asm/hugetlb.h b/arch/mips/include/asm/hugetlb.h
index 0959cc5a41fa..7df1f116a3cc 100644
--- a/arch/mips/include/asm/hugetlb.h
+++ b/arch/mips/include/asm/hugetlb.h
@@ -48,6 +48,7 @@ static inline pte_t huge_ptep_get_and_clear(struct mm_struct 
*mm,
return pte;
 }
 
+#define __HAVE_ARCH_HUGE_PTEP_CLEAR_FLUSH
 static inline void huge_ptep_clear_flush(struct vm_area_struct *vma,
 unsigned long addr, pte_t *ptep)
 {
diff --git a/arch/parisc/include/asm/hugetlb.h 
b/arch/parisc/include/asm/hugetlb.h
index 6e281e1bb336..9afff26747a1 100644
--- a/arch/parisc/include/asm/hugetlb.h
+++ b/arch/parisc/include/asm/hugetlb.h
@@ -32,6 +32,7 @@ static inline int prepare_hugepage_range(struct file *file,
return 0;
 }
 
+#define __HAVE_ARCH_HUGE_PTEP_CLEAR_FLUSH
 static inline void huge_ptep_clear_flush(struct vm_area_struct *vma,
 unsigned long addr, pte_t *ptep)
 {
diff --git a/arch/powerpc/include/asm/hugetlb.h 
b/arch/powerpc/include/asm/hugetlb.h
index ec3e0c2e78f8..de0769f0b5b2 100644
--- a/arch/powerpc/include/asm/hugetlb.h
+++ b/arch/powerpc/include/asm/hugetlb.h
@@ -143,6 +143,7 @@ static inline pte_t huge_ptep_get_and_clear(struct 
mm_struct *mm,
 #endif
 }
 
+#define __HAVE_ARCH_HUGE_PTEP_CLEAR_FLUSH
 static inline void huge_ptep_clear_flush(struct vm_area_struct *vma,
 unsigned long addr, pte_t *ptep)
 {
diff --git a/arch/sh/include/asm/hugetlb.h b/arch/sh/include/asm/hugetlb.h
index 08ee6c00b5e9..9abf9c86b769 100644
--- a/arch/sh/include/asm/hugetlb.h
+++ b/arch/sh/include/asm/hugetlb.h
@@ -25,6 +25,7 @@ static inline int prepare_hugepage_range(struct file *file,
return 0;
 }
 
+#define __HAVE_ARCH_HUGE_PTEP_CLEAR_FLUSH
 static inline void huge_ptep_clear_flush(struct vm_area_struct *vma,
 unsigned long addr, pte_t *ptep)
 {
diff --git a/arch/sparc/include/asm/hugetlb.h b/arch/sparc/include/asm/hugetlb.h
index 944e3a4bfaff..651a9593fcee 100644
--- a/arch/sparc/include/asm/hugetlb.h
+++ b/arch/sparc/include/asm/hugetlb.h
@@ -42,6 +42,7 @@ static inline int prepare_hugepage_range(struct file *file,
return 0;
 }
 
+#define __HAVE_ARCH_HUGE_PTEP_CLEAR_FLUSH
 static inline void huge_ptep_clear_flush(struct vm_area_struct *vma,
 unsigned long 

Re: [PATCH v3 0/2] powernv/cpuidle Device-tree parsing cleanup

2018-07-04 Thread Rafael J. Wysocki
On Tuesday, July 3, 2018 11:20:54 AM CEST Akshay Adiga wrote:
> 
> The device tree is parsed multiple times in the powernv cpuidle and powernv
> hotplug code.
> 
> First, to identify the supported flags. Second, to identify deepest_state
> and the first deep state. Third, during cpuidle init, to find the available
> idle states. Any change in the device-tree format requires changes in
> these 3 places. Errors in the device tree can be handled in a better manner.
> 
> This series adds code to parse the device tree once and save the result in a global structure.
> 
> Changes from v2 :
>  - Fix build error (moved a hunk from patch 1 to patch 2)
> Changes from v1 :
>  - fold first 2 patches into 1
>  - rename pm_ctrl_reg_* as psscr_*
>  - added comment stating removal of pmicr parsing code
>  - removed parsing code for pmicr
>  - add member valid in pnv_idle_states_t to indicate if the psscr-mask/val
> are valid combination,
>  - Change function description of pnv_parse_cpuidle_dt
>  - Added error handling code.
> 
> 
> Akshay Adiga (2):
>   powernv/cpuidle: Parse dt idle properties into global structure
>   powernv/cpuidle: Use parsed device tree values for cpuidle_init
> 
>  arch/powerpc/include/asm/cpuidle.h|  13 ++
>  arch/powerpc/platforms/powernv/idle.c | 216 --
>  drivers/cpuidle/cpuidle-powernv.c | 154 --
>  3 files changed, 177 insertions(+), 206 deletions(-)
> 
> 

I am assuming that this series will go in via the powerpc tree.

Thanks,
Rafael




[PATCH v10 6/6] kernel: tracepoints: add support for relative references

2018-07-04 Thread Ard Biesheuvel
To avoid the need for relocating absolute references to tracepoint
structures at boot time when running relocatable kernels (which may
take a disproportionate amount of space), add the option to emit
these tables as relative references instead.
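
As a toy illustration of how a place-relative 32-bit reference behaves
(plain userspace C, not the kernel macros below; it assumes the entry and
its target stay within +/-2 GiB of each other, which holds inside a single
kernel image):

#include <stdio.h>
#include <stdint.h>

static int target = 42;		/* stands in for a struct tracepoint */
static int32_t entry;		/* stands in for one __tracepoints_ptrs slot */

int main(void)
{
	/* What ".long __tracepoint_xxx - ." emits: the slot records only its
	 * distance to the target, so it needs no relocation when the whole
	 * image is moved. */
	entry = (int32_t)((intptr_t)&target - (intptr_t)&entry);

	/* Recovering the pointer, i.e. what offset_to_ptr() does below. */
	int *resolved = (int *)((intptr_t)&entry + entry);

	printf("%d\n", *resolved);	/* prints 42 */
	return 0;
}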

Acked-by: Michael Ellerman 
Acked-by: Ingo Molnar 
Acked-by: Steven Rostedt (VMware) 
Signed-off-by: Ard Biesheuvel 
---
 include/linux/tracepoint.h | 19 ++--
 kernel/tracepoint.c| 49 +++-
 2 files changed, 41 insertions(+), 27 deletions(-)

diff --git a/include/linux/tracepoint.h b/include/linux/tracepoint.h
index 19a690b559ca..b130e40d82cb 100644
--- a/include/linux/tracepoint.h
+++ b/include/linux/tracepoint.h
@@ -225,6 +225,19 @@ extern void syscall_unregfunc(void);
return static_key_false(&__tracepoint_##name.key);  \
}
 
+#ifdef CONFIG_HAVE_ARCH_PREL32_RELOCATIONS
+#define __TRACEPOINT_ENTRY(name)   \
+   asm("   .section \"__tracepoints_ptrs\", \"a\"  \n" \
+   "   .balign 4   \n" \
+   "   .long   __tracepoint_" #name " - .  \n" \
+   "   .previous   \n")
+#else
+#define __TRACEPOINT_ENTRY(name)\
+   static struct tracepoint * const __tracepoint_ptr_##name __used  \
+   __attribute__((section("__tracepoints_ptrs"))) = \
+   &__tracepoint_##name
+#endif
+
 /*
  * We have no guarantee that gcc and the linker won't up-align the tracepoint
  * structures, so we create an array of pointers that will be used for 
iteration
@@ -234,11 +247,9 @@ extern void syscall_unregfunc(void);
static const char __tpstrtab_##name[]\
__attribute__((section("__tracepoints_strings"))) = #name;   \
struct tracepoint __tracepoint_##name\
-   __attribute__((section("__tracepoints"))) =  \
+   __attribute__((section("__tracepoints"), used)) =\
{ __tpstrtab_##name, STATIC_KEY_INIT_FALSE, reg, unreg, NULL };\
-   static struct tracepoint * const __tracepoint_ptr_##name __used  \
-   __attribute__((section("__tracepoints_ptrs"))) = \
-   &__tracepoint_##name;
+   __TRACEPOINT_ENTRY(name);
 
 #define DEFINE_TRACE(name) \
DEFINE_TRACE_FN(name, NULL, NULL);
diff --git a/kernel/tracepoint.c b/kernel/tracepoint.c
index 6dc6356c3327..451c8f5e8345 100644
--- a/kernel/tracepoint.c
+++ b/kernel/tracepoint.c
@@ -325,6 +325,27 @@ int tracepoint_probe_unregister(struct tracepoint *tp, 
void *probe, void *data)
 }
 EXPORT_SYMBOL_GPL(tracepoint_probe_unregister);
 
+static void for_each_tracepoint_range(struct tracepoint * const *begin,
+   struct tracepoint * const *end,
+   void (*fct)(struct tracepoint *tp, void *priv),
+   void *priv)
+{
+   if (!begin)
+   return;
+
+   if (IS_ENABLED(CONFIG_HAVE_ARCH_PREL32_RELOCATIONS)) {
+   const int *iter;
+
+   for (iter = (const int *)begin; iter < (const int *)end; iter++)
+   fct(offset_to_ptr(iter), priv);
+   } else {
+   struct tracepoint * const *iter;
+
+   for (iter = begin; iter < end; iter++)
+   fct(*iter, priv);
+   }
+}
+
 #ifdef CONFIG_MODULES
 bool trace_module_has_bad_taint(struct module *mod)
 {
@@ -389,15 +410,9 @@ EXPORT_SYMBOL_GPL(unregister_tracepoint_module_notifier);
  * Ensure the tracer unregistered the module's probes before the module
  * teardown is performed. Prevents leaks of probe and data pointers.
  */
-static void tp_module_going_check_quiescent(struct tracepoint * const *begin,
-   struct tracepoint * const *end)
+static void tp_module_going_check_quiescent(struct tracepoint *tp, void *priv)
 {
-   struct tracepoint * const *iter;
-
-   if (!begin)
-   return;
-   for (iter = begin; iter < end; iter++)
-   WARN_ON_ONCE((*iter)->funcs);
+   WARN_ON_ONCE(tp->funcs);
 }
 
 static int tracepoint_module_coming(struct module *mod)
@@ -448,8 +463,9 @@ static void tracepoint_module_going(struct module *mod)
 * Called the going notifier before checking for
 * quiescence.
 */
-   tp_module_going_check_quiescent(mod->tracepoints_ptrs,
-   mod->tracepoints_ptrs + mod->num_tracepoints);
+   for_each_tracepoint_range(mod->tracepoints_ptrs,
+   mod->tracepoints_ptrs + mod->num_tracepoints,
+   tp_module_going_check_quiescent, NULL);
break;
}
}
@@ -501,19 +517,6 @@ static 

Re: [PATCHv3 0/4] drivers/base: bugfix for supplier<-consumer ordering in device_kset

2018-07-04 Thread Rafael J. Wysocki
On Wednesday, July 4, 2018 4:47:07 AM CEST Pingfan Liu wrote:
> On Tue, Jul 3, 2018 at 10:36 PM Rafael J. Wysocki  wrote:
> >
> > On Tuesday, July 3, 2018 8:50:38 AM CEST Pingfan Liu wrote:
> > > commit 52cdbdd49853 ("driver core: correct device's shutdown order")
> > > places an assumption of supplier<-consumer order on the probe process.
> > > But it turns out to break the parent <- child order in some scenarios.
> > > E.g. in PCI, a bridge is enabled by the PCI core, and the devices behind
> > > it have already been probed. Then comes the bridge's module, which enables
> > > extra features (such as hotplug) on this bridge.
> >
> > So what *exactly* does happen in that case?
> >
> I saw that shpc_probe() is called on the bridge, although the probing
> failed on that bare metal. But if it succeeds, then it will enable the
> hotplug feature on the bridge.

I don't understand what you are saying here, sorry.

device_reorder_to_tail() walks the entire device hierarchy below the target
and moves all of the children in there *after* their parents.

How can it break "the parent <- child order" then?

Thanks,
Rafael



Re: [PATCH v11 00/26] Speculative page faults

2018-07-04 Thread Laurent Dufour
On 04/07/2018 05:23, Song, HaiyanX wrote:
> Hi Laurent,
> 
> 
> For the test results on the Intel 4s Skylake platform (192 CPUs, 768G memory), the
> below test cases were all run 3 times.
> I checked the test results; only page_fault3_thread/enable THP has 6% stddev
> for the head commit, the other tests have lower stddev.

Repeating the test only 3 times seems a bit too low to me.

I'll focus on the larger change for the moment, but I don't have access to such
hardware.

Is it possible to provide a diff between base and SPF of the performance cycles
measured when running page_fault3 and page_fault2, when the 20% change is
detected?

Please stay focused on the test case process to see exactly where the series
has an impact.

Thanks,
Laurent.

> 
> And I did not find any other high variation in the test case results.
> 
> a). Enable THP
> testcase                     base     stddev   change   head     stddev   metric
> page_fault3/enable THP       10519    ± 3%     -20.5%   8368     ± 6%     will-it-scale.per_thread_ops
> page_fault2/enable THP       8281     ± 2%     -18.8%   6728              will-it-scale.per_thread_ops
> brk1/enable THP              998475            -2.2%    976893            will-it-scale.per_process_ops
> context_switch1/enable THP   223910            -1.3%    220930            will-it-scale.per_process_ops
> context_switch1/enable THP   233722            -1.0%    231288            will-it-scale.per_thread_ops
> 
> b). Disable THP
> page_fault3/disable THP      10856    -23.1%   8344      will-it-scale.per_thread_ops
> page_fault2/disable THP      8147     -18.8%   6613      will-it-scale.per_thread_ops
> brk1/disable THP             957      -7.9%    881       will-it-scale.per_thread_ops
> context_switch1/disable THP  237006   -2.2%    231907    will-it-scale.per_thread_ops
> brk1/disable THP             997317   -2.0%    98        will-it-scale.per_process_ops
> page_fault3/disable THP      467454   -1.8%    459251    will-it-scale.per_process_ops
> context_switch1/disable THP  224431   -1.3%    221567    will-it-scale.per_process_ops
> 
> 
> Best regards,
> Haiyan Song
> 
> From: Laurent Dufour [lduf...@linux.vnet.ibm.com]
> Sent: Monday, July 02, 2018 4:59 PM
> To: Song, HaiyanX
> Cc: a...@linux-foundation.org; mho...@kernel.org; pet...@infradead.org; 
> kir...@shutemov.name; a...@linux.intel.com; d...@stgolabs.net; j...@suse.cz; 
> Matthew Wilcox; khand...@linux.vnet.ibm.com; aneesh.ku...@linux.vnet.ibm.com; 
> b...@kernel.crashing.org; m...@ellerman.id.au; pau...@samba.org; Thomas 
> Gleixner; Ingo Molnar; h...@zytor.com; Will Deacon; Sergey Senozhatsky; 
> sergey.senozhatsky.w...@gmail.com; Andrea Arcangeli; Alexei Starovoitov; 
> Wang, Kemi; Daniel Jordan; David Rientjes; Jerome Glisse; Ganesh Mahendran; 
> Minchan Kim; Punit Agrawal; vinayak menon; Yang Shi; 
> linux-ker...@vger.kernel.org; linux...@kvack.org; ha...@linux.vnet.ibm.com; 
> npig...@gmail.com; bsinghar...@gmail.com; paul...@linux.vnet.ibm.com; Tim 
> Chen; linuxppc-dev@lists.ozlabs.org; x...@kernel.org
> Subject: Re: [PATCH v11 00/26] Speculative page faults
> 
> On 11/06/2018 09:49, Song, HaiyanX wrote:
>> Hi Laurent,
>>
>> Regression tests for the v11 patch series have been run; some regressions were
>> found by LKP-tools (Linux Kernel Performance)
>> tested on the Intel 4s Skylake platform. This time we only tested the cases which
>> had been run and found regressions on
>> the v9 patch series.
>>
>> The regression result is sorted by the metric will-it-scale.per_thread_ops.
>> branch: Laurent-Dufour/Speculative-page-faults/20180520-045126
>> commit id:
>>   head commit : a7a8993bfe3ccb54ad468b9f1799649e4ad1ff12
>>   base commit : ba98a1cdad71d259a194461b3a61471b49b14df1
>> Benchmark: will-it-scale
>> Download link: https://github.com/antonblanchard/will-it-scale/tree/master
>>
>> Metrics:
>>   will-it-scale.per_process_ops=processes/nr_cpu
>>   will-it-scale.per_thread_ops=threads/nr_cpu
>>   test box: lkp-skl-4sp1(nr_cpu=192,memory=768G)
>> THP: enable / disable
>> nr_task:100%
>>
>> 1. Regressions:
>>
>> a). Enable THP
>> testcase                     base     change   head     metric
>> page_fault3/enable THP       10519    -20.5%   836      will-it-scale.per_thread_ops
>> page_fault2/enable THP       8281     -18.8%   6728     will-it-scale.per_thread_ops
>> brk1/enable THP              998475   -2.2%    976893   will-it-scale.per_process_ops
>> context_switch1/enable THP   223910   -1.3%    220930   will-it-scale.per_process_ops

[PATCH v10 1/6] arch: enable relative relocations for arm64, power and x86

2018-07-04 Thread Ard Biesheuvel
Before updating certain subsystems to use place relative 32-bit
relocations in special sections, to save space and reduce the
number of absolute relocations that need to be processed at runtime
by relocatable kernels, introduce the Kconfig symbol and define it
for some architectures that should be able to support and benefit
from it.

Acked-by: Michael Ellerman 
Reviewed-by: Will Deacon 
Acked-by: Ingo Molnar 
Signed-off-by: Ard Biesheuvel 
---
 arch/Kconfig | 10 ++
 arch/arm64/Kconfig   |  1 +
 arch/powerpc/Kconfig |  1 +
 arch/x86/Kconfig |  1 +
 4 files changed, 13 insertions(+)

diff --git a/arch/Kconfig b/arch/Kconfig
index 1aa59063f1fd..2b8b70820002 100644
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -971,4 +971,14 @@ config REFCOUNT_FULL
  against various use-after-free conditions that can be used in
  security flaw exploits.
 
+config HAVE_ARCH_PREL32_RELOCATIONS
+   bool
+   help
+ May be selected by an architecture if it supports place-relative
+ 32-bit relocations, both in the toolchain and in the module loader,
+ in which case relative references can be used in special sections
+ for PCI fixup, initcalls etc which are only half the size on 64 bit
+ architectures, and don't require runtime relocation on relocatable
+ kernels.
+
 source "kernel/gcov/Kconfig"
diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index 42c090cf0292..1940c6405d04 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -95,6 +95,7 @@ config ARM64
select HAVE_ARCH_KGDB
select HAVE_ARCH_MMAP_RND_BITS
select HAVE_ARCH_MMAP_RND_COMPAT_BITS if COMPAT
+   select HAVE_ARCH_PREL32_RELOCATIONS
select HAVE_ARCH_SECCOMP_FILTER
select HAVE_ARCH_THREAD_STRUCT_WHITELIST
select HAVE_ARCH_TRACEHOOK
diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index 9f2b75fe2c2d..e4fe19789b8b 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -177,6 +177,7 @@ config PPC
select HAVE_ARCH_KGDB
select HAVE_ARCH_MMAP_RND_BITS
select HAVE_ARCH_MMAP_RND_COMPAT_BITS   if COMPAT
+   select HAVE_ARCH_PREL32_RELOCATIONS
select HAVE_ARCH_SECCOMP_FILTER
select HAVE_ARCH_TRACEHOOK
select HAVE_CBPF_JITif !PPC64
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index f1dbb4ee19d7..e10a3542db7e 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -123,6 +123,7 @@ config X86
select HAVE_ARCH_MMAP_RND_BITS  if MMU
select HAVE_ARCH_MMAP_RND_COMPAT_BITS   if MMU && COMPAT
select HAVE_ARCH_COMPAT_MMAP_BASES  if MMU && COMPAT
+   select HAVE_ARCH_PREL32_RELOCATIONS
select HAVE_ARCH_SECCOMP_FILTER
select HAVE_ARCH_THREAD_STRUCT_WHITELIST
select HAVE_ARCH_TRACEHOOK
-- 
2.17.1



[PATCH v10 5/6] PCI: Add support for relative addressing in quirk tables

2018-07-04 Thread Ard Biesheuvel
Allow the PCI quirk tables to be emitted in a way that avoids absolute
references to the hook functions. This reduces the size of the entries,
and, more importantly, makes them invariant under runtime relocation
(e.g., for KASLR).
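
For context, the driver-facing side is unchanged by this: a quirk is still
declared through the same macro family, e.g. (hypothetical vendor/device
IDs below):

/* Hypothetical quirk declaration; the IDs are made up for illustration. */
static void quirk_example_bridge(struct pci_dev *dev)
{
	dev_info(&dev->dev, "example fixup applied\n");
}
DECLARE_PCI_FIXUP_EARLY(0x1234, 0x5678, quirk_example_bridge);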

Acked-by: Bjorn Helgaas 
Acked-by: Michael Ellerman 
Acked-by: Ingo Molnar 
Signed-off-by: Ard Biesheuvel 
---
 drivers/pci/quirks.c | 12 +---
 include/linux/pci.h  | 20 
 2 files changed, 29 insertions(+), 3 deletions(-)

diff --git a/drivers/pci/quirks.c b/drivers/pci/quirks.c
index f439de848658..0ba4e446e5db 100644
--- a/drivers/pci/quirks.c
+++ b/drivers/pci/quirks.c
@@ -64,9 +64,15 @@ static void pci_do_fixups(struct pci_dev *dev, struct 
pci_fixup *f,
 f->vendor == (u16) PCI_ANY_ID) &&
(f->device == dev->device ||
 f->device == (u16) PCI_ANY_ID)) {
-   calltime = fixup_debug_start(dev, f->hook);
-   f->hook(dev);
-   fixup_debug_report(dev, calltime, f->hook);
+   void (*hook)(struct pci_dev *dev);
+#ifdef CONFIG_HAVE_ARCH_PREL32_RELOCATIONS
+   hook = offset_to_ptr(&f->hook_offset);
+#else
+   hook = f->hook;
+#endif
+   calltime = fixup_debug_start(dev, hook);
+   hook(dev);
+   fixup_debug_report(dev, calltime, hook);
}
 }
 
diff --git a/include/linux/pci.h b/include/linux/pci.h
index 340029b2fb38..51baa3ab5195 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -1795,7 +1795,11 @@ struct pci_fixup {
u16 device; /* Or PCI_ANY_ID */
u32 class;  /* Or PCI_ANY_ID */
unsigned int class_shift;   /* should be 0, 8, 16 */
+#ifdef CONFIG_HAVE_ARCH_PREL32_RELOCATIONS
+   int hook_offset;
+#else
void (*hook)(struct pci_dev *dev);
+#endif
 };
 
 enum pci_fixup_pass {
@@ -1809,12 +1813,28 @@ enum pci_fixup_pass {
pci_fixup_suspend_late, /* pci_device_suspend_late() */
 };
 
+#ifdef CONFIG_HAVE_ARCH_PREL32_RELOCATIONS
+#define __DECLARE_PCI_FIXUP_SECTION(sec, name, vendor, device, class,  \
+   class_shift, hook)  \
+   __ADDRESSABLE(hook) \
+   asm(".section " #sec ", \"a\"   \n" \
+   ".balign16  \n" \
+   ".short "   #vendor ", " #device "  \n" \
+   ".long "#class ", " #class_shift "  \n" \
+   ".long "#hook " - . \n" \
+   ".previous  \n");
+#define DECLARE_PCI_FIXUP_SECTION(sec, name, vendor, device, class,\
+ class_shift, hook)\
+   __DECLARE_PCI_FIXUP_SECTION(sec, name, vendor, device, class,   \
+ class_shift, hook)
+#else
 /* Anonymous variables would be nice... */
 #define DECLARE_PCI_FIXUP_SECTION(section, name, vendor, device, class,
\
  class_shift, hook)\
static const struct pci_fixup __PASTE(__pci_fixup_##name,__LINE__) 
__used   \
__attribute__((__section__(#section), aligned((sizeof(void *)\
= { vendor, device, class, class_shift, hook };
+#endif
 
 #define DECLARE_PCI_FIXUP_CLASS_EARLY(vendor, device, class,   \
 class_shift, hook) \
-- 
2.17.1



Re: [v2 PATCH 1/2] powerpc: Detect the presence of big-cores via "ibm, thread-groups"

2018-07-04 Thread Gautham R Shenoy
Hello Murilo,

Thanks for reviewing the patch. Replies inline.

On Tue, Jul 03, 2018 at 02:16:55PM -0300, Murilo Opsfelder Araujo wrote:
> On Tue, Jul 03, 2018 at 04:33:50PM +0530, Gautham R. Shenoy wrote:
> > From: "Gautham R. Shenoy" 
> > 
> > > On IBM POWER9, the device tree exposes a property array identified by
> > "ibm,thread-groups" which will indicate which groups of threads share a
> > particular set of resources.
> > 
> > As of today we only have one form of grouping identifying the group of
> > threads in the core that share the L1 cache, translation cache and
> > instruction data flow.
> > 
> > This patch defines the helper function to parse the contents of
> > "ibm,thread-groups" and a new structure to contain the parsed output.
> > 
> > The patch also creates the sysfs file named "small_core_siblings" that
> > returns the physical ids of the threads in the core that share the L1
> > cache, translation cache and instruction data flow.
> > 
> > Signed-off-by: Gautham R. Shenoy 
> > ---
> >  Documentation/ABI/testing/sysfs-devices-system-cpu |   8 ++
> >  arch/powerpc/include/asm/cputhreads.h  |  22 +
> >  arch/powerpc/kernel/setup-common.c | 110 
> > +
> >  arch/powerpc/kernel/sysfs.c|  35 +++
> >  4 files changed, 175 insertions(+)
> > 
> > diff --git a/Documentation/ABI/testing/sysfs-devices-system-cpu 
> > b/Documentation/ABI/testing/sysfs-devices-system-cpu
> > index 9c5e7732..53a823a 100644
> > --- a/Documentation/ABI/testing/sysfs-devices-system-cpu
> > +++ b/Documentation/ABI/testing/sysfs-devices-system-cpu
> > @@ -487,3 +487,11 @@ Description:   Information about CPU vulnerabilities
> > "Not affected"CPU is not affected by the vulnerability
> > "Vulnerable"  CPU is affected and no mitigation in effect
> > "Mitigation: $M"  CPU is affected and mitigation $M is in effect
> > +
> > +What:  /sys/devices/system/cpu/cpu[0-9]+/small_core_sibings
> 
> s/small_core_sibings/small_core_siblings

Nice catch! Will fix this. 
> 
> By the way, big_core_siblings was mentioned in the introductory email.

It should be small_core_siblings in the introductory e-mail. My bad.


> 
> > +Date:  03-Jul-2018
> > +KernelVersion: v4.18.0
> > +Contact:   Gautham R. Shenoy 
> > +Description:   List of Physical ids of CPUs which share the L1 
> > cache,
> > +   translation cache and instruction data-flow with this CPU.
> > +Values:Comma separated list of decimal integers.

[..snip..]

> > +/*
> > + * parse_thread_groups: Parses the "ibm,thread-groups" device tree
> > + *  property for the CPU device node dn and stores
> > + *  the parsed output in the thread_groups
> > + *  structure tg.
> 
> Perhaps document the arguments of this function, as done in the second
> patch?

Will do this. Thanks.

> 
> > + *
> > + * ibm,thread-groups[0..N-1] array defines which group of threads in
> > + * the CPU-device node can be grouped together based on the property.
> > + *
> > + * ibm,thread-groups[0] tells us the property based on which the
> > + * threads are being grouped together. If this value is 1, it implies
> > + * that the threads in the same group share L1, translation cache.
> > + *
> > + * ibm,thread-groups[1] tells us how many such thread groups exist.
> > + *
> > + * ibm,thread-groups[2] tells us the number of threads in each such
> > + * group.
> > + *
> > + * ibm,thread-groups[3..N-1] is the list of threads identified by
> > + * "ibm,ppc-interrupt-server#s" arranged as per their membership in
> > + * the grouping.
> > + *
> > + * Example: If ibm,thread-groups = [1,2,4,5,6,7,8,9,10,11,12] it
> > + * implies that there are 2 groups of 4 threads each, where each group
> > + * of threads share L1, translation cache.
> > + *
> > + * The "ibm,ppc-interrupt-server#s" of the first group is {5,6,7,8}
> > + * and the "ibm,ppc-interrupt-server#s" of the second group is {9, 10,
> > + * 11, 12} structure
> > + *
> > + * Returns 0 on success, -EINVAL if the property does not exist,
> > + * -ENODATA if property does not have a value, and -EOVERFLOW if the
> > + * property data isn't large enough.
> > + */
> > +int parse_thread_groups(struct device_node *dn,
> > +   struct thread_groups *tg)
> > +{
> > +   unsigned int nr_groups, threads_per_group, property;
> > +   int i;
> > +   u32 thread_group_array[3 + MAX_THREAD_LIST_SIZE];
> > +   u32 *thread_list;
> > +   size_t total_threads;
> > +   int ret;
> > +
> > +   ret = of_property_read_u32_array(dn, "ibm,thread-groups",
> > +thread_group_array, 3);
> > +
> > +   if (ret)
> > +   return ret;
> > +
> > +   property = thread_group_array[0];
> > +   nr_groups = thread_group_array[1];
> > +   threads_per_group = thread_group_array[2];
> > +   total_threads = nr_groups * threads_per_group;

Re: [v2 PATCH 2/2] powerpc: Enable CPU_FTR_ASYM_SMT for interleaved big-cores

2018-07-04 Thread Gautham R Shenoy
Hi Murilo,

Thanks for the review.

On Tue, Jul 03, 2018 at 02:53:46PM -0300, Murilo Opsfelder Araujo wrote:
[..snip..]

> > -/* Initialize CPU <=> thread mapping/
> > +   if (has_interleaved_big_core) {
> > +   int key = __builtin_ctzl(CPU_FTR_ASYM_SMT);
> > +
> > +   cur_cpu_spec->cpu_features |= CPU_FTR_ASYM_SMT;
> > +   static_branch_enable(_feature_keys[key]);
> > +   pr_info("Detected interleaved big-cores\n");
> > +   }
> 
> Shouldn't we use cpu_has_feature(CPU_FTR_ASYM_SMT) before setting
> > it?


Are you suggesting that we do the following?

if (has_interleaved_big_core &&
!cpu_has_feature(CPU_FTR_ASYM_SMT)) {
...
}

Currently CPU_FTR_ASYM_SMT is set at compile time only for POWER7,
where running the tasks on lower-numbered threads gives us the benefit
of SMT thread folding. Interleaved big cores are a feature introduced
only on POWER9. Thus, we know that CPU_FTR_ASYM_SMT is not set in
cpu_features at this point.

> 
> > +
> > +   /* Initialize CPU <=> thread mapping/
> >  *
> >  * WARNING: We assume that the number of threads is the same for
> >  * every CPU in the system. If that is not the case, then some code
> > -- 
> > 1.9.4
> > 
> 
> -- 
> Murilo

--
Thanks and Regards
gautham.



[PATCH v10 2/6] module: allow symbol exports to be disabled

2018-07-04 Thread Ard Biesheuvel
To allow existing C code to be incorporated into the decompressor or
the UEFI stub, introduce a CPP macro that turns all EXPORT_SYMBOL_xxx
declarations into nops, and #define it in places where such exports
are undesirable. Note that this gets rid of a rather dodgy redefine
of linux/export.h's header guard.
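
A rough sketch of the intended use, with a made-up file as an example (the
real users in this series are the x86 decompressor, which #defines the
macro directly, and the EFI stub, which passes -D__DISABLE_EXPORTS from its
Makefile):

/* Hypothetical decompressor-style file that reuses kernel library code.
 * Defining __DISABLE_EXPORTS before any include turns EXPORT_SYMBOL()
 * into a no-op, so no __ksymtab entries are emitted for this unit. */
#define __DISABLE_EXPORTS

#include "../../../lib/ctype.c"		/* brings in EXPORT_SYMBOL(_ctype) */
#include "../../../lib/cmdline.c"	/* brings in EXPORT_SYMBOL()s too */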

Acked-by: Nicolas Pitre 
Acked-by: Michael Ellerman 
Reviewed-by: Will Deacon 
Acked-by: Ingo Molnar 
Signed-off-by: Ard Biesheuvel 
---
 arch/x86/boot/compressed/kaslr.c  |  5 +
 drivers/firmware/efi/libstub/Makefile |  3 ++-
 include/linux/export.h| 11 ++-
 3 files changed, 13 insertions(+), 6 deletions(-)

diff --git a/arch/x86/boot/compressed/kaslr.c b/arch/x86/boot/compressed/kaslr.c
index b87a7582853d..ed7a123bba42 100644
--- a/arch/x86/boot/compressed/kaslr.c
+++ b/arch/x86/boot/compressed/kaslr.c
@@ -23,11 +23,8 @@
  * _ctype[] in lib/ctype.c is needed by isspace() of linux/ctype.h.
  * While both lib/ctype.c and lib/cmdline.c will bring EXPORT_SYMBOL
  * which is meaningless and will cause compiling error in some cases.
- * So do not include linux/export.h and define EXPORT_SYMBOL(sym)
- * as empty.
  */
-#define _LINUX_EXPORT_H
-#define EXPORT_SYMBOL(sym)
+#define __DISABLE_EXPORTS
 
 #include "misc.h"
 #include "error.h"
diff --git a/drivers/firmware/efi/libstub/Makefile 
b/drivers/firmware/efi/libstub/Makefile
index a34e9290a699..0d0d3483241c 100644
--- a/drivers/firmware/efi/libstub/Makefile
+++ b/drivers/firmware/efi/libstub/Makefile
@@ -20,7 +20,8 @@ cflags-$(CONFIG_EFI_ARMSTUB)  += 
-I$(srctree)/scripts/dtc/libfdt
 KBUILD_CFLAGS  := $(cflags-y) -DDISABLE_BRANCH_PROFILING \
   -D__NO_FORTIFY \
   $(call cc-option,-ffreestanding) \
-  $(call cc-option,-fno-stack-protector)
+  $(call cc-option,-fno-stack-protector) \
+  -D__DISABLE_EXPORTS
 
 GCOV_PROFILE   := n
 KASAN_SANITIZE := n
diff --git a/include/linux/export.h b/include/linux/export.h
index b768d6dd3c90..ea7df303d68d 100644
--- a/include/linux/export.h
+++ b/include/linux/export.h
@@ -66,7 +66,16 @@ extern struct module __this_module;
__attribute__((section("___ksymtab" sec "+" #sym), used))   \
	= { (unsigned long)&sym, __kstrtab_##sym }
 
-#if defined(__KSYM_DEPS__)
+#if defined(__DISABLE_EXPORTS)
+
+/*
+ * Allow symbol exports to be disabled completely so that C code may
+ * be reused in other execution contexts such as the UEFI stub or the
+ * decompressor.
+ */
+#define __EXPORT_SYMBOL(sym, sec)
+
+#elif defined(__KSYM_DEPS__)
 
 /*
  * For fine grained build dependencies, we want to tell the build system
-- 
2.17.1



Re: [PATCHv3 3/4] drivers/base: clean up the usage of devices_kset_move_last()

2018-07-04 Thread Rafael J. Wysocki
On Wednesday, July 4, 2018 6:40:09 AM CEST Pingfan Liu wrote:
> On Tue, Jul 3, 2018 at 10:28 PM Rafael J. Wysocki  wrote:
> >
> > On Tuesday, July 3, 2018 8:50:41 AM CEST Pingfan Liu wrote:
> > > Clean up the references to the code in commit 52cdbdd49853 ("driver core:
> > > correct device's shutdown order"), so that later we can revert it safely.
> > >
> > > Cc: Greg Kroah-Hartman 
> > > Cc: Rafael J. Wysocki 
> > > Cc: Grygorii Strashko 
> > > Cc: Christoph Hellwig 
> > > Cc: Bjorn Helgaas 
> > > Cc: Dave Young 
> > > Cc: linux-...@vger.kernel.org
> > > Cc: linuxppc-dev@lists.ozlabs.org
> > > Signed-off-by: Pingfan Liu 
> > > ---
> > >  drivers/base/core.c | 7 ---
> > >  1 file changed, 7 deletions(-)
> > >
> > > diff --git a/drivers/base/core.c b/drivers/base/core.c
> > > index 684b994..db3deb8 100644
> > > --- a/drivers/base/core.c
> > > +++ b/drivers/base/core.c
> > > @@ -127,13 +127,6 @@ static int device_reorder_to_tail(struct device 
> > > *dev, void *not_used)
> > >  {
> > >   struct device_link *link;
> > >
> > > - /*
> > > -  * Devices that have not been registered yet will be put to the ends
> > > -  * of the lists during the registration, so skip them here.
> > > -  */
> > > - if (device_is_registered(dev))
> > > - devices_kset_move_last(dev);
> > > -
> > >   if (device_pm_initialized(dev))
> > >   device_pm_move_last(dev);
> >
> > You can't do this.
> >
> > If you do it, that will break power management in some situations.
> >
> Could you shed some light on it? I had a quick browse of the PM code, but it
> is a big function, and I got lost in it.
> If the above code causes a failure, does that imply that the sequence in
> devices_kset should be the same as in dpm_list?

Generally, yes it should.

> But in device_shutdown(), it only intersects with PM via
> pm_runtime_get_noresume(dev) and pm_runtime_barrier(dev). How do these
> functions affect the sequence in dpm_list?

They are not related to dpm_list directly.

However, if you shut down a supplier device before its consumer and that
involves power management, then the consumer shutdown may fail and lock up
the system.

I asked you elsewhere to clearly describe the problem you are trying to
address.  Please do that in the first place.

Thanks,
Rafael



[PATCH v10 0/6] add support for relative references in special sections

2018-07-04 Thread Ard Biesheuvel
This adds support for emitting special sections such as initcall arrays,
PCI fixups and tracepoints as relative references rather than absolute
references. This reduces the size by 50% on 64-bit architectures, but
more importantly, it removes the need for carrying relocation metadata
for these sections in relocatable kernels (e.g., for KASLR) that needs
to be fixed up at boot time. On arm64, this reduces the vmlinux footprint
of such a reference by 8x (8 byte absolute reference + 24 byte RELA entry
vs 4 byte relative reference)

Patch #3 was sent out before as a single patch. This series supersedes
the previous submission. This version makes relative ksymtab entries
dependent on the new Kconfig symbol HAVE_ARCH_PREL32_RELOCATIONS rather
than trying to infer from kbuild test robot replies for which architectures
it should be blacklisted.

Patch #1 introduces the new Kconfig symbol HAVE_ARCH_PREL32_RELOCATIONS,
and sets it for the main architectures that are expected to benefit the
most from this feature, i.e., 64-bit architectures or ones that use
runtime relocations.

Patch #2 add support for #define'ing __DISABLE_EXPORTS to get rid of
ksymtab/kcrctab sections in decompressor and EFI stub objects when
rebuilding existing C files to run in a different context.

Patches #4 - #6 implement relative references for initcalls, PCI fixups
and tracepoints, respectively, all of which produce sections with order
~1000 entries on an arm64 defconfig kernel with tracing enabled. This
means we save about 28 KB of vmlinux space for each of these patches.

[From the v7 series blurb, which included the jump_label patches as well]:
  For the arm64 kernel, all patches combined reduce the memory footprint of
  vmlinux by about 1.3 MB (using a config copied from Ubuntu that has KASLR
  enabled), of which ~1 MB is the size reduction of the RELA section in .init,
  and the remaining 300 KB is reduction of .text/.data.

Branch:
git://git.kernel.org/pub/scm/linux/kernel/git/ardb/linux.git 
relative-special-sections-v10

Andrew, this series now has all the prerequisite acks in place. Could you please
take this through the -mm tree? Thanks.

Changes since v9:
- use .discard.addressable section (not .discard) for emitting dummy symbol
  references, to work around a build issue on powerpc
- add acks from Michael, Ingo, Will, James, Petr and Sergey

Changes since v8:
- add Nico's ack (#2)
- drop 'const' qualifier from __ADDRESSABLE(sym) to prevent mismatching
  attributes for the .discard section (#3)
- drop all uses of VMLINUX_SYMBOL_STR(), which is on its way out (#3 - #6)

Changes since v7:
- dropped the jump_label patches, these will be revisited in a separate series
- reorder __DISABLE_EXPORTS with __KSYM_DEPS__ check in #2
- use offset_to_ptr() helper function to abstract the relative pointer
  conversion [int *off -> (ulong)off + *off] (#3 - #6)
- rebase onto v4.16-rc3

Changes since v6:
- drop S390 from patch #1 introducing HAVE_ARCH_PREL32_RELOCATIONS: kbuild
  robot threw me some s390 curveballs, and given that s390 does not define
  CONFIG_RELOCATABLE in the first place, it does not benefit as much from
  relative references as arm64, x86 and power do
- add patch to allow symbol exports to be disabled at compilation unit
  granularity (#2)
- get rid of arm64 vmlinux.lds.S hunk to ensure code generated by __ADDRESSABLE
  gets discarded from the EFI stub - it is no longer needed after adding #2 (#1)
- change _ADDRESSABLE() to emit a data reference, not a code reference - this
  is another simplification made possible by patch #2 (#3)
- add Steven's ack to #6
- split x86 jump_label patch into two (#9, #10)

Changes since v5:
- add missing jump_label prototypes to s390 jump_label.h (#6)
- fix inverted condition in call to jump_entry_is_module_init() (#6)

Changes since v4:
- add patches to convert x86 and arm64 to use relative references for jump
  tables (#6 - #8)
- rename PCI patch and add Bjorn's ack (#4)
- rebase onto v4.15-rc5

Changes since v3:
- fix module unload issue in patch #5 reported by Jessica, by reusing the
  updated routine for_each_tracepoint_range() for the quiescent check at
  module unload time; this requires this routine to be moved before
  tracepoint_module_going() in kernel/tracepoint.c
- add Jessica's ack to #2
- rebase onto v4.14-rc1

Changes since v2:
- Revert my slightly misguided attempt to appease checkpatch, which resulted
  in needless churn and worse code. This v3 is based on v1 with a few tweaks
  that were actually reasonable checkpatch warnings: unnecessary braces (as
  pointed out by Ingo) and other minor whitespace misdemeanors.

Changes since v1:
- Remove checkpatch errors to the extent feasible: in some cases, this
  involves moving extern declarations into C files, and switching to
  struct definitions rather than typedefs. Some errors are impossible
  to fix: please find the remaining ones after the diffstat.
- Used 'int' instead if 'signed int' for the various offset fields: there
  is 

[PATCH v10 3/6] module: use relative references for __ksymtab entries

2018-07-04 Thread Ard Biesheuvel
An ordinary arm64 defconfig build has ~64 KB worth of __ksymtab
entries, each consisting of two 64-bit fields containing absolute
references, to the symbol itself and to a char array containing
its name, respectively.

When we build the same configuration with KASLR enabled, we end
up with an additional ~192 KB of relocations in the .init section,
i.e., one 24 byte entry for each absolute reference, which all need
to be processed at boot time.

Given how the struct kernel_symbol that describes each entry is
completely local to module.c (except for the references emitted
by EXPORT_SYMBOL() itself), we can easily modify it to contain
two 32-bit relative references instead. This reduces the size of
the __ksymtab section by 50% for all 64-bit architectures, and
gets rid of the runtime relocations entirely for architectures
implementing KASLR, either via standard PIE linking (arm64) or
using custom host tools (x86).

Note that the binary search involving __ksymtab contents relies
on each section being sorted by symbol name. This is implemented
based on the input section names, not the names in the ksymtab
entries, so this patch does not interfere with that.

Given that the use of place-relative relocations requires support
both in the toolchain and in the module loader, we cannot enable
this feature for all architectures. So make it dependent on whether
CONFIG_HAVE_ARCH_PREL32_RELOCATIONS is defined.

Acked-by: Jessica Yu 
Acked-by: Michael Ellerman 
Reviewed-by: Will Deacon 
Acked-by: Ingo Molnar 
Signed-off-by: Ard Biesheuvel 
---
 arch/x86/include/asm/Kbuild   |  1 +
 arch/x86/include/asm/export.h |  5 ---
 include/asm-generic/export.h  | 12 -
 include/linux/compiler.h  | 19 
 include/linux/export.h| 46 +++-
 kernel/module.c   | 32 +++---
 6 files changed, 91 insertions(+), 24 deletions(-)

diff --git a/arch/x86/include/asm/Kbuild b/arch/x86/include/asm/Kbuild
index de690c2d2e33..a0ab9ab61c75 100644
--- a/arch/x86/include/asm/Kbuild
+++ b/arch/x86/include/asm/Kbuild
@@ -8,5 +8,6 @@ generated-y += xen-hypercalls.h
 
 generic-y += dma-contiguous.h
 generic-y += early_ioremap.h
+generic-y += export.h
 generic-y += mcs_spinlock.h
 generic-y += mm-arch-hooks.h
diff --git a/arch/x86/include/asm/export.h b/arch/x86/include/asm/export.h
deleted file mode 100644
index 2a51d66689c5..
--- a/arch/x86/include/asm/export.h
+++ /dev/null
@@ -1,5 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifdef CONFIG_64BIT
-#define KSYM_ALIGN 16
-#endif
-#include 
diff --git a/include/asm-generic/export.h b/include/asm-generic/export.h
index 68efb950a918..4d73e6e3c66c 100644
--- a/include/asm-generic/export.h
+++ b/include/asm-generic/export.h
@@ -5,12 +5,10 @@
 #define KSYM_FUNC(x) x
 #endif
 #ifdef CONFIG_64BIT
-#define __put .quad
 #ifndef KSYM_ALIGN
 #define KSYM_ALIGN 8
 #endif
 #else
-#define __put .long
 #ifndef KSYM_ALIGN
 #define KSYM_ALIGN 4
 #endif
@@ -19,6 +17,16 @@
 #define KCRC_ALIGN 4
 #endif
 
+.macro __put, val, name
+#ifdef CONFIG_HAVE_ARCH_PREL32_RELOCATIONS
+   .long   \val - ., \name - .
+#elif defined(CONFIG_64BIT)
+   .quad   \val, \name
+#else
+   .long   \val, \name
+#endif
+.endm
+
 /*
  * note on .section use: @progbits vs %progbits nastiness doesn't matter,
  * since we immediately emit into those sections anyway.
diff --git a/include/linux/compiler.h b/include/linux/compiler.h
index 42506e4d1f53..61c844d4ab48 100644
--- a/include/linux/compiler.h
+++ b/include/linux/compiler.h
@@ -280,6 +280,25 @@ unsigned long read_word_at_a_time(const void *addr)
 
 #endif /* __KERNEL__ */
 
+/*
+ * Force the compiler to emit 'sym' as a symbol, so that we can reference
+ * it from inline assembler. Necessary in case 'sym' could be inlined
+ * otherwise, or eliminated entirely due to lack of references that are
+ * visible to the compiler.
+ */
+#define __ADDRESSABLE(sym) \
+   static void * __attribute__((section(".discard.addressable"), used)) \
+   __PASTE(__addressable_##sym, __LINE__) = (void *)&sym;
+
+/**
+ * offset_to_ptr - convert a relative memory offset to an absolute pointer
+ * @off:   the address of the 32-bit offset value
+ */
+static inline void *offset_to_ptr(const int *off)
+{
+   return (void *)((unsigned long)off + *off);
+}
+
 #endif /* __ASSEMBLY__ */
 
 #ifndef __optimize
diff --git a/include/linux/export.h b/include/linux/export.h
index ea7df303d68d..ae072bc5aacf 100644
--- a/include/linux/export.h
+++ b/include/linux/export.h
@@ -18,12 +18,6 @@
 #define VMLINUX_SYMBOL_STR(x) __VMLINUX_SYMBOL_STR(x)
 
 #ifndef __ASSEMBLY__
-struct kernel_symbol
-{
-   unsigned long value;
-   const char *name;
-};
-
 #ifdef MODULE
 extern struct module __this_module;
 #define THIS_MODULE (&__this_module)
@@ -54,17 +48,47 @@ extern struct module __this_module;
 #define __CRC_SYMBOL(sym, sec)
 #endif
 
+#ifdef CONFIG_HAVE_ARCH_PREL32_RELOCATIONS
+#include 
+/*
+ * Emit the ksymtab entry as a pair of 

[PATCH v2 1/2] powernv:opal-sensor-groups: Add support to enable sensor groups

2018-07-04 Thread Shilpasri G Bhat
Adds support to enable/disable a sensor group at runtime. This
can be used to select the sensor groups that need to be copied to
main memory by OCC. Sensor groups like power, temperature, current,
voltage, frequency, utilization can be enabled/disabled at runtime.

Signed-off-by: Shilpasri G Bhat 
---
- Rebased on master. No changes from v1.

 arch/powerpc/include/asm/opal-api.h|  1 +
 arch/powerpc/include/asm/opal.h|  2 ++
 .../powerpc/platforms/powernv/opal-sensor-groups.c | 28 ++
 arch/powerpc/platforms/powernv/opal-wrappers.S |  1 +
 4 files changed, 32 insertions(+)

diff --git a/arch/powerpc/include/asm/opal-api.h 
b/arch/powerpc/include/asm/opal-api.h
index 3bab299..56a94a1 100644
--- a/arch/powerpc/include/asm/opal-api.h
+++ b/arch/powerpc/include/asm/opal-api.h
@@ -206,6 +206,7 @@
 #define OPAL_NPU_SPA_CLEAR_CACHE   160
 #define OPAL_NPU_TL_SET   161
 #define OPAL_SENSOR_READ_U64   162
+#define OPAL_SENSOR_GROUP_ENABLE   163
 #define OPAL_PCI_GET_PBCQ_TUNNEL_BAR   164
 #define OPAL_PCI_SET_PBCQ_TUNNEL_BAR   165
 #define OPAL_LAST  165
diff --git a/arch/powerpc/include/asm/opal.h b/arch/powerpc/include/asm/opal.h
index e1b2910..fc0550e 100644
--- a/arch/powerpc/include/asm/opal.h
+++ b/arch/powerpc/include/asm/opal.h
@@ -292,6 +292,7 @@ int64_t opal_imc_counters_init(uint32_t type, uint64_t 
address,
 int opal_get_power_shift_ratio(u32 handle, int token, u32 *psr);
 int opal_set_power_shift_ratio(u32 handle, int token, u32 psr);
 int opal_sensor_group_clear(u32 group_hndl, int token);
+int opal_sensor_group_enable(u32 group_hndl, int token, bool enable);
 
 s64 opal_signal_system_reset(s32 cpu);
 s64 opal_quiesce(u64 shutdown_type, s32 cpu);
@@ -326,6 +327,7 @@ extern int opal_async_wait_response_interruptible(uint64_t 
token,
struct opal_msg *msg);
 extern int opal_get_sensor_data(u32 sensor_hndl, u32 *sensor_data);
 extern int opal_get_sensor_data_u64(u32 sensor_hndl, u64 *sensor_data);
+extern int sensor_group_enable(u32 grp_hndl, bool enable);
 
 struct rtc_time;
 extern time64_t opal_get_boot_time(void);
diff --git a/arch/powerpc/platforms/powernv/opal-sensor-groups.c 
b/arch/powerpc/platforms/powernv/opal-sensor-groups.c
index 541c9ea..f7d04b6 100644
--- a/arch/powerpc/platforms/powernv/opal-sensor-groups.c
+++ b/arch/powerpc/platforms/powernv/opal-sensor-groups.c
@@ -32,6 +32,34 @@ struct sg_attr {
struct sg_attr *sgattrs;
 } *sgs;
 
+int sensor_group_enable(u32 handle, bool enable)
+{
+   struct opal_msg msg;
+   int token, ret;
+
+   token = opal_async_get_token_interruptible();
+   if (token < 0)
+   return token;
+
+   ret = opal_sensor_group_enable(handle, token, enable);
+   if (ret == OPAL_ASYNC_COMPLETION) {
+   ret = opal_async_wait_response(token, &msg);
+   if (ret) {
+   pr_devel("Failed to wait for the async response\n");
+   ret = -EIO;
+   goto out;
+   }
+   ret = opal_error_code(opal_get_async_rc(msg));
+   } else {
+   ret = opal_error_code(ret);
+   }
+
+out:
+   opal_async_release_token(token);
+   return ret;
+}
+EXPORT_SYMBOL_GPL(sensor_group_enable);
+
 static ssize_t sg_store(struct kobject *kobj, struct kobj_attribute *attr,
const char *buf, size_t count)
 {
diff --git a/arch/powerpc/platforms/powernv/opal-wrappers.S 
b/arch/powerpc/platforms/powernv/opal-wrappers.S
index a8d9b40..8268a1e 100644
--- a/arch/powerpc/platforms/powernv/opal-wrappers.S
+++ b/arch/powerpc/platforms/powernv/opal-wrappers.S
@@ -327,3 +327,4 @@ OPAL_CALL(opal_npu_tl_set, OPAL_NPU_TL_SET);
 OPAL_CALL(opal_pci_get_pbcq_tunnel_bar, OPAL_PCI_GET_PBCQ_TUNNEL_BAR);
 OPAL_CALL(opal_pci_set_pbcq_tunnel_bar, OPAL_PCI_SET_PBCQ_TUNNEL_BAR);
 OPAL_CALL(opal_sensor_read_u64, OPAL_SENSOR_READ_U64);
+OPAL_CALL(opal_sensor_group_enable, OPAL_SENSOR_GROUP_ENABLE);
-- 
1.8.3.1



[PATCH] cxl: Fix wrong comparison in cxl_adapter_context_get()

2018-07-04 Thread Vaibhav Jain
Function atomic_inc_unless_negative() returns a bool to indicate
success/failure. However cxl_adapter_context_get() wrongly compares
the return value against '>=0', which will always be true. The patch
fixes this comparison to a plain truth test, thereby also fixing this
compile time warning:

drivers/misc/cxl/main.c:290 cxl_adapter_context_get()
warn: 'atomic_inc_unless_negative(&adapter->contexts_num)' is unsigned
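
As a standalone illustration of the semantics (a sketch only, not the driver code): atomic_inc_unless_negative() returns a bool, so only a truth test is meaningful and '>= 0' can never be false:

#include <linux/atomic.h>
#include <linux/errno.h>
#include <linux/types.h>

static int context_get_sketch(atomic_t *counter)
{
	bool ok = atomic_inc_unless_negative(counter);

	/* ok is 0 or 1; "ok >= 0" would always be true */
	return ok ? 0 : -EBUSY;
}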

Cc: sta...@vger.kernel.org
Fixes: 70b565bbdb91 ("cxl: Prevent adapter reset if an active context exists")
Reported-by: Dan Carpenter 
Signed-off-by: Vaibhav Jain 
---
 drivers/misc/cxl/main.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/misc/cxl/main.c b/drivers/misc/cxl/main.c
index c1ba0d42cbc8..e0f29b8a872d 100644
--- a/drivers/misc/cxl/main.c
+++ b/drivers/misc/cxl/main.c
@@ -287,7 +287,7 @@ int cxl_adapter_context_get(struct cxl *adapter)
int rc;
 
 rc = atomic_inc_unless_negative(&adapter->contexts_num);
-   return rc >= 0 ? 0 : -EBUSY;
+   return rc ? 0 : -EBUSY;
 }
 
 void cxl_adapter_context_put(struct cxl *adapter)
-- 
2.17.1



Re: [PATCH v2 2/2] hwmon: ibmpowernv: Add attributes to enable/disable sensor groups

2018-07-04 Thread Guenter Roeck

On 07/04/2018 02:16 AM, Shilpasri G Bhat wrote:

On-Chip-Controller (OCC) is an embedded micro-processor in the POWER9 chip
which measures various system and chip level sensors. These sensors
comprise environmental sensors (like power, temperature, current
and voltage) and performance sensors (like utilization, frequency).
All these sensors are copied to main memory at a regular interval of
100ms. OCC provides a way to select a group of sensors that is copied
to the main memory to increase the update frequency of selected sensor
groups. When a sensor-group is disabled, OCC will not copy it to main
memory and those sensors read 0 values.

This patch provides support for enabling/disabling the sensor groups
like power, temperature, current and voltage. It adds a new
per-sensor sysfs attribute to enable and disable them.

Signed-off-by: Shilpasri G Bhat 
---
Changes from v1:
- Add per-sensor 'enable' attribute
- Return -ENODATA when sensor is disabled

  Documentation/hwmon/sysfs-interface |  22 +++
  drivers/hwmon/ibmpowernv.c  | 281 +++-
  2 files changed, 264 insertions(+), 39 deletions(-)

diff --git a/Documentation/hwmon/sysfs-interface 
b/Documentation/hwmon/sysfs-interface
index fc337c3..38ab05c 100644
--- a/Documentation/hwmon/sysfs-interface
+++ b/Documentation/hwmon/sysfs-interface


Separate patch please.


@@ -184,6 +184,11 @@ vrm   Voltage Regulator Module version number.
Affects the way the driver calculates the CPU core reference
voltage from the vid pins.
  
+in[0-*]_enable	Enable or disable the sensor

+   1 : Enable
+   0 : Disable
+   RW
+
  Also see the Alarms section for status flags associated with voltages.
  
  
@@ -409,6 +414,12 @@ temp_reset_history

Reset temp_lowest and temp_highest for all sensors
WO
  
+temp[1-*]_enable

+   Enable or disable the sensor
+   1 : Enable
+   0 : Disable
+   RW
+
  Some chips measure temperature using external thermistors and an ADC, and
  report the temperature measurement as a voltage. Converting this voltage
  back to a temperature (or the other way around for limits) requires
@@ -468,6 +479,12 @@ curr_reset_history
Reset currX_lowest and currX_highest for all sensors
WO
  
+curr[1-*]_enable

+   Enable or disable the sensor
+   1 : Enable
+   0 : Disable
+   RW
+
  Also see the Alarms section for status flags associated with currents.
  
  *

@@ -566,6 +583,11 @@ power[1-*]_crit   Critical maximum power.
Unit: microWatt
RW
  
+power[1-*]_enable		Enable or disable the sensor

+   1 : Enable
+   0 : Disable
+   RW
+
  Also see the Alarms section for status flags associated with power readings.
  


Any reason for excluding fan, energy, humidity ?


  **
diff --git a/drivers/hwmon/ibmpowernv.c b/drivers/hwmon/ibmpowernv.c
index f829dad..61e04cf 100644
--- a/drivers/hwmon/ibmpowernv.c
+++ b/drivers/hwmon/ibmpowernv.c
@@ -90,8 +90,28 @@ struct sensor_data {
char label[MAX_LABEL_LEN];
char name[MAX_ATTR_LEN];
struct device_attribute dev_attr;
+   struct sensor_group_data *sgdata;
+   struct sensor_data *sdata[3];
+   bool enable;
  };
  
+static struct sensor_group_data {

+   u32 gid;
+   u32 nr_phandle;
+   u32 nr_sensor;
+   enum sensors type;
+   const __be32 *phandles;
+   struct sensor_data **sensors;
+   bool enable;
+} *sg_data;
+
+/*
+ * To synchronise writes to struct sensor_data.enable and
+ * struct sensor_group_data.enable
+ */
+DEFINE_MUTEX(sensor_groups_mutex);


Not as global variable, please.


+static int nr_sensor_groups;
+


Do those have to be static variables ? Why not in struct platform_data ?


  struct platform_data {
const struct attribute_group *attr_groups[MAX_SENSOR_TYPE + 1];
u32 sensors_count; /* Total count of sensors from each group */
@@ -105,6 +125,9 @@ static ssize_t show_sensor(struct device *dev, struct 
device_attribute *devattr,
ssize_t ret;
u64 x;
  
+	if (sdata->sgdata && !sdata->enable)

+   return -ENODATA;
+


This return code should be documented in the ABI.


 ret = opal_get_sensor_data_u64(sdata->id, &x);
  
  	if (ret)

@@ -120,6 +143,74 @@ static ssize_t show_sensor(struct device *dev, struct 
device_attribute *devattr,
return sprintf(buf, "%llu\n", x);
  }
  
+static ssize_t show_enable(struct device *dev,

+  struct device_attribute *devattr, char *buf)
+{
+   struct sensor_data *sdata = container_of(devattr, struct sensor_data,
+dev_attr);
+
+

Re: [bug report] cxl: Prevent adapter reset if an active context exists

2018-07-04 Thread Vaibhav Jain


Dan Carpenter  writes:

> The patch 70b565bbdb91: "cxl: Prevent adapter reset if an active
> context exists" from Oct 14, 2016, leads to the following static
> checker warning:
>
>   drivers/misc/cxl/main.c:290 cxl_adapter_context_get()
>   warn: 'atomic_inc_unless_negative(&adapter->contexts_num)' is unsigned
>
Thanks for reporting this. I have sent out a patch to fix this at
http://patchwork.ozlabs.org/patch/939426/

-- 
Vaibhav Jain 
Linux Technology Center, IBM India Pvt. Ltd.



Re: How is this possible - Register r30 contains 0xc2236400 instead of 0xc6236400

2018-07-04 Thread Michael Ellerman
Christophe LEROY  writes:

> Kernel Oops at 0xc0334d5c for reading at address 0xc2236450 which 
> corresponds to r30 + 80
>
> But r30 should contain what's at r3 + 16 that is at 0xc619ec10 so r30 
> should be c6236400 as shown below (print_hex_dump(regs->gpr[3]) added at 
> end of __die() )
>
> So how can r30 contain 0xc2236400 instead ?

The simplest answer is that memory was modified between the time we
loaded it into r30 and when you print it.

So it did contain 0xc2236400 but has since been modified to now contain
0xc6236400.

The thing that makes me less certain, is that c6 would be the correct
value (I think?), so it's been modified back to the correct value, which
seems lucky.

Mysterious.

cheers

> And this is not random, it happens at most if not every startup.
>
> c0334d44 :
> c0334d44:   7c 08 02 a6 mflr    r0
> c0334d48:   94 21 ff f0 stwu    r1,-16(r1)
> c0334d4c:   bf c1 00 08 stmw    r30,8(r1)
> c0334d50:   90 01 00 14 stw r0,20(r1)
> c0334d54:   83 c3 00 10 lwz r30,16(r3)
> c0334d58:   81 23 00 a8 lwz r9,168(r3)
> c0334d5c:   81 5e 00 50 lwz r10,80(r30)
>
>
> [  152.288237] Unable to handle kernel paging request for data at 
> address 0xc2236450
> [  152.295444] Faulting instruction address: 0xc0334d5c
> [  152.300369] Oops: Kernel access of bad area, sig: 11 [#1]
> [  152.305665] BE PREEMPT DEBUG_PAGEALLOC CMPC885
> [  152.313630] CPU: 0 PID: 269 Comm: in:imuxsock Not tainted 
> 4.14.52-00025-g5bada429cf-dirty #36
> [  152.322729] task: c623e100 task.stack: c650c000
> [  152.327202] NIP:  c0334d5c LR: c043602c CTR: c0435fb8
> [  152.332200] REGS: c650dc00 TRAP: 0300   Not tainted 
> (4.14.52-00025-g5bada429cf-dirty)
> [  152.340699] MSR:  9032   CR: 28002822 XER: 2000
> [  152.347333] DAR: c2236450 DSISR: c000
> [  152.347333] GPR00: c043602c c650dcb0 c623e100 c619ec00 c642c060 
> 0008 0018 c650dd4c
> [  152.347333] GPR08: c0435fb8 02b0 c068d830 0004 28004822 
> 100d4208  7780c848
> [  152.347333] GPR16: 0ff58398 777674b0 1024b050 1024b0a8 1005ddbc 
> 0ff5a7bc 03e8 
> [  152.347333] GPR24: 008e c5011650 c650deb8 008e c619ec00 
> 0040 c2236400 c619ec00
> [  152.385015] NIP [c0334d5c] sock_wfree+0x18/0xa4
> [  152.389458] LR [c043602c] unix_destruct_scm+0x74/0x88
> [  152.394399] Call Trace:
> [  152.396868] [c650dcb0] [c006348c] ns_to_timeval+0x4c/0x7c (unreliable)
> [  152.403305] [c650dcc0] [c043602c] unix_destruct_scm+0x74/0x88
> [  152.408999] [c650dcf0] [c033a10c] skb_release_head_state+0x8c/0x110
> [  152.415184] [c650dd00] [c033a3c4] skb_release_all+0x18/0x50
> [  152.420690] [c650dd10] [c033a7cc] consume_skb+0x38/0xec
> [  152.425869] [c650dd20] [c0342d7c] skb_free_datagram+0x1c/0x68
> [  152.431535] [c650dd30] [c0435c8c] unix_dgram_recvmsg+0x19c/0x4ac
> [  152.437476] [c650ddb0] [c0331370] ___sys_recvmsg+0x98/0x138
> [  152.442984] [c650deb0] [c0333280] __sys_recvmsg+0x40/0x84
> [  152.448321] [c650df10] [c0333680] SyS_socketcall+0xb8/0x1d4
> [  152.453832] [c650df40] [c000d1ac] ret_from_syscall+0x0/0x38
> [  152.459286] Instruction dump:
> [  152.462225] 41beffac 4b58 3883 4ba0 3881 4b98 
> 7c0802a6 9421fff0
> [  152.469881] bfc10008 90010014 83c30010 812300a8 <815e0050> 3bfe00e0 
> 71480200 4082003c
> [  152.477739] c619ec00: 00 00 00 00 00 00 00 00 00 00 00 23 6f d9 b1 65
> [  152.484100] c619ec10: c6 23 64 00 00 00 00 00 c6 42 c0 60 00 00 03 e8
> [  152.490471] c619ec20: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
> [  152.496837] c619ec30: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
> [  152.503205] c619ec40: 00 00 00 00 00 00 00 00 00 00 00 00 c0 43 5f b8
> [  152.509575] c619ec50: 00 00 00 00 00 00 00 00 00 00 00 8e 00 00 00 00
> [  152.515943] c619ec60: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
> [  152.522311] c619ec70: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
> [  152.528680] c619ec80: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
> [  152.535048] c619ec90: 00 00 ff ff 00 00 ff ff c6 42 30 8e c6 42 31 50
> [  152.541417] c619eca0: c6 42 30 00 c6 42 30 00 00 00 02 b0 00 00 00 01
> [  152.547781] ---[ end trace 0710a9d231876a27 ]---
>
> Christophe


Re: [PATCH v5 5/7] powerpc/pseries: flush SLB contents on SLB MCE errors.

2018-07-04 Thread Michael Ellerman
Michal Suchánek  writes:
> On Tue, 3 Jul 2018 08:08:14 +1000
> Nicholas Piggin  wrote:
>> On Mon, 02 Jul 2018 11:17:06 +0530
>> Mahesh J Salgaonkar  wrote:
>> > From: Mahesh Salgaonkar 
>> > 
>> > On pseries, as of today system crashes if we get a machine check
>> > exceptions due to SLB errors. These are soft errors and can be
>> > fixed by flushing the SLBs so the kernel can continue to function
>> > instead of system crash. We do this in real mode before turning on
>> > MMU. Otherwise we would run into nested machine checks. This patch
>> > now fetches the rtas error log in real mode and flushes the SLBs on
>> > SLB errors.
>> > 
>> > Signed-off-by: Mahesh Salgaonkar 
>> > ---
>> >  arch/powerpc/include/asm/book3s/64/mmu-hash.h |    1 
>> >  arch/powerpc/include/asm/machdep.h            |    1 
>> >  arch/powerpc/kernel/exceptions-64s.S          |   42 +
>> >  arch/powerpc/kernel/mce.c                     |   16 +++-
>> >  arch/powerpc/mm/slb.c                         |    6 +++
>> >  arch/powerpc/platforms/powernv/opal.c         |    1 
>> >  arch/powerpc/platforms/pseries/pseries.h      |    1 
>> >  arch/powerpc/platforms/pseries/ras.c          |   51 +
>> >  arch/powerpc/platforms/pseries/setup.c        |    1 
>> >  9 files changed, 116 insertions(+), 4 deletions(-)
>> 
>> 
>> > +TRAMP_REAL_BEGIN(machine_check_pSeries_early)
>> > +BEGIN_FTR_SECTION
>> > +  EXCEPTION_PROLOG_1(PACA_EXMC, NOTEST, 0x200)
>> > +  mr  r10,r1  /* Save r1 */
>> > +  ld  r1,PACAMCEMERGSP(r13)   /* Use MC emergency
>> > stack */
>> > +  subir1,r1,INT_FRAME_SIZE/* alloc stack
>> > frame  */
>> > +  mfspr   r11,SPRN_SRR0   /* Save SRR0 */
>> > +  mfspr   r12,SPRN_SRR1   /* Save SRR1 */
>> > +  EXCEPTION_PROLOG_COMMON_1()
>> > +  EXCEPTION_PROLOG_COMMON_2(PACA_EXMC)
>> > +  EXCEPTION_PROLOG_COMMON_3(0x200)
>> > +  addir3,r1,STACK_FRAME_OVERHEAD
>> > +  BRANCH_LINK_TO_FAR(machine_check_early) /* Function call
>> > ABI */  
>> 
>> Is there any reason you can't use the existing
>> machine_check_powernv_early code to do all this?
>
> Code sharing is nice but if we envision this going to stable kernels
> butchering the existing handler is going to be a nightmare. The code is
> quite a bit different between kernel versions.

I'm not sure if we'll send it to stable kernels. But we obviously will
back port it to some distros :)

So if sharing the code is a significant impediment to that, then I'm
happy if we don't share code initially. That could be done as a
follow-up to this series.

cheers


Re: How is this possible - Register r30 contains 0xc2236400 instead of 0xc6236400

2018-07-04 Thread Christophe LEROY




On 04/07/2018 at 15:45, Segher Boessenkool wrote:

On Wed, Jul 04, 2018 at 11:11:59PM +1000, Michael Ellerman wrote:

Christophe LEROY  writes:


Kernel Oops at 0xc0334d5c for reading at address 0xc2236450 which
corresponds to r30 + 80

But r30 should contain what's at r3 + 16 that is at 0xc619ec10 so r30
should be c6236400 as shown below (print_hex_dump(regs->gpr[3]) added at
end of __die() )

So how can r30 contain 0xc2236400 instead ?


The simplest answer is that memory was modified between the time we
loaded it into r30 and when you print it.

So it did contain 0xc2236400 but has since been modified to now contain
0xc6236400.

The thing that makes me less certain, is that c6 would be the correct
value (I think?), so it's been modified back to the correct value, which
seems lucky.

Mysterious.


That depends.  Is this reproducible at all?  It is a single bit flip.


Yes it is reproductible.

It isn't reproduced if I modify the function in such a way that there is 
an additional (unrelated) instruction before that read.


It isn't reproduced if I move the kernel base address to 0xd000 or 
0xb000 instead of 0xc000


If I force a second read of this address in the function, I get the same 
value (in another register).


If I add a dcbi before the second read I get the correct address.

So it still looks mysterious to me ...

Christophe




Segher



Re: How is this possible - Register r30 contains 0xc2236400 instead of 0xc6236400

2018-07-04 Thread Segher Boessenkool
On Wed, Jul 04, 2018 at 11:11:59PM +1000, Michael Ellerman wrote:
> Christophe LEROY  writes:
> 
> > Kernel Oops at 0xc0334d5c for reading at address 0xc2236450 which 
> > corresponds to r30 + 80
> >
> > But r30 should contain what's at r3 + 16 that is at 0xc619ec10 so r30 
> > should be c6236400 as shown below (print_hex_dump(regs->gpr[3]) added at 
> > end of __die() )
> >
> > So how can r30 contain 0xc2236400 instead ?
> 
> The simplest answer is that memory was modified between the time we
> loaded it into r30 and when you print it.
> 
> So it did contain 0xc2236400 but has since been modified to now contain
> 0xc6236400.
> 
> The thing that makes me less certain, is that c6 would be the correct
> value (I think?), so it's been modified back to the correct value, which
> seems lucky.
> 
> Mysterious.

That depends.  Is this reproducible at all?  It is a single bit flip.
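
For reference, the two values differ in exactly one bit, as a quick XOR shows (a standalone check, not from the thread):

#include <stdio.h>

int main(void)
{
	unsigned long expected = 0xc6236400UL;
	unsigned long seen     = 0xc2236400UL;

	/* prints xor = 0x4000000: a single flipped bit */
	printf("xor = 0x%lx\n", expected ^ seen);
	return 0;
}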


Segher


[PATCH] ASoC: fsl_spdif: Use 64-bit arithmetic instead of 32-bit

2018-07-04 Thread Gustavo A. R. Silva
Add suffix ULL to constant 64 in order to give the compiler complete
information about the proper arithmetic to use.

Notice that such constant is used in a context that expects an
expression of type u64 (64 bits, unsigned) and the following
expression is currently being evaluated using 32-bit arithmetic:

rate[index] * txclk_df * 64
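
A small userspace illustration of the difference; the values are made up for demonstration and are not taken from the driver's rate table:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint32_t rate = 768000, txclk_df = 128;      /* illustrative only */
	uint64_t wrong = rate * txclk_df * 64;       /* evaluated in 32 bits, wraps */
	uint64_t right = rate * txclk_df * 64ULL;    /* 64ULL forces 64-bit arithmetic */

	printf("wrong=%llu right=%llu\n",
	       (unsigned long long)wrong, (unsigned long long)right);
	return 0;
}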

Addresses-Coverity-ID: 1222129 ("Unintentional integer overflow")
Signed-off-by: Gustavo A. R. Silva 
---
 sound/soc/fsl/fsl_spdif.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/sound/soc/fsl/fsl_spdif.c b/sound/soc/fsl/fsl_spdif.c
index 9b59d87..740b90d 100644
--- a/sound/soc/fsl/fsl_spdif.c
+++ b/sound/soc/fsl/fsl_spdif.c
@@ -1118,7 +1118,7 @@ static u32 fsl_spdif_txclk_caldiv(struct fsl_spdif_priv 
*spdif_priv,
 
for (sysclk_df = sysclk_dfmin; sysclk_df <= sysclk_dfmax; sysclk_df++) {
for (txclk_df = 1; txclk_df <= 128; txclk_df++) {
-   rate_ideal = rate[index] * txclk_df * 64;
+   rate_ideal = rate[index] * txclk_df * 64ULL;
if (round)
rate_actual = clk_round_rate(clk, rate_ideal);
else
-- 
2.7.4



Re: [PATCH] powerpc/mpic: Cleanup irq vector accounting

2018-07-04 Thread Michael Ellerman
Bharat Bhushan  writes:

> Available vector space accounts for IPIs and timer interrupts,
> while the spurious vector was not accounted for.

OK. What is the symptom of that? Nothing? Total system crash?

Looks like this can be tagged:

Fixes: 0a4081641d72 ("powerpc/mpic: FSL MPIC error interrupt support.")

Which added the code that uses "12".

> Also, later
> mpic_setup_error_int() escapes one more vector; seemingly it
> assumes one spurious vector.

Ah right, I get it now.

So there is no bug. It's just a disagreement about whether the "intvec"
argument to mpic_setup_error_int() indicates the first number that's
free to use or the last number that has been allocated.

Right?

cheers

> Signed-off-by: Bharat Bhushan 
> ---
>  arch/powerpc/sysdev/fsl_mpic_err.c | 2 +-
>  arch/powerpc/sysdev/mpic.c | 6 +++---
>  2 files changed, 4 insertions(+), 4 deletions(-)
>
> diff --git a/arch/powerpc/sysdev/fsl_mpic_err.c 
> b/arch/powerpc/sysdev/fsl_mpic_err.c
> index 488ec45..2a98837 100644
> --- a/arch/powerpc/sysdev/fsl_mpic_err.c
> +++ b/arch/powerpc/sysdev/fsl_mpic_err.c
> @@ -76,7 +76,7 @@ int mpic_setup_error_int(struct mpic *mpic, int intvec)
>   mpic->flags |= MPIC_FSL_HAS_EIMR;
>   /* allocate interrupt vectors for error interrupts */
>   for (i = MPIC_MAX_ERR - 1; i >= 0; i--)
> - mpic->err_int_vecs[i] = --intvec;
> + mpic->err_int_vecs[i] = intvec--;
>  
>   return 0;
>  }
> diff --git a/arch/powerpc/sysdev/mpic.c b/arch/powerpc/sysdev/mpic.c
> index 1d4e0ef6..e098d1e 100644
> --- a/arch/powerpc/sysdev/mpic.c
> +++ b/arch/powerpc/sysdev/mpic.c
> @@ -1380,12 +1380,12 @@ struct mpic * __init mpic_alloc(struct device_node 
> *node,
>* global vector number space, as in case of ipis
>* and timer interrupts.
>*
> -  * Available vector space = intvec_top - 12, where 12
> +  * Available vector space = intvec_top - 13, where 13
>* is the number of vectors which have been consumed by
> -  * ipis and timer interrupts.
> +  * ipis, timer interrupts and spurious.
>*/
>   if (fsl_version >= 0x401) {
> - ret = mpic_setup_error_int(mpic, intvec_top - 12);
> + ret = mpic_setup_error_int(mpic, intvec_top - 13);
>   if (ret)
>   return NULL;
>   }
> -- 
> 1.9.3


[bug report] cxl: Prevent adapter reset if an active context exists

2018-07-04 Thread Dan Carpenter
Hello Vaibhav Jain,

The patch 70b565bbdb91: "cxl: Prevent adapter reset if an active
context exists" from Oct 14, 2016, leads to the following static
checker warning:

drivers/misc/cxl/main.c:290 cxl_adapter_context_get()
warn: 'atomic_inc_unless_negative(&adapter->contexts_num)' is unsigned

drivers/misc/cxl/main.c
   285  int cxl_adapter_context_get(struct cxl *adapter)
   286  {
   287  int rc;
   288  
   289  rc = atomic_inc_unless_negative(&adapter->contexts_num);
   290  return rc >= 0 ? 0 : -EBUSY;

atomic_inc_unless_negative() returns bool so it's always >= 0.

   291  }

regards,
dan carpenter


[RFC PATCH 1/2] dma-mapping: Clean up dma_set_*mask() hooks

2018-07-04 Thread Robin Murphy
Arch-specific implementations for dma_set_{coherent_,}mask() currently
rely on an inconsistent mix of arch-defined Kconfig symbols and macro
overrides. Now that we have a nice centralised home for DMA API gubbins,
let's consolidate these loose ends under consistent config options.

Signed-off-by: Robin Murphy 
---

Here's hoping the buildbot comes by to point out what I've inevitably
missed, although I did check a cursory cross-compile of ppc64_defconfig
to iron out the obvious howlers.

The motivation here is that I'm looking at adding set_mask overrides
for arm64, and having discovered a bit of a mess it seemed prudent to
clean up before ingraining it any more.

Robin.


 arch/arm/Kconfig   | 3 ---
 arch/powerpc/Kconfig   | 4 +---
 arch/powerpc/include/asm/dma-mapping.h | 3 ---
 include/linux/dma-mapping.h| 4 +++-
 kernel/dma/Kconfig | 6 ++
 5 files changed, 10 insertions(+), 10 deletions(-)

diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig
index 843edfd000be..ab0c081b6ec2 100644
--- a/arch/arm/Kconfig
+++ b/arch/arm/Kconfig
@@ -227,9 +227,6 @@ config ZONE_DMA
 config ARCH_SUPPORTS_UPROBES
def_bool y
 
-config ARCH_HAS_DMA_SET_COHERENT_MASK
-   bool
-
 config GENERIC_ISA_DMA
bool
 
diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index 9f2b75fe2c2d..08d85412d783 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -119,9 +119,6 @@ config GENERIC_HWEIGHT
bool
default y
 
-config ARCH_HAS_DMA_SET_COHERENT_MASK
-bool
-
 config PPC
bool
default y
@@ -129,6 +126,7 @@ config PPC
# Please keep this list sorted alphabetically.
#
select ARCH_HAS_DEVMEM_IS_ALLOWED
+   select ARCH_HAS_DMA_SET_MASK
select ARCH_HAS_DMA_SET_COHERENT_MASK
select ARCH_HAS_ELF_RANDOMIZE
select ARCH_HAS_FORTIFY_SOURCE
diff --git a/arch/powerpc/include/asm/dma-mapping.h 
b/arch/powerpc/include/asm/dma-mapping.h
index 8fa394520af6..fe912c4367f2 100644
--- a/arch/powerpc/include/asm/dma-mapping.h
+++ b/arch/powerpc/include/asm/dma-mapping.h
@@ -107,9 +107,6 @@ static inline void set_dma_offset(struct device *dev, 
dma_addr_t off)
dev->archdata.dma_offset = off;
 }
 
-#define HAVE_ARCH_DMA_SET_MASK 1
-extern int dma_set_mask(struct device *dev, u64 dma_mask);
-
 extern u64 __dma_get_required_mask(struct device *dev);
 
 #define ARCH_HAS_DMA_MMAP_COHERENT
diff --git a/include/linux/dma-mapping.h b/include/linux/dma-mapping.h
index ffeca3ab59c0..30fe0c900420 100644
--- a/include/linux/dma-mapping.h
+++ b/include/linux/dma-mapping.h
@@ -596,7 +596,9 @@ static inline int dma_supported(struct device *dev, u64 
mask)
return ops->dma_supported(dev, mask);
 }
 
-#ifndef HAVE_ARCH_DMA_SET_MASK
+#ifdef CONFIG_ARCH_HAS_DMA_SET_MASK
+int dma_set_mask(struct device *dev, u64 mask);
+#else
 static inline int dma_set_mask(struct device *dev, u64 mask)
 {
if (!dev->dma_mask || !dma_supported(dev, mask))
diff --git a/kernel/dma/Kconfig b/kernel/dma/Kconfig
index 9bd54304446f..01001371d892 100644
--- a/kernel/dma/Kconfig
+++ b/kernel/dma/Kconfig
@@ -16,6 +16,12 @@ config ARCH_DMA_ADDR_T_64BIT
 config HAVE_GENERIC_DMA_COHERENT
bool
 
+config ARCH_HAS_DMA_SET_MASK
+bool
+
+config ARCH_HAS_DMA_SET_COHERENT_MASK
+bool
+
 config ARCH_HAS_SYNC_DMA_FOR_DEVICE
bool
 
-- 
2.17.1.dirty



[RFC PATCH 2/2] dma-mapping: Clean up dma_get_required_mask() hooks

2018-07-04 Thread Robin Murphy
As for the other mask-related hooks, standardise the arch override into
a Kconfig option, and also pull the generic implementation into the DMA
mapping code rather than having it hide away in the platform bus code.

Signed-off-by: Robin Murphy 
---
 arch/ia64/Kconfig   |  1 +
 arch/ia64/include/asm/dma-mapping.h |  2 --
 arch/powerpc/Kconfig|  1 +
 arch/powerpc/include/asm/device.h   |  2 --
 drivers/base/platform.c | 23 ---
 drivers/pci/controller/vmd.c|  4 ++--
 include/linux/dma-mapping.h |  2 +-
 kernel/dma/Kconfig  |  3 +++
 kernel/dma/mapping.c| 23 +++
 9 files changed, 31 insertions(+), 30 deletions(-)

diff --git a/arch/ia64/Kconfig b/arch/ia64/Kconfig
index ff861420b8f5..a6274e79b155 100644
--- a/arch/ia64/Kconfig
+++ b/arch/ia64/Kconfig
@@ -12,6 +12,7 @@ menu "Processor type and features"
 
 config IA64
bool
+   select ARCH_HAS_DMA_GET_REQUIRED_MASK
select ARCH_MIGHT_HAVE_PC_PARPORT
select ARCH_MIGHT_HAVE_PC_SERIO
select PCI if (!IA64_HP_SIM)
diff --git a/arch/ia64/include/asm/dma-mapping.h 
b/arch/ia64/include/asm/dma-mapping.h
index 76e4d6632d68..522745ae67bb 100644
--- a/arch/ia64/include/asm/dma-mapping.h
+++ b/arch/ia64/include/asm/dma-mapping.h
@@ -10,8 +10,6 @@
 #include 
 #include 
 
-#define ARCH_HAS_DMA_GET_REQUIRED_MASK
-
 extern const struct dma_map_ops *dma_ops;
 extern struct ia64_machine_vector ia64_mv;
 extern void set_iommu_machvec(void);
diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index 08d85412d783..3581c576c762 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -126,6 +126,7 @@ config PPC
# Please keep this list sorted alphabetically.
#
select ARCH_HAS_DEVMEM_IS_ALLOWED
+   select ARCH_HAS_DMA_GET_REQUIRED_MASK
select ARCH_HAS_DMA_SET_MASK
select ARCH_HAS_DMA_SET_COHERENT_MASK
select ARCH_HAS_ELF_RANDOMIZE
diff --git a/arch/powerpc/include/asm/device.h 
b/arch/powerpc/include/asm/device.h
index 0245bfcaac32..17cceab5ccf9 100644
--- a/arch/powerpc/include/asm/device.h
+++ b/arch/powerpc/include/asm/device.h
@@ -54,6 +54,4 @@ struct pdev_archdata {
u64 dma_mask;
 };
 
-#define ARCH_HAS_DMA_GET_REQUIRED_MASK
-
 #endif /* _ASM_POWERPC_DEVICE_H */
diff --git a/drivers/base/platform.c b/drivers/base/platform.c
index dff82a3c2caa..dae427a77b0a 100644
--- a/drivers/base/platform.c
+++ b/drivers/base/platform.c
@@ -16,7 +16,6 @@
 #include 
 #include 
 #include 
-#include 
 #include 
 #include 
 #include 
@@ -1179,28 +1178,6 @@ int __init platform_bus_init(void)
return error;
 }
 
-#ifndef ARCH_HAS_DMA_GET_REQUIRED_MASK
-u64 dma_get_required_mask(struct device *dev)
-{
-   u32 low_totalram = ((max_pfn - 1) << PAGE_SHIFT);
-   u32 high_totalram = ((max_pfn - 1) >> (32 - PAGE_SHIFT));
-   u64 mask;
-
-   if (!high_totalram) {
-   /* convert to mask just covering totalram */
-   low_totalram = (1 << (fls(low_totalram) - 1));
-   low_totalram += low_totalram - 1;
-   mask = low_totalram;
-   } else {
-   high_totalram = (1 << (fls(high_totalram) - 1));
-   high_totalram += high_totalram - 1;
-   mask = (((u64)high_totalram) << 32) + 0x;
-   }
-   return mask;
-}
-EXPORT_SYMBOL_GPL(dma_get_required_mask);
-#endif
-
 static __initdata LIST_HEAD(early_platform_driver_list);
 static __initdata LIST_HEAD(early_platform_device_list);
 
diff --git a/drivers/pci/controller/vmd.c b/drivers/pci/controller/vmd.c
index 942b64fc7f1f..9dd721d36783 100644
--- a/drivers/pci/controller/vmd.c
+++ b/drivers/pci/controller/vmd.c
@@ -393,7 +393,7 @@ static int vmd_dma_supported(struct device *dev, u64 mask)
return vmd_dma_ops(dev)->dma_supported(to_vmd_dev(dev), mask);
 }
 
-#ifdef ARCH_HAS_DMA_GET_REQUIRED_MASK
+#ifdef CONFIG_ARCH_HAS_DMA_GET_REQUIRED_MASK
 static u64 vmd_get_required_mask(struct device *dev)
 {
return vmd_dma_ops(dev)->get_required_mask(to_vmd_dev(dev));
@@ -439,7 +439,7 @@ static void vmd_setup_dma_ops(struct vmd_dev *vmd)
ASSIGN_VMD_DMA_OPS(source, dest, sync_sg_for_device);
ASSIGN_VMD_DMA_OPS(source, dest, mapping_error);
ASSIGN_VMD_DMA_OPS(source, dest, dma_supported);
-#ifdef ARCH_HAS_DMA_GET_REQUIRED_MASK
+#ifdef CONFIG_ARCH_HAS_DMA_GET_REQUIRED_MASK
ASSIGN_VMD_DMA_OPS(source, dest, get_required_mask);
 #endif
add_dma_domain(domain);
diff --git a/include/linux/dma-mapping.h b/include/linux/dma-mapping.h
index 30fe0c900420..788d7a609dd8 100644
--- a/include/linux/dma-mapping.h
+++ b/include/linux/dma-mapping.h
@@ -130,7 +130,7 @@ struct dma_map_ops {
enum dma_data_direction direction);
int (*mapping_error)(struct device *dev, dma_addr_t dma_addr);
int (*dma_supported)(struct device *dev, u64 mask);
-#ifdef 

[PATCH v6 0/8] powerpc/pseries: Machine check handler improvements.

2018-07-04 Thread Mahesh J Salgaonkar
This patch series includes some improvements to the Machine check handler
for pSeries. Patch 1 fixes a buffer overrun issue if the rtas extended error
log size is greater than RTAS_ERROR_LOG_MAX.
Patch 2 fixes an issue where the machine check handler crashes the
kernel while accessing a vmalloc-ed buffer in NMI context.
Patch 3 fixes an endian bug while restoring r3 in the MCE handler.
Patch 5 implements a real mode mce handler and flushes the SLBs on SLB errors.
Patch 6 displays the MCE error details on the console.
Patch 7 saves and dumps the SLB contents on SLB MCE errors to improve
debuggability.
Patch 8 consolidates the mce early real mode handling code.

Change in V6:
- Introduce patch 8 to consolidate early real mode handling code.
- Address Nick's comment on erroneous hunk.

Change in V5:
- Use min_t instead of max_t.
- Fix an issue reported by kbuild test robot and address review comments.

Change in V4:
- Flush the SLBs in real mode mce handler to handle SLB errors for entry 0.
- Allocate buffers per cpu to hold rtas error log and old slb contents.
- Defer the logging of rtas error log to irq work queue.

Change in V3:
- Moved patch 5 to patch 2

Change in V2:
- patch 3: Display additional info (NIP and task info) in MCE error details.
- patch 5: Fix endain bug while restoring of r3 in MCE handler.


---

Mahesh Salgaonkar (8):
  powerpc/pseries: Avoid using the size greater than RTAS_ERROR_LOG_MAX.
  powerpc/pseries: Defer the logging of rtas error to irq work queue.
  powerpc/pseries: Fix endianness while restoring r3 in MCE handler.
  powerpc/pseries: Define MCE error event section.
  powerpc/pseries: flush SLB contents on SLB MCE errors.
  powerpc/pseries: Display machine check error details.
  powerpc/pseries: Dump the SLB contents on SLB MCE errors.
  powernv/pseries: consolidate code for mce early handling.


 arch/powerpc/include/asm/book3s/64/mmu-hash.h |8 +
 arch/powerpc/include/asm/machdep.h|1 
 arch/powerpc/include/asm/paca.h   |4 
 arch/powerpc/include/asm/rtas.h   |  116 
 arch/powerpc/kernel/exceptions-64s.S  |   18 +-
 arch/powerpc/kernel/mce.c |   16 +-
 arch/powerpc/mm/slb.c |   63 +++
 arch/powerpc/platforms/pseries/pseries.h  |1 
 arch/powerpc/platforms/pseries/ras.c  |  242 +++--
 arch/powerpc/platforms/pseries/setup.c|   27 +++
 10 files changed, 471 insertions(+), 25 deletions(-)

--
Signature



[PATCH v6 1/8] powerpc/pseries: Avoid using the size greater than RTAS_ERROR_LOG_MAX.

2018-07-04 Thread Mahesh J Salgaonkar
From: Mahesh Salgaonkar 

The global mce data buffer that is used to copy the rtas error log is 2048
(RTAS_ERROR_LOG_MAX) bytes in size. Before the copy we read
extended_log_length from the rtas error log header, then use the max of
extended_log_length and RTAS_ERROR_LOG_MAX as the size of data to be copied.
Ideally the platform (phyp) will never send an extended error log with
size > 2048. But if that happens, then we have a risk of buffer overrun
and corruption. Fix this by using min_t instead.

Fixes: d368514c3097 ("powerpc: Fix corruption when grabbing FWNMI data")
Reported-by: Michal Suchanek 
Signed-off-by: Mahesh Salgaonkar 
---
 arch/powerpc/platforms/pseries/ras.c |2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/powerpc/platforms/pseries/ras.c 
b/arch/powerpc/platforms/pseries/ras.c
index 5e1ef9150182..ef104144d4bc 100644
--- a/arch/powerpc/platforms/pseries/ras.c
+++ b/arch/powerpc/platforms/pseries/ras.c
@@ -371,7 +371,7 @@ static struct rtas_error_log *fwnmi_get_errinfo(struct 
pt_regs *regs)
int len, error_log_length;
 
error_log_length = 8 + rtas_error_extended_log_length(h);
-   len = max_t(int, error_log_length, RTAS_ERROR_LOG_MAX);
+   len = min_t(int, error_log_length, RTAS_ERROR_LOG_MAX);
memset(global_mce_data_buf, 0, RTAS_ERROR_LOG_MAX);
memcpy(global_mce_data_buf, h, len);
errhdr = (struct rtas_error_log *)global_mce_data_buf;



[PATCH v6 2/8] powerpc/pseries: Defer the logging of rtas error to irq work queue.

2018-07-04 Thread Mahesh J Salgaonkar
From: Mahesh Salgaonkar 

rtas_log_buf is a buffer to hold RTAS event data that are communicated
to kernel by hypervisor. This buffer is then used to pass RTAS event
data to user through proc fs. This buffer is allocated from vmalloc
(non-linear mapping) area.

On Machine check interrupt, register r3 points to RTAS extended event
log passed by hypervisor that contains the MCE event. The pseries
machine check handler then logs this error into rtas_log_buf. The
rtas_log_buf is a vmalloc-ed (non-linear) buffer, so we end up taking a
page fault (vector 0x300) while accessing it. Since the machine check
interrupt handler runs in NMI context, we cannot afford to take any
page fault. Page faults are not honored in NMI context and cause a
kernel panic. Apart from that, as Nick pointed out, pSeries_log_error()
also takes a spin_lock while logging the error, which is not safe in NMI
context. It may end up in a deadlock if we get another MCE before releasing
the lock. Fix this by deferring the logging of the rtas error to an irq work queue.

The current implementation uses two different buffers to hold the rtas error
log depending on whether an extended log is provided or not. This makes it a
bit difficult to identify which buffer has valid data that needs to be logged
later in irq work. Simplify this by using a single buffer, one per paca, and
copying the rtas log to it irrespective of whether an extended log is provided
or not. Allocate this buffer below the RMA region so that it can be accessed
in the real mode mce handler.
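
The deferral pattern looks roughly like the sketch below (names are illustrative; the real handler first copies the rtas log into the per-paca buffer before queueing the work):

#include <linux/irq_work.h>
#include <linux/printk.h>

static void mce_log_deferred(struct irq_work *work)
{
	/* runs later in normal interrupt context: vmalloc'ed
	 * buffers and spinlocks are safe to use here */
	pr_err("deferred machine check logging\n");
}

static struct irq_work mce_log_work = {
	.func = mce_log_deferred,
};

static void mce_nmi_path(void)
{
	/* NMI/real-mode path: no page faults, no locks -- just queue */
	irq_work_queue(&mce_log_work);
}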

Fixes: b96672dd840f ("powerpc: Machine check interrupt is a non-maskable 
interrupt")
Cc: sta...@vger.kernel.org
Reviewed-by: Nicholas Piggin 
Signed-off-by: Mahesh Salgaonkar 
---
 arch/powerpc/include/asm/paca.h|3 ++
 arch/powerpc/platforms/pseries/ras.c   |   47 ++--
 arch/powerpc/platforms/pseries/setup.c |   16 +++
 3 files changed, 51 insertions(+), 15 deletions(-)

diff --git a/arch/powerpc/include/asm/paca.h b/arch/powerpc/include/asm/paca.h
index 3f109a3e3edb..b441fef53077 100644
--- a/arch/powerpc/include/asm/paca.h
+++ b/arch/powerpc/include/asm/paca.h
@@ -251,6 +251,9 @@ struct paca_struct {
void *rfi_flush_fallback_area;
u64 l1d_flush_size;
 #endif
+#ifdef CONFIG_PPC_PSERIES
+   u8 *mce_data_buf;   /* buffer to hold per cpu rtas errlog */
+#endif /* CONFIG_PPC_PSERIES */
 } cacheline_aligned;
 
 extern void copy_mm_to_paca(struct mm_struct *mm);
diff --git a/arch/powerpc/platforms/pseries/ras.c 
b/arch/powerpc/platforms/pseries/ras.c
index ef104144d4bc..14a46b07ab2f 100644
--- a/arch/powerpc/platforms/pseries/ras.c
+++ b/arch/powerpc/platforms/pseries/ras.c
@@ -22,6 +22,7 @@
 #include 
 #include 
 #include 
+#include 
 
 #include 
 #include 
@@ -32,11 +33,13 @@
 static unsigned char ras_log_buf[RTAS_ERROR_LOG_MAX];
 static DEFINE_SPINLOCK(ras_log_buf_lock);
 
-static char global_mce_data_buf[RTAS_ERROR_LOG_MAX];
-static DEFINE_PER_CPU(__u64, mce_data_buf);
-
 static int ras_check_exception_token;
 
+static void mce_process_errlog_event(struct irq_work *work);
+static struct irq_work mce_errlog_process_work = {
+   .func = mce_process_errlog_event,
+};
+
 #define EPOW_SENSOR_TOKEN  9
 #define EPOW_SENSOR_INDEX  0
 
@@ -330,16 +333,20 @@ static irqreturn_t ras_error_interrupt(int irq, void *dev_id)
 	((((A) >= 0x7000) && ((A) < 0x7ff0)) || \
 	(((A) >= rtas.base) && ((A) < (rtas.base + rtas.size - 16))))
 
+static inline struct rtas_error_log *fwnmi_get_errlog(void)
+{
+   return (struct rtas_error_log *)local_paca->mce_data_buf;
+}
+
 /*
  * Get the error information for errors coming through the
  * FWNMI vectors.  The pt_regs' r3 will be updated to reflect
  * the actual r3 if possible, and a ptr to the error log entry
  * will be returned if found.
  *
- * If the RTAS error is not of the extended type, then we put it in a per
- * cpu 64bit buffer. If it is the extended type we use global_mce_data_buf.
+ * Use one buffer mce_data_buf per cpu to store RTAS error.
  *
- * The global_mce_data_buf does not have any locks or protection around it,
+ * The mce_data_buf does not have any locks or protection around it,
  * if a second machine check comes in, or a system reset is done
  * before we have logged the error, then we will get corruption in the
  * error log.  This is preferable over holding off on calling
@@ -349,7 +356,7 @@ static irqreturn_t ras_error_interrupt(int irq, void 
*dev_id)
 static struct rtas_error_log *fwnmi_get_errinfo(struct pt_regs *regs)
 {
unsigned long *savep;
-   struct rtas_error_log *h, *errhdr = NULL;
+   struct rtas_error_log *h;
 
/* Mask top two bits */
regs->gpr[3] &= ~(0x3UL << 62);
@@ -362,22 +369,20 @@ static struct rtas_error_log *fwnmi_get_errinfo(struct 
pt_regs *regs)
savep = __va(regs->gpr[3]);
regs->gpr[3] = savep[0];/* restore original r3 */
 
-   /* If it isn't an extended log we can use the per cpu 64bit buffer */
h = 

[PATCH v6 3/8] powerpc/pseries: Fix endianness while restoring r3 in MCE handler.

2018-07-04 Thread Mahesh J Salgaonkar
From: Mahesh Salgaonkar 

During a Machine Check interrupt on the pseries platform, register r3 points
to the RTAS extended event log passed by the hypervisor. Since the hypervisor
uses r3 to pass a pointer to the rtas log, it stores the original r3 value at
the start of the memory (first 8 bytes) pointed to by r3. Since the hypervisor
stores this info and the rtas log in BE format, Linux should make
sure to restore the r3 value in the correct endian format.
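
The intent of the one-line fix, sketched on its own (assuming the usual byteorder helpers): the saved r3 is stored big-endian, so it must be converted before being written back into pt_regs:

#include <asm/byteorder.h>
#include <linux/types.h>

static unsigned long restore_saved_r3(const __be64 *savep)
{
	/* byte-swap on little-endian kernels, a no-op on big-endian */
	return be64_to_cpu(savep[0]);
}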

Without this patch, when the MCE handler, after recovery, returns to the code
that caused the MCE, it may end up with a Data SLB access interrupt for an
invalid address, followed by a kernel panic or hang.

[   62.878965] Severe Machine check interrupt [Recovered]
[   62.878968]   NIP [dca301b8]: init_module+0x1b8/0x338 [bork_kernel]
[   62.878969]   Initiator: CPU
[   62.878970]   Error type: SLB [Multihit]
[   62.878971] Effective address: dca7
cpu 0xa: Vector: 380 (Data SLB Access) at [c000fc7775b0]
pc: c09694c0: vsnprintf+0x80/0x480
lr: c09698e0: vscnprintf+0x20/0x60
sp: c000fc777830
   msr: 82009033
   dar: a803a30c00d0
  current = 0xcbc9ef00
  paca= 0xc0001eca5c00   softe: 3irq_happened: 0x01
pid   = 8860, comm = insmod
[c000fc7778b0] c09698e0 vscnprintf+0x20/0x60
[c000fc7778e0] c016b6c4 vprintk_emit+0xb4/0x4b0
[c000fc777960] c016d40c vprintk_func+0x5c/0xd0
[c000fc777980] c016cbb4 printk+0x38/0x4c
[c000fc7779a0] dca301c0 init_module+0x1c0/0x338 [bork_kernel]
[c000fc777a40] c000d9c4 do_one_initcall+0x54/0x230
[c000fc777b00] c01b3b74 do_init_module+0x8c/0x248
[c000fc777b90] c01b2478 load_module+0x12b8/0x15b0
[c000fc777d30] c01b29e8 sys_finit_module+0xa8/0x110
[c000fc777e30] c000b204 system_call+0x58/0x6c
--- Exception: c00 (System Call) at 7fff8bda0644
SP (7fffdfbfe980) is in userspace

This patch fixes this issue.

Fixes: a08a53ea4c97 ("powerpc/le: Enable RTAS events support")
Cc: sta...@vger.kernel.org
Reviewed-by: Nicholas Piggin 
Signed-off-by: Mahesh Salgaonkar 
---
 arch/powerpc/platforms/pseries/ras.c |2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/powerpc/platforms/pseries/ras.c 
b/arch/powerpc/platforms/pseries/ras.c
index 14a46b07ab2f..851ce326874a 100644
--- a/arch/powerpc/platforms/pseries/ras.c
+++ b/arch/powerpc/platforms/pseries/ras.c
@@ -367,7 +367,7 @@ static struct rtas_error_log *fwnmi_get_errinfo(struct 
pt_regs *regs)
}
 
savep = __va(regs->gpr[3]);
-   regs->gpr[3] = savep[0];/* restore original r3 */
+   regs->gpr[3] = be64_to_cpu(savep[0]);   /* restore original r3 */
 
 h = (struct rtas_error_log *)&savep[1];
/* Use the per cpu buffer from paca to store rtas error log */



[PATCH v6 4/8] powerpc/pseries: Define MCE error event section.

2018-07-04 Thread Mahesh J Salgaonkar
From: Mahesh Salgaonkar 

On pseries, the machine check error details are part of the RTAS extended
event log, passed under the Machine check exception section. This patch adds
the definition of the rtas MCE event section and related helper
functions.

Signed-off-by: Mahesh Salgaonkar 
---
 arch/powerpc/include/asm/rtas.h |  111 +++
 1 file changed, 111 insertions(+)

diff --git a/arch/powerpc/include/asm/rtas.h b/arch/powerpc/include/asm/rtas.h
index ec9dd79398ee..ceeed2dd489b 100644
--- a/arch/powerpc/include/asm/rtas.h
+++ b/arch/powerpc/include/asm/rtas.h
@@ -185,6 +185,13 @@ static inline uint8_t rtas_error_disposition(const struct 
rtas_error_log *elog)
return (elog->byte1 & 0x18) >> 3;
 }
 
+static inline
+void rtas_set_disposition_recovered(struct rtas_error_log *elog)
+{
+   elog->byte1 &= ~0x18;
+   elog->byte1 |= (RTAS_DISP_FULLY_RECOVERED << 3);
+}
+
 static inline uint8_t rtas_error_extended(const struct rtas_error_log *elog)
 {
return (elog->byte1 & 0x04) >> 2;
@@ -275,6 +282,7 @@ inline uint32_t rtas_ext_event_company_id(struct 
rtas_ext_event_log_v6 *ext_log)
 #define PSERIES_ELOG_SECT_ID_CALL_HOME (('C' << 8) | 'H')
 #define PSERIES_ELOG_SECT_ID_USER_DEF  (('U' << 8) | 'D')
 #define PSERIES_ELOG_SECT_ID_HOTPLUG   (('H' << 8) | 'P')
+#define PSERIES_ELOG_SECT_ID_MCE   (('M' << 8) | 'C')
 
 /* Vendor specific Platform Event Log Format, Version 6, section header */
 struct pseries_errorlog {
@@ -326,6 +334,109 @@ struct pseries_hp_errorlog {
 #define PSERIES_HP_ELOG_ID_DRC_COUNT   3
 #define PSERIES_HP_ELOG_ID_DRC_IC  4
 
+/* RTAS pseries MCE errorlog section */
+#pragma pack(push, 1)
+struct pseries_mc_errorlog {
+   __be32  fru_id;
+   __be32  proc_id;
+   uint8_t error_type;
+   union {
+   struct {
+   uint8_t ue_err_type;
+   /*
+    * X        1: Permanent or Transient UE.
+    *  X       1: Effective address provided.
+    *   X      1: Logical address provided.
+    *    XX    2: Reserved.
+    *      XXX 3: Type of UE error.
+    */
+   uint8_t reserved_1[6];
+   __be64  effective_address;
+   __be64  logical_address;
+   } ue_error;
+   struct {
+   uint8_t soft_err_type;
+   /*
+    * X        1: Effective address provided.
+    *  X       5: Reserved.
+    *   XX     2: Type of SLB/ERAT/TLB error.
+    */
+   uint8_t reserved_1[6];
+   __be64  effective_address;
+   uint8_t reserved_2[8];
+   } soft_error;
+   } u;
+};
+#pragma pack(pop)
+
+/* RTAS pseries MCE error types */
+#define PSERIES_MC_ERROR_TYPE_UE   0x00
+#define PSERIES_MC_ERROR_TYPE_SLB  0x01
+#define PSERIES_MC_ERROR_TYPE_ERAT 0x02
+#define PSERIES_MC_ERROR_TYPE_TLB  0x04
+#define PSERIES_MC_ERROR_TYPE_D_CACHE  0x05
+#define PSERIES_MC_ERROR_TYPE_I_CACHE  0x07
+
+/* RTAS pseries MCE error sub types */
+#define PSERIES_MC_ERROR_UE_INDETERMINATE  0
+#define PSERIES_MC_ERROR_UE_IFETCH 1
+#define PSERIES_MC_ERROR_UE_PAGE_TABLE_WALK_IFETCH 2
+#define PSERIES_MC_ERROR_UE_LOAD_STORE 3
+#define PSERIES_MC_ERROR_UE_PAGE_TABLE_WALK_LOAD_STORE 4
+
+#define PSERIES_MC_ERROR_SLB_PARITY0
+#define PSERIES_MC_ERROR_SLB_MULTIHIT  1
+#define PSERIES_MC_ERROR_SLB_INDETERMINATE 2
+
+#define PSERIES_MC_ERROR_ERAT_PARITY   1
+#define PSERIES_MC_ERROR_ERAT_MULTIHIT 2
+#define PSERIES_MC_ERROR_ERAT_INDETERMINATE3
+
+#define PSERIES_MC_ERROR_TLB_PARITY1
+#define PSERIES_MC_ERROR_TLB_MULTIHIT  2
+#define PSERIES_MC_ERROR_TLB_INDETERMINATE 3
+
+static inline uint8_t rtas_mc_error_type(const struct pseries_mc_errorlog 
*mlog)
+{
+   return mlog->error_type;
+}
+
+static inline uint8_t rtas_mc_error_sub_type(
+   const struct pseries_mc_errorlog *mlog)
+{
+   switch (mlog->error_type) {
+   case PSERIES_MC_ERROR_TYPE_UE:
+   return (mlog->u.ue_error.ue_err_type & 0x07);
+   case PSERIES_MC_ERROR_TYPE_SLB:
+   case PSERIES_MC_ERROR_TYPE_ERAT:
+   case PSERIES_MC_ERROR_TYPE_TLB:
+   return (mlog->u.soft_error.soft_err_type & 0x03);
+   default:
+   return 0;
+   }
+}
+
+static inline uint64_t rtas_mc_get_effective_addr(
+   const struct pseries_mc_errorlog *mlog)
+{
+   uint64_t addr = 0;
+
+   switch 

[PATCH v6 5/8] powerpc/pseries: flush SLB contents on SLB MCE errors.

2018-07-04 Thread Mahesh J Salgaonkar
From: Mahesh Salgaonkar 

On pseries, as of today the system crashes if we get a machine check
exception due to SLB errors. These are soft errors and can be fixed by
flushing the SLBs so the kernel can continue to function instead of
crashing. We do this in real mode before turning on the MMU. Otherwise
we would run into nested machine checks. This patch now fetches the
rtas error log in real mode and flushes the SLBs on SLB errors.
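
The ras.c side of this (not fully visible in the truncated diff below) follows roughly the flow sketched here, using the helpers added in patches 4 and 5; the function name is illustrative:

static int mce_handle_slb_error_sketch(struct rtas_error_log *errp)
{
	struct pseries_errorlog *pseries_log;
	struct pseries_mc_errorlog *mce_log;

	pseries_log = get_pseries_errorlog(errp, PSERIES_ELOG_SECT_ID_MCE);
	if (!pseries_log)
		return 0;

	mce_log = (struct pseries_mc_errorlog *)pseries_log->data;
	if (rtas_mc_error_type(mce_log) == PSERIES_MC_ERROR_TYPE_SLB) {
		/* still in real mode: flush and re-bolt the SLB entries */
		slb_flush_and_rebolt_realmode();
		return 1;	/* treated as recovered */
	}

	return 0;
}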

Signed-off-by: Mahesh Salgaonkar 
---
 arch/powerpc/include/asm/book3s/64/mmu-hash.h |1 
 arch/powerpc/include/asm/machdep.h|1 
 arch/powerpc/kernel/exceptions-64s.S  |   42 +
 arch/powerpc/kernel/mce.c |   16 +++-
 arch/powerpc/mm/slb.c |6 +++
 arch/powerpc/platforms/pseries/pseries.h  |1 
 arch/powerpc/platforms/pseries/ras.c  |   51 +
 arch/powerpc/platforms/pseries/setup.c|1 
 8 files changed, 116 insertions(+), 3 deletions(-)

diff --git a/arch/powerpc/include/asm/book3s/64/mmu-hash.h 
b/arch/powerpc/include/asm/book3s/64/mmu-hash.h
index 50ed64fba4ae..cc00a7088cf3 100644
--- a/arch/powerpc/include/asm/book3s/64/mmu-hash.h
+++ b/arch/powerpc/include/asm/book3s/64/mmu-hash.h
@@ -487,6 +487,7 @@ extern void hpte_init_native(void);
 
 extern void slb_initialize(void);
 extern void slb_flush_and_rebolt(void);
+extern void slb_flush_and_rebolt_realmode(void);
 
 extern void slb_vmalloc_update(void);
 extern void slb_set_size(u16 size);
diff --git a/arch/powerpc/include/asm/machdep.h 
b/arch/powerpc/include/asm/machdep.h
index ffe7c71e1132..fe447e0d4140 100644
--- a/arch/powerpc/include/asm/machdep.h
+++ b/arch/powerpc/include/asm/machdep.h
@@ -108,6 +108,7 @@ struct machdep_calls {
 
/* Early exception handlers called in realmode */
int (*hmi_exception_early)(struct pt_regs *regs);
+   int (*machine_check_early)(struct pt_regs *regs);
 
/* Called during machine check exception to retrive fixup address. */
bool(*mce_check_early_recovery)(struct pt_regs *regs);
diff --git a/arch/powerpc/kernel/exceptions-64s.S 
b/arch/powerpc/kernel/exceptions-64s.S
index f283958129f2..0038596b7906 100644
--- a/arch/powerpc/kernel/exceptions-64s.S
+++ b/arch/powerpc/kernel/exceptions-64s.S
@@ -332,6 +332,9 @@ TRAMP_REAL_BEGIN(machine_check_pSeries)
 machine_check_fwnmi:
SET_SCRATCH0(r13)   /* save r13 */
EXCEPTION_PROLOG_0(PACA_EXMC)
+BEGIN_FTR_SECTION
+   b   machine_check_pSeries_early
+END_FTR_SECTION_IFCLR(CPU_FTR_HVMODE)
 machine_check_pSeries_0:
EXCEPTION_PROLOG_1(PACA_EXMC, KVMTEST_PR, 0x200)
/*
@@ -343,6 +346,45 @@ machine_check_pSeries_0:
 
 TRAMP_KVM_SKIP(PACA_EXMC, 0x200)
 
+TRAMP_REAL_BEGIN(machine_check_pSeries_early)
+BEGIN_FTR_SECTION
+   EXCEPTION_PROLOG_1(PACA_EXMC, NOTEST, 0x200)
+   mr  r10,r1  /* Save r1 */
+   ld  r1,PACAMCEMERGSP(r13)   /* Use MC emergency stack */
+   subir1,r1,INT_FRAME_SIZE/* alloc stack frame*/
+   mfspr   r11,SPRN_SRR0   /* Save SRR0 */
+   mfspr   r12,SPRN_SRR1   /* Save SRR1 */
+   EXCEPTION_PROLOG_COMMON_1()
+   EXCEPTION_PROLOG_COMMON_2(PACA_EXMC)
+   EXCEPTION_PROLOG_COMMON_3(0x200)
+   addir3,r1,STACK_FRAME_OVERHEAD
+   BRANCH_LINK_TO_FAR(machine_check_early) /* Function call ABI */
+
+   /* Move original SRR0 and SRR1 into the respective regs */
+   ld  r9,_MSR(r1)
+   mtspr   SPRN_SRR1,r9
+   ld  r3,_NIP(r1)
+   mtspr   SPRN_SRR0,r3
+   ld  r9,_CTR(r1)
+   mtctr   r9
+   ld  r9,_XER(r1)
+   mtxer   r9
+   ld  r9,_LINK(r1)
+   mtlrr9
+   REST_GPR(0, r1)
+   REST_8GPRS(2, r1)
+   REST_GPR(10, r1)
+   ld  r11,_CCR(r1)
+   mtcrr11
+   REST_GPR(11, r1)
+   REST_2GPRS(12, r1)
+   /* restore original r1. */
+   ld  r1,GPR1(r1)
+   SET_SCRATCH0(r13)   /* save r13 */
+   EXCEPTION_PROLOG_0(PACA_EXMC)
+   b   machine_check_pSeries_0
+END_FTR_SECTION_IFCLR(CPU_FTR_HVMODE)
+
 EXC_COMMON_BEGIN(machine_check_common)
/*
 * Machine check is different because we use a different
diff --git a/arch/powerpc/kernel/mce.c b/arch/powerpc/kernel/mce.c
index efdd16a79075..221271c96a57 100644
--- a/arch/powerpc/kernel/mce.c
+++ b/arch/powerpc/kernel/mce.c
@@ -488,9 +488,21 @@ long machine_check_early(struct pt_regs *regs)
 {
long handled = 0;
 
-   __this_cpu_inc(irq_stat.mce_exceptions);
+   /*
+* For pSeries we count mce when we go into virtual mode machine
+* check handler. Hence skip it. Also, We can't access per cpu
+* variables in real mode for LPAR.
+*/
+   if (early_cpu_has_feature(CPU_FTR_HVMODE))
+   __this_cpu_inc(irq_stat.mce_exceptions);
 
-   if (cur_cpu_spec 

[PATCH v6 6/8] powerpc/pseries: Display machine check error details.

2018-07-04 Thread Mahesh J Salgaonkar
From: Mahesh Salgaonkar 

Extract the MCE error details from the RTAS extended log and display them on
the console.

With this patch you should now see mce logs like below:

[  142.371818] Severe Machine check interrupt [Recovered]
[  142.371822]   NIP [dca301b8]: init_module+0x1b8/0x338 [bork_kernel]
[  142.371822]   Initiator: CPU
[  142.371823]   Error type: SLB [Multihit]
[  142.371824] Effective address: dca7

Signed-off-by: Mahesh Salgaonkar 
---
 arch/powerpc/include/asm/rtas.h  |5 +
 arch/powerpc/platforms/pseries/ras.c |  132 ++
 2 files changed, 137 insertions(+)

diff --git a/arch/powerpc/include/asm/rtas.h b/arch/powerpc/include/asm/rtas.h
index ceeed2dd489b..26bc3d5c4992 100644
--- a/arch/powerpc/include/asm/rtas.h
+++ b/arch/powerpc/include/asm/rtas.h
@@ -197,6 +197,11 @@ static inline uint8_t rtas_error_extended(const struct 
rtas_error_log *elog)
return (elog->byte1 & 0x04) >> 2;
 }
 
+static inline uint8_t rtas_error_initiator(const struct rtas_error_log *elog)
+{
+   return (elog->byte2 & 0xf0) >> 4;
+}
+
 #define rtas_error_type(x) ((x)->byte3)
 
 static inline
diff --git a/arch/powerpc/platforms/pseries/ras.c 
b/arch/powerpc/platforms/pseries/ras.c
index 9aa7885e0148..252792f238b6 100644
--- a/arch/powerpc/platforms/pseries/ras.c
+++ b/arch/powerpc/platforms/pseries/ras.c
@@ -427,6 +427,135 @@ int pSeries_system_reset_exception(struct pt_regs *regs)
return 0; /* need to perform reset */
 }
 
+#define VAL_TO_STRING(ar, val) ((val < ARRAY_SIZE(ar)) ? ar[val] : "Unknown")
+
+static void pseries_print_mce_info(struct pt_regs *regs,
+   struct rtas_error_log *errp)
+{
+   const char *level, *sevstr;
+   struct pseries_errorlog *pseries_log;
+   struct pseries_mc_errorlog *mce_log;
+   uint8_t error_type, err_sub_type;
+   uint64_t addr;
+   uint8_t initiator = rtas_error_initiator(errp);
+   int disposition = rtas_error_disposition(errp);
+
+   static const char * const initiators[] = {
+   "Unknown",
+   "CPU",
+   "PCI",
+   "ISA",
+   "Memory",
+   "Power Mgmt",
+   };
+   static const char * const mc_err_types[] = {
+   "UE",
+   "SLB",
+   "ERAT",
+   "TLB",
+   "D-Cache",
+   "Unknown",
+   "I-Cache",
+   };
+   static const char * const mc_ue_types[] = {
+   "Indeterminate",
+   "Instruction fetch",
+   "Page table walk ifetch",
+   "Load/Store",
+   "Page table walk Load/Store",
+   };
+
+   /* SLB sub errors valid values are 0x0, 0x1, 0x2 */
+   static const char * const mc_slb_types[] = {
+   "Parity",
+   "Multihit",
+   "Indeterminate",
+   };
+
+   /* TLB and ERAT sub errors valid values are 0x1, 0x2, 0x3 */
+   static const char * const mc_soft_types[] = {
+   "Unknown",
+   "Parity",
+   "Multihit",
+   "Indeterminate",
+   };
+
+   if (!rtas_error_extended(errp)) {
+   pr_err("Machine check interrupt: Missing extended error log\n");
+   return;
+   }
+
+   pseries_log = get_pseries_errorlog(errp, PSERIES_ELOG_SECT_ID_MCE);
+   if (pseries_log == NULL)
+   return;
+
+   mce_log = (struct pseries_mc_errorlog *)pseries_log->data;
+
+   error_type = rtas_mc_error_type(mce_log);
+   err_sub_type = rtas_mc_error_sub_type(mce_log);
+
+   switch (rtas_error_severity(errp)) {
+   case RTAS_SEVERITY_NO_ERROR:
+   level = KERN_INFO;
+   sevstr = "Harmless";
+   break;
+   case RTAS_SEVERITY_WARNING:
+   level = KERN_WARNING;
+   sevstr = "";
+   break;
+   case RTAS_SEVERITY_ERROR:
+   case RTAS_SEVERITY_ERROR_SYNC:
+   level = KERN_ERR;
+   sevstr = "Severe";
+   break;
+   case RTAS_SEVERITY_FATAL:
+   default:
+   level = KERN_ERR;
+   sevstr = "Fatal";
+   break;
+   }
+
+   printk("%s%s Machine check interrupt [%s]\n", level, sevstr,
+   disposition == RTAS_DISP_FULLY_RECOVERED ?
+   "Recovered" : "Not recovered");
+   if (user_mode(regs)) {
+   printk("%s  NIP: [%016lx] PID: %d Comm: %s\n", level,
+   regs->nip, current->pid, current->comm);
+   } else {
+   printk("%s  NIP [%016lx]: %pS\n", level, regs->nip,
+   (void *)regs->nip);
+   }
+   printk("%s  Initiator: %s\n", level,
+   VAL_TO_STRING(initiators, initiator));
+
+   switch (error_type) {
+   case PSERIES_MC_ERROR_TYPE_UE:
+   printk("%s  

[PATCH v6 8/8] powernv/pseries: consolidate code for mce early handling.

2018-07-04 Thread Mahesh J Salgaonkar
From: Mahesh Salgaonkar 

Now that other platforms also implement a real mode MCE handler,
let's consolidate the code by sharing the existing powernv machine check
early code. Rename machine_check_powernv_early to
machine_check_common_early and reuse the code.

Signed-off-by: Mahesh Salgaonkar 
---
 arch/powerpc/kernel/exceptions-64s.S |   56 +++---
 1 file changed, 11 insertions(+), 45 deletions(-)

diff --git a/arch/powerpc/kernel/exceptions-64s.S 
b/arch/powerpc/kernel/exceptions-64s.S
index 0038596b7906..3e877ec55d50 100644
--- a/arch/powerpc/kernel/exceptions-64s.S
+++ b/arch/powerpc/kernel/exceptions-64s.S
@@ -243,14 +243,13 @@ EXC_REAL_BEGIN(machine_check, 0x200, 0x100)
SET_SCRATCH0(r13)   /* save r13 */
EXCEPTION_PROLOG_0(PACA_EXMC)
 BEGIN_FTR_SECTION
-   b   machine_check_powernv_early
+   b   machine_check_common_early
 FTR_SECTION_ELSE
b   machine_check_pSeries_0
 ALT_FTR_SECTION_END_IFSET(CPU_FTR_HVMODE)
 EXC_REAL_END(machine_check, 0x200, 0x100)
 EXC_VIRT_NONE(0x4200, 0x100)
-TRAMP_REAL_BEGIN(machine_check_powernv_early)
-BEGIN_FTR_SECTION
+TRAMP_REAL_BEGIN(machine_check_common_early)
EXCEPTION_PROLOG_1(PACA_EXMC, NOTEST, 0x200)
/*
 * Register contents:
@@ -306,7 +305,9 @@ BEGIN_FTR_SECTION
/* Save r9 through r13 from EXMC save area to stack frame. */
EXCEPTION_PROLOG_COMMON_2(PACA_EXMC)
mfmsr   r11 /* get MSR value */
+BEGIN_FTR_SECTION
ori r11,r11,MSR_ME  /* turn on ME bit */
+END_FTR_SECTION_IFSET(CPU_FTR_HVMODE)
ori r11,r11,MSR_RI  /* turn on RI bit */
LOAD_HANDLER(r12, machine_check_handle_early)
 1: mtspr   SPRN_SRR0,r12
@@ -325,7 +326,6 @@ BEGIN_FTR_SECTION
	andc    r11,r11,r10 /* Turn off MSR_ME */
b   1b
b   .   /* prevent speculative execution */
-END_FTR_SECTION_IFSET(CPU_FTR_HVMODE)
 
 TRAMP_REAL_BEGIN(machine_check_pSeries)
.globl machine_check_fwnmi
@@ -333,7 +333,7 @@ machine_check_fwnmi:
SET_SCRATCH0(r13)   /* save r13 */
EXCEPTION_PROLOG_0(PACA_EXMC)
 BEGIN_FTR_SECTION
-   b   machine_check_pSeries_early
+   b   machine_check_common_early
 END_FTR_SECTION_IFCLR(CPU_FTR_HVMODE)
 machine_check_pSeries_0:
EXCEPTION_PROLOG_1(PACA_EXMC, KVMTEST_PR, 0x200)
@@ -346,45 +346,6 @@ machine_check_pSeries_0:
 
 TRAMP_KVM_SKIP(PACA_EXMC, 0x200)
 
-TRAMP_REAL_BEGIN(machine_check_pSeries_early)
-BEGIN_FTR_SECTION
-   EXCEPTION_PROLOG_1(PACA_EXMC, NOTEST, 0x200)
-   mr  r10,r1  /* Save r1 */
-   ld  r1,PACAMCEMERGSP(r13)   /* Use MC emergency stack */
-   subi    r1,r1,INT_FRAME_SIZE    /* alloc stack frame   */
-   mfspr   r11,SPRN_SRR0   /* Save SRR0 */
-   mfspr   r12,SPRN_SRR1   /* Save SRR1 */
-   EXCEPTION_PROLOG_COMMON_1()
-   EXCEPTION_PROLOG_COMMON_2(PACA_EXMC)
-   EXCEPTION_PROLOG_COMMON_3(0x200)
-   addi    r3,r1,STACK_FRAME_OVERHEAD
-   BRANCH_LINK_TO_FAR(machine_check_early) /* Function call ABI */
-
-   /* Move original SRR0 and SRR1 into the respective regs */
-   ld  r9,_MSR(r1)
-   mtspr   SPRN_SRR1,r9
-   ld  r3,_NIP(r1)
-   mtspr   SPRN_SRR0,r3
-   ld  r9,_CTR(r1)
-   mtctr   r9
-   ld  r9,_XER(r1)
-   mtxer   r9
-   ld  r9,_LINK(r1)
-   mtlr    r9
-   REST_GPR(0, r1)
-   REST_8GPRS(2, r1)
-   REST_GPR(10, r1)
-   ld  r11,_CCR(r1)
-   mtcr    r11
-   REST_GPR(11, r1)
-   REST_2GPRS(12, r1)
-   /* restore original r1. */
-   ld  r1,GPR1(r1)
-   SET_SCRATCH0(r13)   /* save r13 */
-   EXCEPTION_PROLOG_0(PACA_EXMC)
-   b   machine_check_pSeries_0
-END_FTR_SECTION_IFCLR(CPU_FTR_HVMODE)
-
 EXC_COMMON_BEGIN(machine_check_common)
/*
 * Machine check is different because we use a different
@@ -483,6 +444,9 @@ EXC_COMMON_BEGIN(machine_check_handle_early)
bl  machine_check_early
std r3,RESULT(r1)   /* Save result */
ld  r12,_MSR(r1)
+BEGIN_FTR_SECTION
+   bne 9f  /* pSeries: continue to V mode. */
+END_FTR_SECTION_IFCLR(CPU_FTR_HVMODE)
 
 #ifdef CONFIG_PPC_P7_NAP
/*
@@ -564,7 +528,9 @@ EXC_COMMON_BEGIN(machine_check_handle_early)
 9:
/* Deliver the machine check to host kernel in V mode. */
MACHINE_CHECK_HANDLER_WINDUP
-   b   machine_check_pSeries
+   SET_SCRATCH0(r13)   /* save r13 */
+   EXCEPTION_PROLOG_0(PACA_EXMC)
+   b   machine_check_pSeries_0
 
 EXC_COMMON_BEGIN(unrecover_mce)
/* Invoke machine_check_exception to print MCE event and panic. */



[PATCH v6 7/8] powerpc/pseries: Dump the SLB contents on SLB MCE errors.

2018-07-04 Thread Mahesh J Salgaonkar
From: Mahesh Salgaonkar 

If we get a machine check exception due to SLB errors, dump the
current SLB contents, which is very helpful in debugging the root
cause of the SLB errors. Introduce a dedicated per-CPU buffer to hold
the faulty SLB entries. The real mode MCE handler saves the old SLB
contents into this buffer, accessible through the paca, and they are
printed out later in virtual mode.

With this patch the console will log SLB contents like below on SLB MCE
errors:

[ 3022.938065] SLB contents of cpu 0x3
[ 3022.938066] 00 c800 400ea1b217000500
[ 3022.938067]   1T  ESID=   c0  VSID=  ea1b217 LLP:100
[ 3022.938068] 01 d800 400d43642f000510
[ 3022.938069]   1T  ESID=   d0  VSID=  d43642f LLP:110
[ 3022.938070] 05 f800 400a86c85f000500
[ 3022.938071]   1T  ESID=   f0  VSID=  a86c85f LLP:100
[ 3022.938072] 06 7f000800 400a628b13000d90
[ 3022.938073]   1T  ESID=   7f  VSID=  a628b13 LLP:110
[ 3022.938074] 07 1800 000b7979f523fd90
[ 3022.938075]  256M ESID=1  VSID=   b7979f523f LLP:110
[ 3022.938076] 08 c800 400ea1b217000510
[ 3022.938076]   1T  ESID=   c0  VSID=  ea1b217 LLP:110
[ 3022.938077] 09 c800 400ea1b217000510
[ 3022.938078]   1T  ESID=   c0  VSID=  ea1b217 LLP:110

Suggested-by: Aneesh Kumar K.V 
Suggested-by: Michael Ellerman 
Signed-off-by: Mahesh Salgaonkar 
---
 arch/powerpc/include/asm/book3s/64/mmu-hash.h |7 +++
 arch/powerpc/include/asm/paca.h   |1 
 arch/powerpc/mm/slb.c |   57 +
 arch/powerpc/platforms/pseries/ras.c  |   10 
 arch/powerpc/platforms/pseries/setup.c|   10 
 5 files changed, 84 insertions(+), 1 deletion(-)

diff --git a/arch/powerpc/include/asm/book3s/64/mmu-hash.h 
b/arch/powerpc/include/asm/book3s/64/mmu-hash.h
index cc00a7088cf3..5a3fe282076d 100644
--- a/arch/powerpc/include/asm/book3s/64/mmu-hash.h
+++ b/arch/powerpc/include/asm/book3s/64/mmu-hash.h
@@ -485,9 +485,16 @@ static inline void hpte_init_pseries(void) { }
 
 extern void hpte_init_native(void);
 
+struct slb_entry {
+   u64 esid;
+   u64 vsid;
+};
+
 extern void slb_initialize(void);
 extern void slb_flush_and_rebolt(void);
 extern void slb_flush_and_rebolt_realmode(void);
+extern void slb_save_contents(struct slb_entry *slb_ptr);
+extern void slb_dump_contents(struct slb_entry *slb_ptr);
 
 extern void slb_vmalloc_update(void);
 extern void slb_set_size(u16 size);
diff --git a/arch/powerpc/include/asm/paca.h b/arch/powerpc/include/asm/paca.h
index b441fef53077..653f87c69423 100644
--- a/arch/powerpc/include/asm/paca.h
+++ b/arch/powerpc/include/asm/paca.h
@@ -253,6 +253,7 @@ struct paca_struct {
 #endif
 #ifdef CONFIG_PPC_PSERIES
u8 *mce_data_buf;   /* buffer to hold per cpu rtas errlog */
+   struct slb_entry *mce_faulty_slbs;
 #endif /* CONFIG_PPC_PSERIES */
 } cacheline_aligned;
 
diff --git a/arch/powerpc/mm/slb.c b/arch/powerpc/mm/slb.c
index 5b1813b98358..476ab0b1d4e8 100644
--- a/arch/powerpc/mm/slb.c
+++ b/arch/powerpc/mm/slb.c
@@ -151,6 +151,63 @@ void slb_flush_and_rebolt_realmode(void)
get_paca()->slb_cache_ptr = 0;
 }
 
+void slb_save_contents(struct slb_entry *slb_ptr)
+{
+   int i;
+   unsigned long e, v;
+
+   if (!slb_ptr)
+   return;
+
+   for (i = 0; i < mmu_slb_size; i++) {
+   asm volatile("slbmfee  %0,%1" : "=r" (e) : "r" (i));
+   asm volatile("slbmfev  %0,%1" : "=r" (v) : "r" (i));
+   slb_ptr->esid = e;
+   slb_ptr->vsid = v;
+   slb_ptr++;
+   }
+}
+
+void slb_dump_contents(struct slb_entry *slb_ptr)
+{
+   int i;
+   unsigned long e, v;
+   unsigned long llp;
+
+   if (!slb_ptr)
+   return;
+
+   pr_err("SLB contents of cpu 0x%x\n", smp_processor_id());
+
+   for (i = 0; i < mmu_slb_size; i++) {
+   e = slb_ptr->esid;
+   v = slb_ptr->vsid;
+   slb_ptr++;
+
+   if (!e && !v)
+   continue;
+
+   pr_err("%02d %016lx %016lx\n", i, e, v);
+
+   if (!(e & SLB_ESID_V)) {
+   pr_err("\n");
+   continue;
+   }
+   llp = v & SLB_VSID_LLP;
+   if (v & SLB_VSID_B_1T) {
+   pr_err("  1T  ESID=%9lx  VSID=%13lx LLP:%3lx\n",
+   GET_ESID_1T(e),
+   (v & ~SLB_VSID_B) >> SLB_VSID_SHIFT_1T,
+   llp);
+   } else {
+   pr_err(" 256M ESID=%9lx  VSID=%13lx LLP:%3lx\n",
+   GET_ESID(e),
+   (v & ~SLB_VSID_B) >> SLB_VSID_SHIFT,
+   llp);
+   }
+   }
+}
+
 void slb_vmalloc_update(void)
 {
unsigned long 
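
For context, the intended use of these helpers on the pSeries MCE path, going
by the commit message and the new paca field, is roughly the following (a
hedged sketch only; the actual ras.c/setup.c hunks may differ):

	/* Real-mode MCE handler (pseries): stash the live SLB entries. */
	if (get_paca()->mce_faulty_slbs)
		slb_save_contents(get_paca()->mce_faulty_slbs);

	/* Later, from the virtual-mode handler, print what was captured. */
	slb_dump_contents(get_paca()->mce_faulty_slbs);

	/* Per-CPU buffer allocation at pseries setup time (hypothetical form): */
	paca_ptrs[cpu]->mce_faulty_slbs =
		kmalloc_node(sizeof(struct slb_entry) * mmu_slb_size,
			     GFP_KERNEL, cpu_to_node(cpu));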

Re: [PATCH v2 2/2] hwmon: ibmpowernv: Add attributes to enable/disable sensor groups

2018-07-04 Thread Shilpasri G Bhat
Hi Guenter,

Thanks for reviewing the patch.
On 07/04/2018 08:16 PM, Guenter Roeck wrote:
>> +/* Disable if last sensor in the group */
>> +send_command = true;
>> +for (i = 0; i < sg->nr_sensor; i++) {
>> +struct sensor_data *sd = sg->sensors[i];
>> +
>> +if (sd->enable) {
>> +send_command = false;
>> +break;
>> +}
> 
> This is weird. So there are situations where a request to disable
> a sensor is accepted, but effectively ignored ? Shouldn't that
> return, say, -EBUSY ?

This is because we do not support per-sensor enable/disable. We can only
enable/disable at a sensor-group level.

This patch follows the semantics of disabling a sensor group only when all the
sensors belonging to that group have been disabled. Otherwise the sensor alone
is marked to be disabled and returns -ENODATA when read.

Conversely, a sensor group stays enabled as long as any sensor in that group is
enabled; the sketch below restates that rule.
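
A minimal stand-alone sketch of that rule (the struct stubs below only mirror
the fields used in the patch; the real driver types differ, and the helper
name is made up):

	struct sensor_data { bool enable; };
	struct sensor_group { int nr_sensor; struct sensor_data **sensors; };

	/* The group-level disable command is only worth sending to the
	 * platform once no sensor in the group is enabled any more. */
	static bool sensor_group_idle(const struct sensor_group *sg)
	{
		int i;

		for (i = 0; i < sg->nr_sensor; i++)
			if (sg->sensors[i]->enable)
				return false;	/* some sensor still needs the group */
		return true;
	}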

I will make changes to the remaining code according to your suggestion.

Thanks and Regards,
Shilpa



Re: [PATCH v6 2/4] resource: Use list_head to link sibling resource

2018-07-04 Thread kbuild test robot
Hi Baoquan,

I love your patch! Yet something to improve:

[auto build test ERROR on linus/master]
[also build test ERROR on v4.18-rc3 next-20180704]
[if your patch is applied to the wrong git tree, please drop us a note to help 
improve the system]

url:
https://github.com/0day-ci/linux/commits/Baoquan-He/resource-Use-list_head-to-link-sibling-resource/20180704-121402
config: ia64-allnoconfig (attached as .config)
compiler: ia64-linux-gcc (GCC) 8.1.0
reproduce:
wget 
https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O 
~/bin/make.cross
chmod +x ~/bin/make.cross
# save the attached .config to linux build tree
GCC_VERSION=8.1.0 make.cross ARCH=ia64 

All errors (new ones prefixed by >>):

   arch/ia64/sn/kernel/io_init.c: In function 'sn_io_slot_fixup':
>> arch/ia64/sn/kernel/io_init.c:195:19: error: invalid operands to binary && 
>> (have 'int' and 'struct list_head')
  if (res->parent && res->parent->child)
  ~~~ ^~ ~~
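
With 'child' converted to a struct list_head, the pointer-truth test no longer
compiles; one plausible adaptation at this call site (a sketch, not taken from
the posted series) would be:

	if (res->parent && !list_empty(&res->parent->child)) {
		/* ... existing body unchanged ... */
	}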

vim +195 arch/ia64/sn/kernel/io_init.c

^1da177e Linus Torvalds  2005-04-16  142  
3ec829b6 John Keller 2005-11-29  143  /*
6f09a925 John Keller 2007-01-30  144   * sn_io_slot_fixup() -   We are not 
running with an ACPI capable PROM,
8ea6091f John Keller 2006-10-04  145   *  and need to 
convert the pci_dev->resource
8ea6091f John Keller 2006-10-04  146   *  'start' and 
'end' addresses to mapped addresses,
8ea6091f John Keller 2006-10-04  147   *  and setup the 
pci_controller->window array entries.
^1da177e Linus Torvalds  2005-04-16  148   */
8ea6091f John Keller 2006-10-04  149  void
6f09a925 John Keller 2007-01-30  150  sn_io_slot_fixup(struct pci_dev *dev)
^1da177e Linus Torvalds  2005-04-16  151  {
^1da177e Linus Torvalds  2005-04-16  152int idx;
ab97b8cc Bjorn Helgaas   2016-03-02  153struct resource *res;
18c25526 Matt Fleming2016-05-04  154unsigned long size;
6f09a925 John Keller 2007-01-30  155struct pcidev_info *pcidev_info;
6f09a925 John Keller 2007-01-30  156struct sn_irq_info *sn_irq_info;
6f09a925 John Keller 2007-01-30  157int status;
6f09a925 John Keller 2007-01-30  158  
6f09a925 John Keller 2007-01-30  159pcidev_info = 
kzalloc(sizeof(struct pcidev_info), GFP_KERNEL);
6f09a925 John Keller 2007-01-30  160if (!pcidev_info)
d4ed8084 Harvey Harrison 2008-03-04  161panic("%s: Unable to 
alloc memory for pcidev_info", __func__);
6f09a925 John Keller 2007-01-30  162  
6f09a925 John Keller 2007-01-30  163sn_irq_info = 
kzalloc(sizeof(struct sn_irq_info), GFP_KERNEL);
6f09a925 John Keller 2007-01-30  164if (!sn_irq_info)
d4ed8084 Harvey Harrison 2008-03-04  165panic("%s: Unable to 
alloc memory for sn_irq_info", __func__);
6f09a925 John Keller 2007-01-30  166  
6f09a925 John Keller 2007-01-30  167/* Call to retrieve pci device 
information needed by kernel. */
6f09a925 John Keller 2007-01-30  168status = 
sal_get_pcidev_info((u64) pci_domain_nr(dev),
6f09a925 John Keller 2007-01-30  169(u64) dev->bus->number,
6f09a925 John Keller 2007-01-30  170dev->devfn,
6f09a925 John Keller 2007-01-30  171(u64) __pa(pcidev_info),
6f09a925 John Keller 2007-01-30  172(u64) 
__pa(sn_irq_info));
6f09a925 John Keller 2007-01-30  173  
80a03e29 Stoyan Gaydarov 2009-03-10  174BUG_ON(status); /* Cannot get 
platform pci device information */
6f09a925 John Keller 2007-01-30  175  
3ec829b6 John Keller 2005-11-29  176  
^1da177e Linus Torvalds  2005-04-16  177/* Copy over PIO Mapped 
Addresses */
^1da177e Linus Torvalds  2005-04-16  178for (idx = 0; idx <= 
PCI_ROM_RESOURCE; idx++) {
ab97b8cc Bjorn Helgaas   2016-03-02  179if 
(!pcidev_info->pdi_pio_mapped_addr[idx])
^1da177e Linus Torvalds  2005-04-16  180continue;
^1da177e Linus Torvalds  2005-04-16  181  
ab97b8cc Bjorn Helgaas   2016-03-02  182		res = &dev->resource[idx];
ab97b8cc Bjorn Helgaas   2016-03-02  183  
ab97b8cc Bjorn Helgaas   2016-03-02  184size = res->end - 
res->start;
ab97b8cc Bjorn Helgaas   2016-03-02  185if (size == 0)
3ec829b6 John Keller 2005-11-29  186continue;
ab97b8cc Bjorn Helgaas   2016-03-02  187  
240504ad Bjorn Helgaas   2016-03-02  188res->start = 
pcidev_info->pdi_pio_mapped_addr[idx];
18c25526 Matt Fleming2016-05-04  189res->end = res->start + 
size;
64715725 Bernhard Walle  2007-03-18  190  
64715725 Bernhard Walle  2007-03-18  191/*
64715725 Bernhard Walle  2007-03-18  192

Re: [v2 PATCH 2/2] powerpc: Enable CPU_FTR_ASYM_SMT for interleaved big-cores

2018-07-04 Thread Murilo Opsfelder Araujo
On Wed, Jul 04, 2018 at 01:45:05PM +0530, Gautham R Shenoy wrote:
> Hi Murilo,
> 
> Thanks for the review.
> 
> On Tue, Jul 03, 2018 at 02:53:46PM -0300, Murilo Opsfelder Araujo wrote:
> [..snip..]
> 
> > > -/* Initialize CPU <=> thread mapping/
> > > + if (has_interleaved_big_core) {
> > > + int key = __builtin_ctzl(CPU_FTR_ASYM_SMT);
> > > +
> > > + cur_cpu_spec->cpu_features |= CPU_FTR_ASYM_SMT;
> > > + static_branch_enable(&cpu_feature_keys[key]);
> > > + pr_info("Detected interleaved big-cores\n");
> > > + }
> > 
> > Shouldn't we use cpu_has_feature(CPU_FTR_ASYM_SMT) before setting
> > > it?
> 
> 
> Are you suggesting that we do the following?
> 
> if (has_interleaved_big_core &&
> !cpu_has_feature(CPU_FTR_ASYM_SMT)) {
>   ...
> }
> 
> Currently CPU_FTR_ASYM_SMT is set at compile time only for POWER7,
> where running tasks on lower numbered threads gives us the benefit
> of SMT thread folding. Interleaved big core is a feature introduced
> only on POWER9. Thus, we know that CPU_FTR_ASYM_SMT is not set in
> cpu_features at this point.

Since we're setting CPU_FTR_ASYM_SMT, it doesn't make sense to use
cpu_has_feature(CPU_FTR_ASYM_SMT).  I thought cpu_has_feature() held all
available features (not necessarily enabled) that we could check before
setting or enabling such a feature.  I think I misread it.  Sorry.

> 
> > 
> > > +
> > > + /* Initialize CPU <=> thread mapping/
> > >*
> > >* WARNING: We assume that the number of threads is the same for
> > >* every CPU in the system. If that is not the case, then some code
> > > -- 
> > > 1.9.4
> > > 
> > 
> > -- 
> > Murilo
> 
> --
> Thanks and Regards
> gautham.
> 

-- 
Murilo



Re: [PATCH v6 1/4] resource: Move reparent_resources() to kernel/resource.c and make it public

2018-07-04 Thread Andy Shevchenko
On Wed, Jul 4, 2018 at 7:10 AM, Baoquan He  wrote:
> reparent_resources() is duplicated in arch/microblaze/pci/pci-common.c
> and arch/powerpc/kernel/pci-common.c, so move it to kernel/resource.c
> so that it's shared.

With a couple of comments below,

Reviewed-by: Andy Shevchenko 

P.S. In some commit message in this series you used 'likt' instead of 'like'.

>
> Signed-off-by: Baoquan He 
> ---
>  arch/microblaze/pci/pci-common.c | 37 -
>  arch/powerpc/kernel/pci-common.c | 35 ---
>  include/linux/ioport.h   |  1 +
>  kernel/resource.c| 39 +++
>  4 files changed, 40 insertions(+), 72 deletions(-)
>
> diff --git a/arch/microblaze/pci/pci-common.c 
> b/arch/microblaze/pci/pci-common.c
> index f34346d56095..7899bafab064 100644
> --- a/arch/microblaze/pci/pci-common.c
> +++ b/arch/microblaze/pci/pci-common.c
> @@ -619,43 +619,6 @@ int pcibios_add_device(struct pci_dev *dev)
>  EXPORT_SYMBOL(pcibios_add_device);
>
>  /*
> - * Reparent resource children of pr that conflict with res
> - * under res, and make res replace those children.
> - */
> -static int __init reparent_resources(struct resource *parent,
> -struct resource *res)
> -{
> -   struct resource *p, **pp;
> -   struct resource **firstpp = NULL;
> -
> -   for (pp = &parent->child; (p = *pp) != NULL; pp = &p->sibling) {
> -   if (p->end < res->start)
> -   continue;
> -   if (res->end < p->start)
> -   break;
> -   if (p->start < res->start || p->end > res->end)
> -   return -1;  /* not completely contained */
> -   if (firstpp == NULL)
> -   firstpp = pp;
> -   }
> -   if (firstpp == NULL)
> -   return -1;  /* didn't find any conflicting entries? */
> -   res->parent = parent;
> -   res->child = *firstpp;
> -   res->sibling = *pp;
> -   *firstpp = res;
> -   *pp = NULL;
> -   for (p = res->child; p != NULL; p = p->sibling) {
> -   p->parent = res;
> -   pr_debug("PCI: Reparented %s [%llx..%llx] under %s\n",
> -p->name,
> -(unsigned long long)p->start,
> -(unsigned long long)p->end, res->name);
> -   }
> -   return 0;
> -}
> -
> -/*
>   *  Handle resources of PCI devices.  If the world were perfect, we could
>   *  just allocate all the resource regions and do nothing more.  It isn't.
>   *  On the other hand, we cannot just re-allocate all devices, as it would
> diff --git a/arch/powerpc/kernel/pci-common.c 
> b/arch/powerpc/kernel/pci-common.c
> index fe9733aa..926035bb378d 100644
> --- a/arch/powerpc/kernel/pci-common.c
> +++ b/arch/powerpc/kernel/pci-common.c
> @@ -1088,41 +1088,6 @@ resource_size_t pcibios_align_resource(void *data, 
> const struct resource *res,
>  EXPORT_SYMBOL(pcibios_align_resource);
>
>  /*
> - * Reparent resource children of pr that conflict with res
> - * under res, and make res replace those children.
> - */
> -static int reparent_resources(struct resource *parent,
> -struct resource *res)
> -{
> -   struct resource *p, **pp;
> -   struct resource **firstpp = NULL;
> -
> -   for (pp = &parent->child; (p = *pp) != NULL; pp = &p->sibling) {
> -   if (p->end < res->start)
> -   continue;
> -   if (res->end < p->start)
> -   break;
> -   if (p->start < res->start || p->end > res->end)
> -   return -1;  /* not completely contained */
> -   if (firstpp == NULL)
> -   firstpp = pp;
> -   }
> -   if (firstpp == NULL)
> -   return -1;  /* didn't find any conflicting entries? */
> -   res->parent = parent;
> -   res->child = *firstpp;
> -   res->sibling = *pp;
> -   *firstpp = res;
> -   *pp = NULL;
> -   for (p = res->child; p != NULL; p = p->sibling) {
> -   p->parent = res;
> -   pr_debug("PCI: Reparented %s %pR under %s\n",
> -p->name, p, res->name);
> -   }
> -   return 0;
> -}
> -
> -/*
>   *  Handle resources of PCI devices.  If the world were perfect, we could
>   *  just allocate all the resource regions and do nothing more.  It isn't.
>   *  On the other hand, we cannot just re-allocate all devices, as it would
> diff --git a/include/linux/ioport.h b/include/linux/ioport.h
> index da0ebaec25f0..dfdcd0bfe54e 100644
> --- a/include/linux/ioport.h
> +++ b/include/linux/ioport.h
> @@ -192,6 +192,7 @@ extern int allocate_resource(struct resource *root, 
> struct resource *new,
>  struct resource *lookup_resource(struct resource *root, resource_size_t 
> start);
>  int adjust_resource(struct resource *res, 
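
For reference, the usual caller pattern for the now-public helper, loosely
adapted from the powerpc PCI code (a simplified sketch, not part of this
patch; error handling elided):

	/* Try a plain insertion first; on conflict, move the conflicting
	 * children under 'res' and make 'res' their new parent. */
	if (request_resource(parent, res) == 0)
		return 0;
	if (reparent_resources(parent, res) == 0)
		return 0;
	pr_warn("PCI: failed to claim resource %pR\n", res);
	return -EBUSY;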

Re: [PATCH v6 2/4] resource: Use list_head to link sibling resource

2018-07-04 Thread kbuild test robot
Hi Baoquan,

I love your patch! Yet something to improve:

[auto build test ERROR on linus/master]
[also build test ERROR on v4.18-rc3 next-20180704]
[if your patch is applied to the wrong git tree, please drop us a note to help 
improve the system]

url:
https://github.com/0day-ci/linux/commits/Baoquan-He/resource-Use-list_head-to-link-sibling-resource/20180704-121402
config: mips-rb532_defconfig (attached as .config)
compiler: mipsel-linux-gnu-gcc (Debian 7.2.0-11) 7.2.0
reproduce:
wget 
https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O 
~/bin/make.cross
chmod +x ~/bin/make.cross
# save the attached .config to linux build tree
GCC_VERSION=7.2.0 make.cross ARCH=mips 

All error/warnings (new ones prefixed by >>):

>> arch/mips/pci/pci-rc32434.c:57:11: error: initialization from incompatible 
>> pointer type [-Werror=incompatible-pointer-types]
 .child = &rc32434_res_pci_mem2
  ^
   arch/mips/pci/pci-rc32434.c:57:11: note: (near initialization for 
'rc32434_res_pci_mem1.child.next')
>> arch/mips/pci/pci-rc32434.c:51:47: warning: missing braces around 
>> initializer [-Wmissing-braces]
static struct resource rc32434_res_pci_mem1 = {
  ^
   arch/mips/pci/pci-rc32434.c:60:47: warning: missing braces around 
initializer [-Wmissing-braces]
static struct resource rc32434_res_pci_mem2 = {
  ^
   cc1: some warnings being treated as errors

vim +57 arch/mips/pci/pci-rc32434.c

73b4390f Ralf Baechle 2008-07-16  50  
73b4390f Ralf Baechle 2008-07-16 @51  static struct resource rc32434_res_pci_mem1 = {
73b4390f Ralf Baechle 2008-07-16  52	.name = "PCI MEM1",
73b4390f Ralf Baechle 2008-07-16  53	.start = 0x50000000,
73b4390f Ralf Baechle 2008-07-16  54	.end = 0x5FFFFFFF,
73b4390f Ralf Baechle 2008-07-16  55	.flags = IORESOURCE_MEM,
73b4390f Ralf Baechle 2008-07-16  56	.sibling = NULL,
73b4390f Ralf Baechle 2008-07-16 @57	.child = &rc32434_res_pci_mem2
73b4390f Ralf Baechle 2008-07-16  58  };
73b4390f Ralf Baechle 2008-07-16  59  

:: The code at line 57 was first introduced by commit
:: 73b4390fb23456964201abda79f1210fe337d01a [MIPS] Routerboard 532: Support 
for base system

:: TO: Ralf Baechle 
:: CC: Ralf Baechle 
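
A static initializer like the one above cannot keep a plain pointer in .child
once sibling/child become list_heads; one possible shape, assuming the series
keeps the usual LIST_HEAD_INIT pattern (sketch only, not taken from the posted
patch), is:

	static struct resource rc32434_res_pci_mem1 = {
		.name	= "PCI MEM1",
		.start	= 0x50000000,
		.end	= 0x5FFFFFFF,
		.flags	= IORESOURCE_MEM,
		.child	= LIST_HEAD_INIT(rc32434_res_pci_mem1.child),
		/* rc32434_res_pci_mem2 would then be linked in at runtime,
		 * e.g. via insert_resource(), instead of via a static pointer. */
	};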

---
0-DAY kernel test infrastructureOpen Source Technology Center
https://lists.01.org/pipermail/kbuild-all   Intel Corporation


.config.gz
Description: application/gzip


Re: [PATCHv3 2/4] drivers/base: utilize device tree info to shutdown devices

2018-07-04 Thread kbuild test robot
Hi Pingfan,

Thank you for the patch! Perhaps something to improve:

[auto build test WARNING on driver-core/driver-core-testing]
[also build test WARNING on v4.18-rc3 next-20180704]
[if your patch is applied to the wrong git tree, please drop us a note to help 
improve the system]

url:
https://github.com/0day-ci/linux/commits/Pingfan-Liu/drivers-base-bugfix-for-supplier-consumer-ordering-in-device_kset/20180703-184317
reproduce: make htmldocs

All warnings (new ones prefixed by >>):

   WARNING: convert(1) not found, for SVG to PDF conversion install ImageMagick 
(https://www.imagemagick.org)
   mm/mempool.c:228: warning: Function parameter or member 'pool' not described 
in 'mempool_init'
   include/net/cfg80211.h:4279: warning: Function parameter or member 
'wext.ibss' not described in 'wireless_dev'
   include/net/cfg80211.h:4279: warning: Function parameter or member 
'wext.connect' not described in 'wireless_dev'
   include/net/cfg80211.h:4279: warning: Function parameter or member 
'wext.keys' not described in 'wireless_dev'
   include/net/cfg80211.h:4279: warning: Function parameter or member 'wext.ie' 
not described in 'wireless_dev'
   include/net/cfg80211.h:4279: warning: Function parameter or member 
'wext.ie_len' not described in 'wireless_dev'
   include/net/cfg80211.h:4279: warning: Function parameter or member 
'wext.bssid' not described in 'wireless_dev'
   include/net/cfg80211.h:4279: warning: Function parameter or member 
'wext.ssid' not described in 'wireless_dev'
   include/net/cfg80211.h:4279: warning: Function parameter or member 
'wext.default_key' not described in 'wireless_dev'
   include/net/cfg80211.h:4279: warning: Function parameter or member 
'wext.default_mgmt_key' not described in 'wireless_dev'
   include/net/cfg80211.h:4279: warning: Function parameter or member 
'wext.prev_bssid_valid' not described in 'wireless_dev'
   include/net/mac80211.h:2282: warning: Function parameter or member 
'radiotap_timestamp.units_pos' not described in 'ieee80211_hw'
   include/net/mac80211.h:2282: warning: Function parameter or member 
'radiotap_timestamp.accuracy' not described in 'ieee80211_hw'
   include/net/mac80211.h:955: warning: Function parameter or member 
'control.rates' not described in 'ieee80211_tx_info'
   include/net/mac80211.h:955: warning: Function parameter or member 
'control.rts_cts_rate_idx' not described in 'ieee80211_tx_info'
   include/net/mac80211.h:955: warning: Function parameter or member 
'control.use_rts' not described in 'ieee80211_tx_info'
   include/net/mac80211.h:955: warning: Function parameter or member 
'control.use_cts_prot' not described in 'ieee80211_tx_info'
   include/net/mac80211.h:955: warning: Function parameter or member 
'control.short_preamble' not described in 'ieee80211_tx_info'
   include/net/mac80211.h:955: warning: Function parameter or member 
'control.skip_table' not described in 'ieee80211_tx_info'
   include/net/mac80211.h:955: warning: Function parameter or member 
'control.jiffies' not described in 'ieee80211_tx_info'
   include/net/mac80211.h:955: warning: Function parameter or member 
'control.vif' not described in 'ieee80211_tx_info'
   include/net/mac80211.h:955: warning: Function parameter or member 
'control.hw_key' not described in 'ieee80211_tx_info'
   include/net/mac80211.h:955: warning: Function parameter or member 
'control.flags' not described in 'ieee80211_tx_info'
   include/net/mac80211.h:955: warning: Function parameter or member 
'control.enqueue_time' not described in 'ieee80211_tx_info'
   include/net/mac80211.h:955: warning: Function parameter or member 'ack' not 
described in 'ieee80211_tx_info'
   include/net/mac80211.h:955: warning: Function parameter or member 
'ack.cookie' not described in 'ieee80211_tx_info'
   include/net/mac80211.h:955: warning: Function parameter or member 
'status.rates' not described in 'ieee80211_tx_info'
   include/net/mac80211.h:955: warning: Function parameter or member 
'status.ack_signal' not described in 'ieee80211_tx_info'
   include/net/mac80211.h:955: warning: Function parameter or member 
'status.ampdu_ack_len' not described in 'ieee80211_tx_info'
   include/net/mac80211.h:955: warning: Function parameter or member 
'status.ampdu_len' not described in 'ieee80211_tx_info'
   include/net/mac80211.h:955: warning: Function parameter or member 
'status.antenna' not described in 'ieee80211_tx_info'
   include/net/mac80211.h:955: warning: Function parameter or member 
'status.tx_time' not described in 'ieee80211_tx_info'
   include/net/mac80211.h:955: warning: Function parameter or member 
'status.is_valid_ack_signal' not described in 'ieee80211_tx_info'
   include/net/mac80211.h:955: warning: Function parameter or member 
'status.status_driver_data' not described in 'ieee80211_tx_info'
   include/net/mac80211.h:955: warning: Function parameter or member 
'driver_rates' not described in 'ieee80211_tx_info'
   include/net/mac80211.h:955: warning: Function par

Re: [PATCHv5 4/4] arm64: Add build salt to the vDSO

2018-07-04 Thread Will Deacon
On Tue, Jul 03, 2018 at 04:34:30PM -0700, Laura Abbott wrote:
> 
> The vDSO needs to have a unique build id in a similar manner
> to the kernel and modules. Use the build salt macro.
> 
> Signed-off-by: Laura Abbott 
> ---
> v5: I was previously focused on x86 only but since powerpc gave a patch,
> I figured I would do arm64 since the changes were also fairly simple.
> ---
>  arch/arm64/kernel/vdso/note.S | 3 +++
>  1 file changed, 3 insertions(+)

If you drop the trailing semicolon, then:

Acked-by: Will Deacon 

Will

> diff --git a/arch/arm64/kernel/vdso/note.S b/arch/arm64/kernel/vdso/note.S
> index b82c85e5d972..2c429dfd3f45 100644
> --- a/arch/arm64/kernel/vdso/note.S
> +++ b/arch/arm64/kernel/vdso/note.S
> @@ -22,7 +22,10 @@
>  #include 
>  #include 
>  #include 
> +#include 
>  
>  ELFNOTE_START(Linux, 0, "a")
>   .long LINUX_VERSION_CODE
>  ELFNOTE_END
> +
> +BUILD_SALT;
> -- 
> 2.17.1
> 


Re: [alsa-devel] [PATCH] ASoC: fsl_spdif: Use 64-bit arithmetic instead of 32-bit

2018-07-04 Thread Nicolin Chen
On Wed, Jul 04, 2018 at 09:18:33AM -0500, Gustavo A. R. Silva wrote:
> Add suffix ULL to constant 64 in order to give the compiler complete
> information about the proper arithmetic to use.
> 
> Notice that such a constant is used in a context that expects an
> expression of type u64 (64 bits, unsigned) and the following
> expression is currently being evaluated using 32-bit arithmetic:
> 
> rate[index] * txclk_df * 64
> 
> Addresses-Coverity-ID: 1222129 ("Unintentional integer overflow")
> Signed-off-by: Gustavo A. R. Silva 

Acked-by: Nicolin Chen 

Thanks.

> ---
>  sound/soc/fsl/fsl_spdif.c | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
> 
> diff --git a/sound/soc/fsl/fsl_spdif.c b/sound/soc/fsl/fsl_spdif.c
> index 9b59d87..740b90d 100644
> --- a/sound/soc/fsl/fsl_spdif.c
> +++ b/sound/soc/fsl/fsl_spdif.c
> @@ -1118,7 +1118,7 @@ static u32 fsl_spdif_txclk_caldiv(struct fsl_spdif_priv 
> *spdif_priv,
>  
>   for (sysclk_df = sysclk_dfmin; sysclk_df <= sysclk_dfmax; sysclk_df++) {
>   for (txclk_df = 1; txclk_df <= 128; txclk_df++) {
> - rate_ideal = rate[index] * txclk_df * 64;
> + rate_ideal = rate[index] * txclk_df * 64ULL;
>   if (round)
>   rate_actual = clk_round_rate(clk, rate_ideal);
>   else
> -- 
> 2.7.4
> 
> ___
> Alsa-devel mailing list
> alsa-de...@alsa-project.org
> http://mailman.alsa-project.org/mailman/listinfo/alsa-devel
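
A stand-alone illustration of the promotion difference the ULL suffix makes
(the inputs below are made up; only the 32-bit wraparound versus the 64-bit
result matters):

	#include <stdint.h>
	#include <stdio.h>

	int main(void)
	{
		uint32_t rate = 768000, txclk_df = 128;	/* made-up inputs */

		/* All operands are 32-bit, so the product wraps modulo 2^32
		 * before being widened for the assignment. */
		uint64_t wrapped = rate * txclk_df * 64;
		/* 64ULL promotes the whole expression to 64-bit arithmetic. */
		uint64_t correct = rate * txclk_df * 64ULL;

		printf("%llu vs %llu\n",
		       (unsigned long long)wrapped,
		       (unsigned long long)correct);
		return 0;
	}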