[PATCH v2] powerpc/slb: Define an enum for the bolted indexes

2015-08-13 Thread Michael Ellerman
From: Anshuman Khandual khand...@linux.vnet.ibm.com

This patch defines an enum for the three bolted SLB indexes we use.
Switch the functions that take the indexes as an argument to use the
enum.

Signed-off-by: Anshuman Khandual khand...@linux.vnet.ibm.com
Signed-off-by: Michael Ellerman m...@ellerman.id.au
---

v2: Use index rather than slot as that's what the ISA docs call it.
Use the enum in the function signatures.

 arch/powerpc/mm/slb.c | 47 ++-
 1 file changed, 26 insertions(+), 21 deletions(-)

diff --git a/arch/powerpc/mm/slb.c b/arch/powerpc/mm/slb.c
index 8a32a2be3c53..0c7115fd314b 100644
--- a/arch/powerpc/mm/slb.c
+++ b/arch/powerpc/mm/slb.c
@@ -25,6 +25,11 @@
 #include <asm/udbg.h>
 #include <asm/code-patching.h>
 
+enum slb_index {
+	LINEAR_INDEX	= 0, /* Kernel linear map  (0xc000) */
+	VMALLOC_INDEX	= 1, /* Kernel virtual map (0xd000) */
+	KSTACK_INDEX	= 2, /* Kernel stack map */
+};
 
 extern void slb_allocate_realmode(unsigned long ea);
 extern void slb_allocate_user(unsigned long ea);
@@ -41,9 +46,9 @@ static void slb_allocate(unsigned long ea)
(((ssize) == MMU_SEGSIZE_256M)? ESID_MASK: ESID_MASK_1T)
 
 static inline unsigned long mk_esid_data(unsigned long ea, int ssize,
-unsigned long entry)
+enum slb_index index)
 {
-	return (ea & slb_esid_mask(ssize)) | SLB_ESID_V | entry;
+	return (ea & slb_esid_mask(ssize)) | SLB_ESID_V | index;
 }
 
 static inline unsigned long mk_vsid_data(unsigned long ea, int ssize,
@@ -55,39 +60,39 @@ static inline unsigned long mk_vsid_data(unsigned long ea, int ssize,
 
 static inline void slb_shadow_update(unsigned long ea, int ssize,
 unsigned long flags,
-unsigned long entry)
+enum slb_index index)
 {
/*
 * Clear the ESID first so the entry is not valid while we are
 * updating it.  No write barriers are needed here, provided
 * we only update the current CPU's SLB shadow buffer.
 */
-	get_slb_shadow()->save_area[entry].esid = 0;
-	get_slb_shadow()->save_area[entry].vsid =
+	get_slb_shadow()->save_area[index].esid = 0;
+	get_slb_shadow()->save_area[index].vsid =
 		cpu_to_be64(mk_vsid_data(ea, ssize, flags));
-	get_slb_shadow()->save_area[entry].esid =
-		cpu_to_be64(mk_esid_data(ea, ssize, entry));
+	get_slb_shadow()->save_area[index].esid =
+		cpu_to_be64(mk_esid_data(ea, ssize, index));
 }
 
-static inline void slb_shadow_clear(unsigned long entry)
+static inline void slb_shadow_clear(enum slb_index index)
 {
-	get_slb_shadow()->save_area[entry].esid = 0;
+	get_slb_shadow()->save_area[index].esid = 0;
 }
 
 static inline void create_shadowed_slbe(unsigned long ea, int ssize,
unsigned long flags,
-   unsigned long entry)
+   enum slb_index index)
 {
/*
 * Updating the shadow buffer before writing the SLB ensures
 * we don't get a stale entry here if we get preempted by PHYP
 * between these two statements.
 */
-   slb_shadow_update(ea, ssize, flags, entry);
+   slb_shadow_update(ea, ssize, flags, index);
 
 	asm volatile("slbmte  %0,%1" :
 		     : "r" (mk_vsid_data(ea, ssize, flags)),
-		       "r" (mk_esid_data(ea, ssize, entry))
+		       "r" (mk_esid_data(ea, ssize, index))
 		     : "memory" );
 }
 
@@ -103,16 +108,16 @@ static void __slb_flush_and_rebolt(void)
lflags = SLB_VSID_KERNEL | linear_llp;
vflags = SLB_VSID_KERNEL | vmalloc_llp;
 
-	ksp_esid_data = mk_esid_data(get_paca()->kstack, mmu_kernel_ssize, 2);
+	ksp_esid_data = mk_esid_data(get_paca()->kstack, mmu_kernel_ssize, KSTACK_INDEX);
 	if ((ksp_esid_data & ~0xfffUL) <= PAGE_OFFSET) {
 		ksp_esid_data &= ~SLB_ESID_V;
 		ksp_vsid_data = 0;
-		slb_shadow_clear(2);
+		slb_shadow_clear(KSTACK_INDEX);
 	} else {
 		/* Update stack entry; others don't change */
-		slb_shadow_update(get_paca()->kstack, mmu_kernel_ssize, lflags, 2);
+		slb_shadow_update(get_paca()->kstack, mmu_kernel_ssize, lflags, KSTACK_INDEX);
 		ksp_vsid_data =
-			be64_to_cpu(get_slb_shadow()->save_area[2].vsid);
+			be64_to_cpu(get_slb_shadow()->save_area[KSTACK_INDEX].vsid);
}
 
/* We need to do this all in asm, so we're sure we don't touch
@@ -151,7 +156,7 @@ void slb_vmalloc_update(void)
unsigned long vflags;
 
vflags = SLB_VSID_KERNEL | 

Re: [PATCH 02/20] powerpc/8xx: Map linear kernel RAM with 8M pages

2015-08-13 Thread Segher Boessenkool
On Wed, Aug 12, 2015 at 03:40:56PM +0200, Christophe Leroy wrote:
   /* Insert level 1 index */
   rlwimi  r11, r10, 32 - ((PAGE_SHIFT - 2) << 1), (PAGE_SHIFT - 2) << 1, 29
   lwz     r11, (swapper_pg_dir-PAGE_OFFSET)@l(r11)	/* Get the level 1 entry */
 + mtcr    r11

Maybe mtcrf is faster?  You only want one field, anyhow.

 + bgt-cr7,5f  /* CR7.GT = bit 29 = Large page (8M or 512K) */

You can write this as   bt- 29,5f   which should be easier to read.

   /* Insert level 1 index */
  3:   rlwimi  r11, r10, 32 - ((PAGE_SHIFT - 2) << 1), (PAGE_SHIFT - 2) << 1, 29
   lwz     r11, (swapper_pg_dir-PAGE_OFFSET)@l(r11)	/* Get the level 1 entry */
 + mtcr    r11
 + bgt     cr7,200f

Same here...  Probably good to comment it, too.


Segher

[PATCH] powerpc/slb: Use a local to avoid multiple calls to get_slb_shadow()

2015-08-13 Thread Michael Ellerman
For no reason other than that the repeated calls to get_slb_shadow() look ugly.

Signed-off-by: Michael Ellerman m...@ellerman.id.au
---
 arch/powerpc/mm/slb.c | 10 +-
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/arch/powerpc/mm/slb.c b/arch/powerpc/mm/slb.c
index 0c7115fd314b..515730e499fe 100644
--- a/arch/powerpc/mm/slb.c
+++ b/arch/powerpc/mm/slb.c
@@ -62,16 +62,16 @@ static inline void slb_shadow_update(unsigned long ea, int ssize,
 unsigned long flags,
 enum slb_index index)
 {
+   struct slb_shadow *p = get_slb_shadow();
+
/*
 * Clear the ESID first so the entry is not valid while we are
 * updating it.  No write barriers are needed here, provided
 * we only update the current CPU's SLB shadow buffer.
 */
-	get_slb_shadow()->save_area[index].esid = 0;
-	get_slb_shadow()->save_area[index].vsid =
-		cpu_to_be64(mk_vsid_data(ea, ssize, flags));
-	get_slb_shadow()->save_area[index].esid =
-		cpu_to_be64(mk_esid_data(ea, ssize, index));
+	p->save_area[index].esid = 0;
+	p->save_area[index].vsid = cpu_to_be64(mk_vsid_data(ea, ssize, flags));
+	p->save_area[index].esid = cpu_to_be64(mk_esid_data(ea, ssize, index));
 }
 
 static inline void slb_shadow_clear(enum slb_index index)
-- 
2.1.4


[PATCH 2/3] powerpc/e6500: hw tablewalk: optimize a bit for tcd lock acquiring codes

2015-08-13 Thread Kevin Hao
There is no need to keep the instructions that calculate the lock value
(cpu number + 1) and clear the eq bit of cr1 inside the lbarx/stbcx loop.
Also, when the lock is held by another thread, the current lock value can
never be equal to the lock value used by the current cpu, so we can skip
comparing these two lock values in the lbz/bne polling loop.
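
For readers not fluent in the lbarx/stbcx. idiom, a rough user-space C11
analogue of the intended flow after this change (only a sketch; the real
code is the assembly in the diff below, and the lock byte and function
names here are stand-ins, not kernel symbols):

#include <stdatomic.h>

static _Atomic unsigned char tcd_lock;	/* stand-in for the per-core lock byte */

static void tcd_lock_acquire(unsigned char cpu)
{
	unsigned char token = cpu + 1;	/* lock value: computed once, outside the retry loop */
	unsigned char expected;

	for (;;) {
		expected = 0;
		/* lbarx/stbcx. equivalent: only try to claim a free lock */
		if (atomic_compare_exchange_weak(&tcd_lock, &expected, token))
			return;
		/*
		 * The lock is held.  Recursion (old value == our token) is
		 * handled on a separate path; otherwise just poll with plain
		 * loads until it is free, without re-comparing the tokens.
		 */
		while (atomic_load_explicit(&tcd_lock, memory_order_relaxed) != 0)
			;
	}
}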

Signed-off-by: Kevin Hao haoke...@gmail.com
---
 arch/powerpc/mm/tlb_low_64e.S | 10 +-
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/arch/powerpc/mm/tlb_low_64e.S b/arch/powerpc/mm/tlb_low_64e.S
index 765b419883f2..e4185581c5a7 100644
--- a/arch/powerpc/mm/tlb_low_64e.S
+++ b/arch/powerpc/mm/tlb_low_64e.S
@@ -308,11 +308,11 @@ BEGIN_FTR_SECTION /* CPU_FTR_SMT */
 *
 * MAS6:IND should be already set based on MAS4
 */
-1:	lbarx	r15,0,r11
 	lhz	r10,PACAPACAINDEX(r13)
-	cmpdi	r15,0
-	cmpdi	cr1,r15,1	/* set cr1.eq = 0 for non-recursive */
 	addi	r10,r10,1
+	crclr	cr1*4+eq	/* set cr1.eq = 0 for non-recursive */
+1:	lbarx	r15,0,r11
+	cmpdi	r15,0
 	bne	2f
 	stbcx.	r10,0,r11
 	bne	1b
@@ -320,9 +320,9 @@ BEGIN_FTR_SECTION   /* CPU_FTR_SMT */
.subsection 1
 2:	cmpd	cr1,r15,r10	/* recursive lock due to mcheck/crit/etc? */
 	beq	cr1,3b		/* unlock will happen if cr1.eq = 0 */
-	lbz	r15,0(r11)
+10:	lbz	r15,0(r11)
 	cmpdi	r15,0
-	bne	2b
+	bne	10b
b   1b
.previous
 
-- 
2.1.0


[PATCH 3/3] powerpc/e6500: hw tablewalk: order the memory access when acquire/release tcd lock

2015-08-13 Thread Kevin Hao
I didn't find anything unusual, but I think we do need to order the
load/store of esel_next when acquiring/releasing the tcd lock. For acquire,
add a data dependency to order the loads of the lock and esel_next. For
release, even though there is already an isync there, it doesn't guarantee
any memory access ordering, so we still need an lwsync between the two
stores to lock and esel_next.
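
The requirement maps onto the usual acquire/release pattern. A minimal
user-space C11 sketch of the intended ordering, with a stand-in struct for
the relevant tlb_core_data fields (the actual patch keeps the hand-rolled
lock and gets the ordering from a load data dependency plus an lwsync
rather than from full acquire/release primitives):

#include <stdatomic.h>

struct tcd {
	atomic_uchar lock;		/* stand-in for the tcd lock byte */
	unsigned char esel_next;	/* next victim entry selector */
};

static unsigned char tcd_take_entry(struct tcd *t, unsigned char token)
{
	unsigned char e;

	/* acquire: the esel_next load must not be satisfied before we own the lock */
	while (atomic_exchange_explicit(&t->lock, token, memory_order_acquire) != 0)
		;

	e = t->esel_next;
	t->esel_next = e + 1;

	/* release: make the esel_next store visible before dropping the lock
	 * (on powerpc this is lwsync + store, which is what the patch adds) */
	atomic_store_explicit(&t->lock, 0, memory_order_release);
	return e;
}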

Signed-off-by: Kevin Hao haoke...@gmail.com
---
 arch/powerpc/mm/tlb_low_64e.S | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/arch/powerpc/mm/tlb_low_64e.S b/arch/powerpc/mm/tlb_low_64e.S
index e4185581c5a7..964754911987 100644
--- a/arch/powerpc/mm/tlb_low_64e.S
+++ b/arch/powerpc/mm/tlb_low_64e.S
@@ -334,6 +334,8 @@ BEGIN_FTR_SECTION   /* CPU_FTR_SMT */
 * with tlbilx before overwriting.
 */
 
+	andi	r15,r15,0	/* add a data dependency to order the loads */
+	add	r11,r11,r15	/* between the lock and esel_next */
lbz r15,TCD_ESEL_NEXT(r11)
rlwinm  r10,r15,16,0xff
orisr10,r10,MAS0_TLBSEL(1)@h
@@ -447,6 +449,7 @@ BEGIN_FTR_SECTION
beq cr1,1f  /* no unlock if lock was recursively grabbed */
li  r15,0
isync
+   lwsync
stb r15,0(r11)
 1:
 END_FTR_SECTION_IFSET(CPU_FTR_SMT)
-- 
2.1.0


[PATCH 1/3] powerpc/e6500: remove the stale TCD_LOCK macro

2015-08-13 Thread Kevin Hao
Since we moved the lock to be the first element of struct tlb_core_data in
commit 82d86de25b9c ("powerpc/e6500: Make TLB lock recursive"), this macro
is not used by any code. Just delete it.

Signed-off-by: Kevin Hao haoke...@gmail.com
---
 arch/powerpc/kernel/asm-offsets.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/arch/powerpc/kernel/asm-offsets.c 
b/arch/powerpc/kernel/asm-offsets.c
index 98230579d99c..810f433731dc 100644
--- a/arch/powerpc/kernel/asm-offsets.c
+++ b/arch/powerpc/kernel/asm-offsets.c
@@ -213,7 +213,6 @@ int main(void)
offsetof(struct tlb_core_data, esel_max));
DEFINE(TCD_ESEL_FIRST,
offsetof(struct tlb_core_data, esel_first));
-   DEFINE(TCD_LOCK, offsetof(struct tlb_core_data, lock));
 #endif /* CONFIG_PPC_BOOK3E */
 
 #ifdef CONFIG_PPC_STD_MMU_64
-- 
2.1.0


[PATCH v3 4/6] powerpc/powernv: replace the hard coded boundary with gate

2015-08-13 Thread Wei Yang
At the moment the 64bit-prefetchable window can be at most 64GB, which is
currently read from the device tree. This means that in shared mode the
maximum supported VF BAR size is 64GB/256 = 256MB, and a BAR of that size
could exhaust the whole 64bit-prefetchable window once expanded. The design
decision so far was to bound the VF BAR size at 64MB: a 64MB VF BAR,
expanded for 256 PEs, occupies only a quarter of the 64bit-prefetchable
window, which is affordable.

This patch replaces the magic 64MB limit with (m64_segsize >> 1) and adds a
comment to explain the reason for it.
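
As a worked example using the numbers above: with a 64GB window and 256 PEs
the M64 segment size is 64GB / 256 = 256MB, so the gate becomes
256MB / 2 = 128MB. A VF BAR right at the gate, expanded by total_pe, would
need 256 * 128MB = 32GB, i.e. half of the whole 64bit-prefetchable window,
which is the point at which the code rounds up to a power of two and
switches to Single PE mode instead.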

Signed-off-by: Wei Yang weiy...@linux.vnet.ibm.com
---
 arch/powerpc/platforms/powernv/pci-ioda.c |   22 +-
 1 file changed, 17 insertions(+), 5 deletions(-)

diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c 
b/arch/powerpc/platforms/powernv/pci-ioda.c
index 4da0f50..3e8c0b4 100644
--- a/arch/powerpc/platforms/powernv/pci-ioda.c
+++ b/arch/powerpc/platforms/powernv/pci-ioda.c
@@ -2688,7 +2688,7 @@ static void pnv_pci_ioda_fixup_iov_resources(struct pci_dev *pdev)
struct pnv_phb *phb;
struct resource *res;
int i;
-   resource_size_t size;
+   resource_size_t size, gate;
struct pci_dn *pdn;
int mul, total_vfs;
 
@@ -2704,6 +2704,17 @@ static void pnv_pci_ioda_fixup_iov_resources(struct 
pci_dev *pdev)
 
total_vfs = pci_sriov_get_totalvfs(pdev);
 	mul = phb->ioda.total_pe;
+	/*
+	 * If bigger than or equal to half of M64 segment size, just round up
+	 * power of two.
+	 *
+	 * Generally, one M64 BAR maps one IOV BAR. To avoid conflict with
+	 * other devices, IOV BAR size is expanded to be (total_pe *
+	 * VF_BAR_size).  When VF_BAR_size is half of M64 segment size, the
+	 * expanded size would equal to half of the whole M64 Space size,
+	 * which will exhaust the M64 Space and limit the system flexibility.
+	 */
+	gate = phb->ioda.m64_segsize >> 1;
 
 	for (i = 0; i < PCI_SRIOV_NUM_BARS; i++) {
 		res = pdev->resource[i + PCI_IOV_RESOURCES];
@@ -2718,10 +2729,11 @@ static void pnv_pci_ioda_fixup_iov_resources(struct 
pci_dev *pdev)
 
size = pci_iov_resource_size(pdev, i + PCI_IOV_RESOURCES);
 
-		/* bigger than 64M */
-		if (size > (1 << 26)) {
-			dev_info(&pdev->dev, "PowerNV: VF BAR%d: %pR IOV size "
-				 "is bigger than 64M, roundup power2\n",
-				 i, res);
+		/* bigger than or equal to gate */
+		if (size >= gate) {
+			dev_info(&pdev->dev, "PowerNV: VF BAR%d: %pR IOV size "
+				 "is bigger than %lld, roundup power2\n",
+				 i, res, gate);
 			mul = roundup_pow_of_two(total_vfs);
 			pdn->m64_single_mode = true;
 			break;
-- 
1.7.9.5


[PATCH v3 2/6] powerpc/powernv: simplify the calculation of iov resource alignment

2015-08-13 Thread Wei Yang
The alignment of an IOV BAR on the PowerNV platform is the total size of
the IOV BAR. No matter whether the IOV BAR is expanded by
roundup_pow_of_two(total_vfs) or by the maximum PE number (256), the total
size can be calculated as (vfs_expanded * VF_BAR_size).

This patch simplifies pnv_pci_iov_resource_alignment() by removing the
first case.
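
Pieced together from the hunks below, the simplified helper ends up looking
roughly like this (a sketch only; the final fall-through return for the
non-expanded case is implied by the truncated diff):

static resource_size_t pnv_pci_iov_resource_alignment(struct pci_dev *pdev,
						       int resno)
{
	struct pci_dn *pdn = pci_get_pdn(pdev);
	resource_size_t align = pci_iov_resource_size(pdev, resno);

	/* total IOV BAR size if expanded, individual VF BAR size if not */
	return pdn->vfs_expanded ? pdn->vfs_expanded * align : align;
}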

Signed-off-by: Wei Yang weiy...@linux.vnet.ibm.com
Reviewed-by: Gavin Shan gws...@linux.vnet.ibm.com
---
 arch/powerpc/platforms/powernv/pci-ioda.c |   14 +-
 1 file changed, 9 insertions(+), 5 deletions(-)

diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c 
b/arch/powerpc/platforms/powernv/pci-ioda.c
index 9ac324e..67b8f72 100644
--- a/arch/powerpc/platforms/powernv/pci-ioda.c
+++ b/arch/powerpc/platforms/powernv/pci-ioda.c
@@ -2987,12 +2987,16 @@ static resource_size_t pnv_pci_iov_resource_alignment(struct pci_dev *pdev,
 							  int resno)
 {
 	struct pci_dn *pdn = pci_get_pdn(pdev);
-	resource_size_t align, iov_align;
-
-	iov_align = resource_size(pdev->resource[resno]);
-	if (iov_align)
-		return iov_align;
+	resource_size_t align;
 
+	/*
+	 * On the PowerNV platform, the IOV BAR is mapped by an M64 BAR to
+	 * enable SR-IOV, and from the hardware perspective the range mapped
+	 * by an M64 BAR must be size aligned.
+	 *
+	 * This function returns the total IOV BAR size if expanded or just
+	 * the individual size if not.
+	 */
 	align = pci_iov_resource_size(pdev, resno);
 	if (pdn->vfs_expanded)
 		return pdn->vfs_expanded * align;
-- 
1.7.9.5


[PATCH v3 5/6] powerpc/powernv: boundary the total VF BAR size instead of the individual one

2015-08-13 Thread Wei Yang
Each VF can have at most 6 BARs. When the total size of those BARs exceeds
the gate, expanding them will also exhaust the M64 window.

This patch applies the boundary check to the total VF BAR size instead of
to each individual BAR.

Signed-off-by: Wei Yang weiy...@linux.vnet.ibm.com
---
 arch/powerpc/platforms/powernv/pci-ioda.c |   13 +++--
 1 file changed, 7 insertions(+), 6 deletions(-)

diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c 
b/arch/powerpc/platforms/powernv/pci-ioda.c
index 3e8c0b4..1e6ac86 100644
--- a/arch/powerpc/platforms/powernv/pci-ioda.c
+++ b/arch/powerpc/platforms/powernv/pci-ioda.c
@@ -2688,7 +2688,7 @@ static void pnv_pci_ioda_fixup_iov_resources(struct pci_dev *pdev)
struct pnv_phb *phb;
struct resource *res;
int i;
-   resource_size_t size, gate;
+   resource_size_t size, gate, total_vf_bar_sz;
struct pci_dn *pdn;
int mul, total_vfs;
 
@@ -2715,6 +2715,7 @@ static void pnv_pci_ioda_fixup_iov_resources(struct pci_dev *pdev)
 * which will exhaust the M64 Space and limit the system flexibility.
 */
 	gate = phb->ioda.m64_segsize >> 1;
+	total_vf_bar_sz = 0;
 
 	for (i = 0; i < PCI_SRIOV_NUM_BARS; i++) {
 		res = pdev->resource[i + PCI_IOV_RESOURCES];
@@ -2727,13 +2728,13 @@ static void pnv_pci_ioda_fixup_iov_resources(struct 
pci_dev *pdev)
return;
}
 
-		size = pci_iov_resource_size(pdev, i + PCI_IOV_RESOURCES);
+		total_vf_bar_sz += pci_iov_resource_size(pdev,
+				i + PCI_IOV_RESOURCES);
 
 		/* bigger than or equal to gate */
-		if (size >= gate) {
-			dev_info(&pdev->dev, "PowerNV: VF BAR%d: %pR IOV size "
-				 "is bigger than %lld, roundup power2\n",
-				 i, res, gate);
+		if (total_vf_bar_sz >= gate) {
+			dev_info(&pdev->dev, "PowerNV: VF BAR Total IOV size "
+				 "is bigger than %lld, roundup power2\n", gate);
 			mul = roundup_pow_of_two(total_vfs);
 			pdn->m64_single_mode = true;
 			break;
-- 
1.7.9.5


[PATCH v3 0/6] Redesign SR-IOV on PowerNV

2015-08-13 Thread Wei Yang
In the original design, VFs are grouped so that more VFs can be enabled in
the system when a VF BAR is bigger than 64MB. This design has a flaw: an
error on one VF will interfere with the other VFs in the same group.

This patch series changes the design to use an M64 BAR in Single PE mode to
cover only one VF BAR. Doing so gives absolute isolation between VFs.

v3:
   * return -ENOSPC when a VF has non-64bit prefetchable BAR
   * rename offset to pe_num_map and define it statically
   * change commit log based on comments
   * define m64_map statically
v2:
   * clean up iov bar alignment calculation
   * change m64s to m64_bars
   * add a field to represent M64 Single PE mode will be used
   * change m64_wins to m64_map
   * calculate the gate instead of hard coded
   * dynamically allocate m64_map
   * dynamically allocate PE#
   * add a case to calculate iov bar alignment when M64 Single PE is used
   * when M64 Single PE is used, first compare num_vfs with the number of
     M64 BARs available in the system



Wei Yang (6):
  powerpc/powernv: don't enable SRIOV when VF BAR has non
64bit-prefetchable BAR
  powerpc/powernv: simplify the calculation of iov resource alignment
  powerpc/powernv: use one M64 BAR in Single PE mode for one VF BAR
  powerpc/powernv: replace the hard coded boundary with gate
  powerpc/powernv: boundary the total VF BAR size instead of the
individual one
  powerpc/powernv: allocate sparse PE# when using M64 BAR in Single PE
mode

 arch/powerpc/include/asm/pci-bridge.h |8 +-
 arch/powerpc/platforms/powernv/pci-ioda.c |  284 ++---
 2 files changed, 139 insertions(+), 153 deletions(-)

-- 
1.7.9.5


[PATCH v3 1/6] powerpc/powernv: don't enable SRIOV when VF BAR has non 64bit-prefetchable BAR

2015-08-13 Thread Wei Yang
On PHB_IODA2 we enable SRIOV devices by mapping the IOV BAR with M64 BARs.
If an SRIOV device's IOV BAR is not 64bit-prefetchable, it is not assigned
from the 64bit-prefetchable window, which means an M64 BAR can't map it.

This patch makes this restriction explicit.

Signed-off-by: Wei Yang weiy...@linux.vnet.ibm.com
---
 arch/powerpc/platforms/powernv/pci-ioda.c |   25 +
 1 file changed, 9 insertions(+), 16 deletions(-)

diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c 
b/arch/powerpc/platforms/powernv/pci-ioda.c
index 5738d31..9ac324e 100644
--- a/arch/powerpc/platforms/powernv/pci-ioda.c
+++ b/arch/powerpc/platforms/powernv/pci-ioda.c
@@ -908,9 +908,6 @@ static int pnv_pci_vf_resource_shift(struct pci_dev *dev, int offset)
 		if (!res->flags || !res->parent)
 			continue;
 
-		if (!pnv_pci_is_mem_pref_64(res->flags))
-			continue;
-
/*
 * The actual IOV BAR range is determined by the start address
 * and the actual size for num_vfs VFs BAR.  This check is to
@@ -939,9 +936,6 @@ static int pnv_pci_vf_resource_shift(struct pci_dev *dev, int offset)
 		if (!res->flags || !res->parent)
 			continue;
 
-		if (!pnv_pci_is_mem_pref_64(res->flags))
-			continue;
-
 		size = pci_iov_resource_size(dev, i + PCI_IOV_RESOURCES);
 		res2 = *res;
 		res->start += size * offset;
@@ -1221,9 +1215,6 @@ static int pnv_pci_vf_assign_m64(struct pci_dev *pdev, u16 num_vfs)
 		if (!res->flags || !res->parent)
 			continue;
 
-		if (!pnv_pci_is_mem_pref_64(res->flags))
-			continue;
-
 		for (j = 0; j < vf_groups; j++) {
 			do {
 				win = find_next_zero_bit(phb->ioda.m64_bar_alloc,
@@ -1510,6 +1501,12 @@ int pnv_pci_sriov_enable(struct pci_dev *pdev, u16 num_vfs)
 	pdn = pci_get_pdn(pdev);
 
 	if (phb->type == PNV_PHB_IODA2) {
+		if (!pdn->vfs_expanded) {
+			dev_info(&pdev->dev, "don't support this SRIOV device"
+				 " with non 64bit-prefetchable IOV BAR\n");
+			return -ENOSPC;
+		}
+
 		/* Calculate available PE for required VFs */
 		mutex_lock(&phb->ioda.pe_alloc_mutex);
 		pdn->offset = bitmap_find_next_zero_area(
@@ -2774,9 +2771,10 @@ static void pnv_pci_ioda_fixup_iov_resources(struct pci_dev *pdev)
 		if (!res->flags || res->parent)
 			continue;
 		if (!pnv_pci_is_mem_pref_64(res->flags)) {
-			dev_warn(&pdev->dev, " non M64 VF BAR%d: %pR\n",
+			dev_warn(&pdev->dev, "Don't support SR-IOV with"
+				 " non M64 VF BAR%d: %pR. \n",
 				 i, res);
-			continue;
+			return;
 		}
 
size = pci_iov_resource_size(pdev, i + PCI_IOV_RESOURCES);
@@ -2795,11 +2793,6 @@ static void pnv_pci_ioda_fixup_iov_resources(struct pci_dev *pdev)
 		res = pdev->resource[i + PCI_IOV_RESOURCES];
 		if (!res->flags || res->parent)
 			continue;
-		if (!pnv_pci_is_mem_pref_64(res->flags)) {
-			dev_warn(&pdev->dev, "Skipping expanding VF BAR%d: %pR\n",
-				 i, res);
-			continue;
-		}
 
 		dev_dbg(&pdev->dev, " Fixing VF BAR%d: %pR to\n", i, res);
 		size = pci_iov_resource_size(pdev, i + PCI_IOV_RESOURCES);
-- 
1.7.9.5


[PATCH v3 3/6] powerpc/powernv: use one M64 BAR in Single PE mode for one VF BAR

2015-08-13 Thread Wei Yang
In the current implementation, when a VF BAR is bigger than 64MB, 4 M64
BARs in Single PE mode are used to cover the number of VFs that need to be
enabled. As a result several VFs end up in one VF group, which leads to
interference between VFs in the same group.

This patch changes the design to use one M64 BAR in Single PE mode for each
VF BAR. This gives absolute isolation between VFs.

Signed-off-by: Wei Yang weiy...@linux.vnet.ibm.com
---
 arch/powerpc/include/asm/pci-bridge.h |6 +-
 arch/powerpc/platforms/powernv/pci-ioda.c |  163 +++--
 2 files changed, 62 insertions(+), 107 deletions(-)

diff --git a/arch/powerpc/include/asm/pci-bridge.h 
b/arch/powerpc/include/asm/pci-bridge.h
index 712add5..9d33ada 100644
--- a/arch/powerpc/include/asm/pci-bridge.h
+++ b/arch/powerpc/include/asm/pci-bridge.h
@@ -187,6 +187,7 @@ static inline int isa_vaddr_is_ioport(void __iomem *address)
  */
 struct iommu_table;
 
+#define MAX_M64_BAR  16
 struct pci_dn {
int flags;
 #define PCI_DN_FLAG_IOV_VF 0x01
@@ -214,10 +215,9 @@ struct pci_dn {
u16 vfs_expanded;   /* number of VFs IOV BAR expanded */
u16 num_vfs;/* number of VFs enabled*/
int offset; /* PE# for the first VF PE */
-#define M64_PER_IOV 4
-   int m64_per_iov;
+   boolm64_single_mode;/* Use M64 BAR in Single Mode */
 #define IODA_INVALID_M64(-1)
-   int m64_wins[PCI_SRIOV_NUM_BARS][M64_PER_IOV];
+   int  m64_map[PCI_SRIOV_NUM_BARS][MAX_M64_BAR];
 #endif /* CONFIG_PCI_IOV */
 #endif
struct list_head child_list;
diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c 
b/arch/powerpc/platforms/powernv/pci-ioda.c
index 67b8f72..4da0f50 100644
--- a/arch/powerpc/platforms/powernv/pci-ioda.c
+++ b/arch/powerpc/platforms/powernv/pci-ioda.c
@@ -1162,15 +1162,14 @@ static int pnv_pci_vf_release_m64(struct pci_dev *pdev)
pdn = pci_get_pdn(pdev);
 
 	for (i = 0; i < PCI_SRIOV_NUM_BARS; i++)
-		for (j = 0; j < M64_PER_IOV; j++) {
-			if (pdn->m64_wins[i][j] == IODA_INVALID_M64)
+		for (j = 0; j < MAX_M64_BAR; j++) {
+			if (pdn->m64_map[i][j] == IODA_INVALID_M64)
 				continue;
 			opal_pci_phb_mmio_enable(phb->opal_id,
-				OPAL_M64_WINDOW_TYPE, pdn->m64_wins[i][j], 0);
-			clear_bit(pdn->m64_wins[i][j], phb->ioda.m64_bar_alloc);
-			pdn->m64_wins[i][j] = IODA_INVALID_M64;
+				OPAL_M64_WINDOW_TYPE, pdn->m64_map[i][j], 0);
+			clear_bit(pdn->m64_map[i][j], phb->ioda.m64_bar_alloc);
+			pdn->m64_map[i][j] = IODA_INVALID_M64;
 		}
-
 	return 0;
 }
 
@@ -1187,8 +1186,7 @@ static int pnv_pci_vf_assign_m64(struct pci_dev *pdev, u16 num_vfs)
 	int			total_vfs;
 	resource_size_t		size, start;
 	int			pe_num;
-	int			vf_groups;
-	int			vf_per_group;
+	int			m64_bars;
 
 	bus = pdev->bus;
 	hose = pci_bus_to_host(bus);
@@ -1196,26 +1194,23 @@ static int pnv_pci_vf_assign_m64(struct pci_dev *pdev, u16 num_vfs)
 	pdn = pci_get_pdn(pdev);
 	total_vfs = pci_sriov_get_totalvfs(pdev);
 
-	/* Initialize the m64_wins to IODA_INVALID_M64 */
-	for (i = 0; i < PCI_SRIOV_NUM_BARS; i++)
-		for (j = 0; j < M64_PER_IOV; j++)
-			pdn->m64_wins[i][j] = IODA_INVALID_M64;
+	if (pdn->m64_single_mode)
+		m64_bars = num_vfs;
+	else
+		m64_bars = 1;
+
+	/* Initialize the m64_map to IODA_INVALID_M64 */
+	for (i = 0; i < PCI_SRIOV_NUM_BARS ; i++)
+		for (j = 0; j < MAX_M64_BAR; j++)
+			pdn->m64_map[i][j] = IODA_INVALID_M64;
 
-	if (pdn->m64_per_iov == M64_PER_IOV) {
-		vf_groups = (num_vfs <= M64_PER_IOV) ? num_vfs: M64_PER_IOV;
-		vf_per_group = (num_vfs <= M64_PER_IOV)? 1:
-			roundup_pow_of_two(num_vfs) / pdn->m64_per_iov;
-	} else {
-		vf_groups = 1;
-		vf_per_group = 1;
-	}
 
 	for (i = 0; i < PCI_SRIOV_NUM_BARS; i++) {
 		res = pdev->resource[i + PCI_IOV_RESOURCES];
 		if (!res->flags || !res->parent)
 			continue;
 
-		for (j = 0; j < vf_groups; j++) {
+		for (j = 0; j < m64_bars; j++) {
 			do {
 				win = find_next_zero_bit(phb->ioda.m64_bar_alloc,
 						phb->ioda.m64_bar_idx + 1, 0);
@@ -1224,12 +1219,11 @@ static int pnv_pci_vf_assign_m64(struct pci_dev *pdev, u16 num_vfs)
 			goto m64_failed;
 		} 

[PATCH v3 6/6] powerpc/powernv: allocate sparse PE# when using M64 BAR in Single PE mode

2015-08-13 Thread Wei Yang
When the M64 BAR is set to Single PE mode, the PE# assigned to a VF can be
sparse.

This patch restructures the code to allocate sparse PE#s for VFs when the
M64 BAR is set to Single PE mode.

Signed-off-by: Wei Yang weiy...@linux.vnet.ibm.com
---
 arch/powerpc/include/asm/pci-bridge.h |2 +-
 arch/powerpc/platforms/powernv/pci-ioda.c |   59 +++--
 2 files changed, 41 insertions(+), 20 deletions(-)

diff --git a/arch/powerpc/include/asm/pci-bridge.h 
b/arch/powerpc/include/asm/pci-bridge.h
index 9d33ada..b026ef8 100644
--- a/arch/powerpc/include/asm/pci-bridge.h
+++ b/arch/powerpc/include/asm/pci-bridge.h
@@ -214,7 +214,7 @@ struct pci_dn {
 #ifdef CONFIG_PCI_IOV
u16 vfs_expanded;   /* number of VFs IOV BAR expanded */
u16 num_vfs;/* number of VFs enabled*/
-   int offset; /* PE# for the first VF PE */
+   int pe_num_map[MAX_M64_BAR];/* PE# for the first VF PE or array */
boolm64_single_mode;/* Use M64 BAR in Single Mode */
 #define IODA_INVALID_M64(-1)
int  m64_map[PCI_SRIOV_NUM_BARS][MAX_M64_BAR];
diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c 
b/arch/powerpc/platforms/powernv/pci-ioda.c
index 1e6ac86..7633538 100644
--- a/arch/powerpc/platforms/powernv/pci-ioda.c
+++ b/arch/powerpc/platforms/powernv/pci-ioda.c
@@ -1232,7 +1232,7 @@ static int pnv_pci_vf_assign_m64(struct pci_dev *pdev, u16 num_vfs)
 
 			/* Map the M64 here */
 			if (pdn->m64_single_mode) {
-				pe_num = pdn->offset + j;
+				pe_num = pdn->pe_num_map[j];
 				rc = opal_pci_map_pe_mmio_window(phb->opal_id,
 						pe_num, OPAL_M64_WINDOW_TYPE,
 						pdn->m64_map[i][j], 0);
@@ -1336,7 +1336,7 @@ void pnv_pci_sriov_disable(struct pci_dev *pdev)
 	struct pnv_phb        *phb;
 	struct pci_dn         *pdn;
 	struct pci_sriov      *iov;
-	u16                    num_vfs;
+	u16                    num_vfs, i;
 
 	bus = pdev->bus;
 	hose = pci_bus_to_host(bus);
@@ -1350,14 +1350,17 @@ void pnv_pci_sriov_disable(struct pci_dev *pdev)
 
 	if (phb->type == PNV_PHB_IODA2) {
 		if (!pdn->m64_single_mode)
-			pnv_pci_vf_resource_shift(pdev, -pdn->offset);
+			pnv_pci_vf_resource_shift(pdev, -pdn->pe_num_map[0]);
 
 		/* Release M64 windows */
 		pnv_pci_vf_release_m64(pdev);
 
 		/* Release PE numbers */
-		bitmap_clear(phb->ioda.pe_alloc, pdn->offset, num_vfs);
-		pdn->offset = 0;
+		if (pdn->m64_single_mode) {
+			for (i = 0; i < num_vfs; i++)
+				pnv_ioda_free_pe(phb, pdn->pe_num_map[i]);
+		} else
+			bitmap_clear(phb->ioda.pe_alloc, pdn->pe_num_map[0], num_vfs);
 	}
 }
 
@@ -1383,7 +1386,10 @@ static void pnv_ioda_setup_vf_PE(struct pci_dev *pdev, u16 num_vfs)
 
 	/* Reserve PE for each VF */
 	for (vf_index = 0; vf_index < num_vfs; vf_index++) {
-		pe_num = pdn->offset + vf_index;
+		if (pdn->m64_single_mode)
+			pe_num = pdn->pe_num_map[vf_index];
+		else
+			pe_num = pdn->pe_num_map[0] + vf_index;
 
 		pe = phb->ioda.pe_array[pe_num];
 		pe->pe_number = pe_num;
@@ -1425,6 +1431,7 @@ int pnv_pci_sriov_enable(struct pci_dev *pdev, u16 num_vfs)
 	struct pnv_phb        *phb;
 	struct pci_dn         *pdn;
 	int                    ret;
+	u16                    i;
 
 	bus = pdev->bus;
 	hose = pci_bus_to_host(bus);
@@ -1448,19 +1455,30 @@ int pnv_pci_sriov_enable(struct pci_dev *pdev, u16 num_vfs)
 	}
 
 	/* Calculate available PE for required VFs */
-	mutex_lock(&phb->ioda.pe_alloc_mutex);
-	pdn->offset = bitmap_find_next_zero_area(
-		phb->ioda.pe_alloc, phb->ioda.total_pe,
-		0, num_vfs, 0);
-	if (pdn->offset >= phb->ioda.total_pe) {
+	if (pdn->m64_single_mode) {
+		for (i = 0; i < num_vfs; i++)
+			pdn->pe_num_map[i] = IODA_INVALID_PE;
+		for (i = 0; i < num_vfs; i++) {
+			pdn->pe_num_map[i] = pnv_ioda_alloc_pe(phb);
+			if (pdn->pe_num_map[i] == IODA_INVALID_PE) {
+				ret = -EBUSY;
+				goto m64_failed;
+			}
+		}
+	} else {
+		mutex_lock(&phb->ioda.pe_alloc_mutex);
+		pdn->pe_num_map[0] = bitmap_find_next_zero_area(
+

Re: RFC: prepare for struct scatterlist entries without page backing

2015-08-13 Thread Christoph Hellwig
On Wed, Aug 12, 2015 at 03:42:47PM +0300, Boaz Harrosh wrote:
 The support I have suggested and submitted for zone-less sections.
 (In my add_persistent_memory() patchset)

 Would work perfectly well and transparent for all such multimedia cases.
 (All hacks removed). In fact I have loaded pmem (with-pages) on a VRAM
 a few times and it is great easy fun. (I wanted to experiment with cached
 memory over a pcie)

And everyone agreed that it was both buggy and incomplete.

Dan has done a respin of the page backed nvdimm work with most of
these comments addressed.

I have to say I hate both pfn-based I/O [1] and page backed nvdimms with
passion, so we're looking into the lesser evil with an open mind.

[1] not the SGL part posted here, which I think is quite sane.  The bio
side is much worse, though.

Re: [PATCH 29/31] parisc: handle page-less SG entries

2015-08-13 Thread Christoph Hellwig
On Wed, Aug 12, 2015 at 09:01:02AM -0700, Linus Torvalds wrote:
 I'm assuming that anybody who wants to use the page-less
 scatter-gather lists always does so on memory that isn't actually
 virtually mapped at all, or only does so on sane architectures that
 are cache coherent at a physical level, but I'd like that assumption
 *documented* somewhere.

It's temporarily mapped by kmap-like helpers.  That code isn't in
this series. The most recent version of it is here:

https://git.kernel.org/cgit/linux/kernel/git/djbw/nvdimm.git/commit/?h=pfnid=de8237c99fdb4352be2193f3a7610e902b9bb2f0

note that it's not doing the cache flushing it would have to do yet, but
it's also only enabled for x86 at the moment.

Re: RFC: prepare for struct scatterlist entries without page backing

2015-08-13 Thread Christoph Hellwig
On Thu, Aug 13, 2015 at 09:37:37AM +1000, Julian Calaby wrote:
 I.e. ~90% of this patch set seems to be just mechanically dropping
 BUG_ON()s and converting open coded stuff to use accessor functions
 (which should be macros or get inlined, right?) - and the remaining
 bit is not flushing if we don't have a physical page somewhere.

Which it was: 90%.  By lines changed, most of it actually is the diffs for
the cache flushing.

 Would it make sense to split this patch set into a few bits: one to
 drop all the useless BUG_ON()s, one to convert all the open coded
 stuff to accessor functions, then another to do the actual page-less
 sg stuff?

Without the ifs, the BUG_ON()s actually are useful to assert that we
never feed in the sort of physical addresses we can't otherwise support,
so I don't think that part is doable.

A simple series to make more use of sg_phys and add sg_pfn might
still be useful, though.
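
For reference, sg_phys() is already a one-line accessor, and the suggested
sg_pfn() (a hypothetical name, used here only to illustrate the idea) would
be equally small:

/* existing accessor: physical address of an SG entry */
static inline dma_addr_t sg_phys(struct scatterlist *sg)
{
	return page_to_phys(sg_page(sg)) + sg->offset;
}

/* sketch of the suggested pfn accessor; callers would use it where they
 * only need a frame number rather than a struct page */
static inline unsigned long sg_pfn(struct scatterlist *sg)
{
	return sg_phys(sg) >> PAGE_SHIFT;
}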

[PATCH 4/5] dma-mapping: consolidate dma_supported

2015-08-13 Thread Christoph Hellwig
Most architectures just call into ->dma_supported, but some also return 1
if the method is not present, or 0 if no dma ops are present (although
that should never happen). Consolidate this broader version into
common code.

Also fix h8300, which incorrectly always returned 0, which would have been
a problem if its dma_set_mask implementation wasn't a similarly buggy
noop.

As a few architectures have much more elaborate implementations, we
still allow for arch overrides.
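
The consolidated helper in asm-generic/dma-mapping-common.h then looks
roughly like this (a sketch reconstructed from the changelog; the
HAVE_ARCH_DMA_SUPPORTED guard name is taken from the defines added to the
arm and hexagon headers below):

#ifndef HAVE_ARCH_DMA_SUPPORTED
static inline int dma_supported(struct device *dev, u64 mask)
{
	struct dma_map_ops *ops = get_dma_ops(dev);

	if (!ops)
		return 0;		/* no dma ops at all */
	if (!ops->dma_supported)
		return 1;		/* method not present: assume supported */
	return ops->dma_supported(dev, mask);
}
#endif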

Signed-off-by: Christoph Hellwig h...@lst.de
---
 arch/alpha/include/asm/dma-mapping.h  |  5 -
 arch/arm/include/asm/dma-mapping.h|  5 +++--
 arch/arm64/include/asm/dma-mapping.h  |  6 --
 arch/h8300/include/asm/dma-mapping.h  |  5 -
 arch/hexagon/include/asm/dma-mapping.h|  1 +
 arch/ia64/include/asm/dma-mapping.h   |  6 --
 arch/microblaze/include/asm/dma-mapping.h | 11 ---
 arch/mips/include/asm/dma-mapping.h   |  6 --
 arch/openrisc/include/asm/dma-mapping.h   |  5 +++--
 arch/powerpc/include/asm/dma-mapping.h| 11 ---
 arch/s390/include/asm/dma-mapping.h   |  9 -
 arch/sh/include/asm/dma-mapping.h | 10 --
 arch/sparc/include/asm/dma-mapping.h  |  1 +
 arch/tile/include/asm/dma-mapping.h   |  6 --
 arch/unicore32/include/asm/dma-mapping.h  | 10 --
 arch/x86/include/asm/dma-mapping.h|  4 +++-
 include/asm-generic/dma-mapping-common.h  | 13 +
 17 files changed, 24 insertions(+), 90 deletions(-)

diff --git a/arch/alpha/include/asm/dma-mapping.h 
b/arch/alpha/include/asm/dma-mapping.h
index 80ac3e8..9d763e5 100644
--- a/arch/alpha/include/asm/dma-mapping.h
+++ b/arch/alpha/include/asm/dma-mapping.h
@@ -12,11 +12,6 @@ static inline struct dma_map_ops *get_dma_ops(struct device *dev)
 
 #include <asm-generic/dma-mapping-common.h>
 
-static inline int dma_supported(struct device *dev, u64 mask)
-{
-	return get_dma_ops(dev)->dma_supported(dev, mask);
-}
-
 static inline int dma_set_mask(struct device *dev, u64 mask)
 {
 	return get_dma_ops(dev)->set_dma_mask(dev, mask);
diff --git a/arch/arm/include/asm/dma-mapping.h 
b/arch/arm/include/asm/dma-mapping.h
index 2fa33d7..b90d247 100644
--- a/arch/arm/include/asm/dma-mapping.h
+++ b/arch/arm/include/asm/dma-mapping.h
@@ -38,6 +38,9 @@ static inline void set_dma_ops(struct device *dev, struct dma_map_ops *ops)
 	dev->archdata.dma_ops = ops;
 }
 
+#define HAVE_ARCH_DMA_SUPPORTED 1
+extern int dma_supported(struct device *dev, u64 mask);
+
 #include <asm-generic/dma-mapping-common.h>
 
 static inline int dma_set_mask(struct device *dev, u64 mask)
@@ -166,8 +169,6 @@ static inline bool dma_capable(struct device *dev, dma_addr_t addr, size_t size)
 
 static inline void dma_mark_clean(void *addr, size_t size) { }
 
-extern int dma_supported(struct device *dev, u64 mask);
-
 extern int arm_dma_set_mask(struct device *dev, u64 dma_mask);
 
 /**
diff --git a/arch/arm64/include/asm/dma-mapping.h 
b/arch/arm64/include/asm/dma-mapping.h
index f45f444..f519a58 100644
--- a/arch/arm64/include/asm/dma-mapping.h
+++ b/arch/arm64/include/asm/dma-mapping.h
@@ -84,12 +84,6 @@ static inline phys_addr_t dma_to_phys(struct device *dev, dma_addr_t dev_addr)
 	return (phys_addr_t)dev_addr;
 }
 
-static inline int dma_supported(struct device *dev, u64 mask)
-{
-	struct dma_map_ops *ops = get_dma_ops(dev);
-	return ops->dma_supported(dev, mask);
-}
-
 static inline int dma_set_mask(struct device *dev, u64 mask)
 {
 	if (!dev->dma_mask || !dma_supported(dev, mask))
diff --git a/arch/h8300/include/asm/dma-mapping.h 
b/arch/h8300/include/asm/dma-mapping.h
index 5eef053..48d652e 100644
--- a/arch/h8300/include/asm/dma-mapping.h
+++ b/arch/h8300/include/asm/dma-mapping.h
@@ -10,11 +10,6 @@ static inline struct dma_map_ops *get_dma_ops(struct device *dev)
 
 #include <asm-generic/dma-mapping-common.h>
 
-static inline int dma_supported(struct device *dev, u64 mask)
-{
-	return 0;
-}
-
 static inline int dma_set_mask(struct device *dev, u64 mask)
 {
 	return 0;
diff --git a/arch/hexagon/include/asm/dma-mapping.h 
b/arch/hexagon/include/asm/dma-mapping.h
index e661192..36e8de7 100644
--- a/arch/hexagon/include/asm/dma-mapping.h
+++ b/arch/hexagon/include/asm/dma-mapping.h
@@ -43,6 +43,7 @@ static inline struct dma_map_ops *get_dma_ops(struct device *dev)
 	return dma_ops;
 }
 
+#define HAVE_ARCH_DMA_SUPPORTED 1
 extern int dma_supported(struct device *dev, u64 mask);
 extern int dma_set_mask(struct device *dev, u64 mask);
 extern int dma_is_consistent(struct device *dev, dma_addr_t dma_handle);
diff --git a/arch/ia64/include/asm/dma-mapping.h 
b/arch/ia64/include/asm/dma-mapping.h
index 27b713d..7982caa 100644
--- a/arch/ia64/include/asm/dma-mapping.h
+++ b/arch/ia64/include/asm/dma-mapping.h
@@ -27,12 +27,6 @@ extern void machvec_dma_sync_sg(struct device *, struct scatterlist *, int,
 
 #include <asm-generic/dma-mapping-common.h>
 

[PATCH 3/5] dma-mapping: consolidate dma_mapping_error

2015-08-13 Thread Christoph Hellwig
Currently there are three valid implementations of dma_mapping_error:

 (1) call ->mapping_error
 (2) check for a hardcoded error code
 (3) always return 0

This patch provides a common implementation that calls ->mapping_error
if present, then checks for DMA_ERROR_CODE if defined or otherwise
returns 0.
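
The common helper added to asm-generic/dma-mapping-common.h should
therefore look roughly like this (a sketch reconstructed from the
changelog; the debug_dma_mapping_error() call mirrors the per-arch
versions being removed below):

static inline int dma_mapping_error(struct device *dev, dma_addr_t dma_addr)
{
	struct dma_map_ops *ops = get_dma_ops(dev);

	debug_dma_mapping_error(dev, dma_addr);
	if (ops->mapping_error)
		return ops->mapping_error(dev, dma_addr);

#ifdef DMA_ERROR_CODE
	return dma_addr == DMA_ERROR_CODE;
#else
	return 0;
#endif
}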

Signed-off-by: Christoph Hellwig h...@lst.de
---
 arch/alpha/include/asm/dma-mapping.h  |  5 -
 arch/arm/include/asm/dma-mapping.h|  9 -
 arch/arm64/include/asm/dma-mapping.h  |  7 ---
 arch/h8300/include/asm/dma-mapping.h  |  5 -
 arch/hexagon/include/asm/dma-mapping.h| 11 +--
 arch/ia64/include/asm/dma-mapping.h   |  7 ---
 arch/microblaze/include/asm/dma-mapping.h | 11 ---
 arch/mips/include/asm/dma-mapping.h   |  8 
 arch/openrisc/include/asm/dma-mapping.h   |  5 -
 arch/powerpc/include/asm/dma-mapping.h| 17 ++---
 arch/s390/include/asm/dma-mapping.h   | 10 --
 arch/sh/include/asm/dma-mapping.h | 13 ++---
 arch/sparc/include/asm/dma-mapping.h  |  6 --
 arch/tile/include/asm/dma-mapping.h   |  7 ---
 arch/unicore32/include/asm/dma-mapping.h  | 10 --
 arch/x86/include/asm/dma-mapping.h| 11 ---
 include/asm-generic/dma-mapping-common.h  | 14 ++
 17 files changed, 19 insertions(+), 137 deletions(-)

diff --git a/arch/alpha/include/asm/dma-mapping.h 
b/arch/alpha/include/asm/dma-mapping.h
index 0552bf0..80ac3e8 100644
--- a/arch/alpha/include/asm/dma-mapping.h
+++ b/arch/alpha/include/asm/dma-mapping.h
@@ -12,11 +12,6 @@ static inline struct dma_map_ops *get_dma_ops(struct device *dev)
 
 #include <asm-generic/dma-mapping-common.h>
 
-static inline int dma_mapping_error(struct device *dev, dma_addr_t dma_addr)
-{
-	return get_dma_ops(dev)->mapping_error(dev, dma_addr);
-}
-
 static inline int dma_supported(struct device *dev, u64 mask)
 {
 	return get_dma_ops(dev)->dma_supported(dev, mask);
diff --git a/arch/arm/include/asm/dma-mapping.h 
b/arch/arm/include/asm/dma-mapping.h
index ab521d5..2fa33d7 100644
--- a/arch/arm/include/asm/dma-mapping.h
+++ b/arch/arm/include/asm/dma-mapping.h
@@ -166,15 +166,6 @@ static inline bool dma_capable(struct device *dev, dma_addr_t addr, size_t size)
 
 static inline void dma_mark_clean(void *addr, size_t size) { }
 
-/*
- * DMA errors are defined by all-bits-set in the DMA address.
- */
-static inline int dma_mapping_error(struct device *dev, dma_addr_t dma_addr)
-{
-   debug_dma_mapping_error(dev, dma_addr);
-   return dma_addr == DMA_ERROR_CODE;
-}
-
 extern int dma_supported(struct device *dev, u64 mask);
 
 extern int arm_dma_set_mask(struct device *dev, u64 dma_mask);
diff --git a/arch/arm64/include/asm/dma-mapping.h 
b/arch/arm64/include/asm/dma-mapping.h
index 178e60b..f45f444 100644
--- a/arch/arm64/include/asm/dma-mapping.h
+++ b/arch/arm64/include/asm/dma-mapping.h
@@ -84,13 +84,6 @@ static inline phys_addr_t dma_to_phys(struct device *dev, dma_addr_t dev_addr)
 	return (phys_addr_t)dev_addr;
 }
 
-static inline int dma_mapping_error(struct device *dev, dma_addr_t dev_addr)
-{
-	struct dma_map_ops *ops = get_dma_ops(dev);
-	debug_dma_mapping_error(dev, dev_addr);
-	return ops->mapping_error(dev, dev_addr);
-}
-
 static inline int dma_supported(struct device *dev, u64 mask)
 {
struct dma_map_ops *ops = get_dma_ops(dev);
diff --git a/arch/h8300/include/asm/dma-mapping.h 
b/arch/h8300/include/asm/dma-mapping.h
index 72465ce..5eef053 100644
--- a/arch/h8300/include/asm/dma-mapping.h
+++ b/arch/h8300/include/asm/dma-mapping.h
@@ -20,9 +20,4 @@ static inline int dma_set_mask(struct device *dev, u64 mask)
return 0;
 }
 
-static inline int dma_mapping_error(struct device *dev, dma_addr_t dma_addr)
-{
-   return 0;
-}
-
 #endif
diff --git a/arch/hexagon/include/asm/dma-mapping.h 
b/arch/hexagon/include/asm/dma-mapping.h
index 58d2d8f..e661192 100644
--- a/arch/hexagon/include/asm/dma-mapping.h
+++ b/arch/hexagon/include/asm/dma-mapping.h
@@ -31,6 +31,7 @@
 
 struct device;
 extern int bad_dma_address;
+#define DMA_ERROR_CODE bad_dma_address
 
 extern struct dma_map_ops *dma_ops;
 
@@ -57,14 +58,4 @@ static inline bool dma_capable(struct device *dev, dma_addr_t addr, size_t size)
 	return addr + size - 1 <= *dev->dma_mask;
 }
 
-static inline int dma_mapping_error(struct device *dev, dma_addr_t dma_addr)
-{
-	struct dma_map_ops *dma_ops = get_dma_ops(dev);
-
-	if (dma_ops->mapping_error)
-		return dma_ops->mapping_error(dev, dma_addr);
-
-	return (dma_addr == bad_dma_address);
-}
-
 #endif
diff --git a/arch/ia64/include/asm/dma-mapping.h 
b/arch/ia64/include/asm/dma-mapping.h
index a925ff0..27b713d 100644
--- a/arch/ia64/include/asm/dma-mapping.h
+++ b/arch/ia64/include/asm/dma-mapping.h
@@ -27,13 +27,6 @@ extern void machvec_dma_sync_sg(struct device *, struct scatterlist *, int,
 
 #include 

Re: [PATCH 5/5] dma-mapping: consolidate dma_set_mask

2015-08-13 Thread Russell King - ARM Linux
On Thu, Aug 13, 2015 at 05:04:08PM +0200, Christoph Hellwig wrote:
 diff --git a/arch/arm/common/dmabounce.c b/arch/arm/common/dmabounce.c
 index 1143c4d..260f52a 100644
 --- a/arch/arm/common/dmabounce.c
 +++ b/arch/arm/common/dmabounce.c
 @@ -440,14 +440,6 @@ static void dmabounce_sync_for_device(struct device *dev,
   arm_dma_ops.sync_single_for_device(dev, handle, size, dir);
  }
  
 -static int dmabounce_set_mask(struct device *dev, u64 dma_mask)
 -{
  - if (dev->archdata.dmabounce)
 - return 0;
 -
 - return arm_dma_ops.set_dma_mask(dev, dma_mask);

Are you sure about this?  A user of dmabounce gets to request any mask
with the original code (even though it was never written back... which
is a separate bug.)  After this, it seems that this will get limited
by the dma_supported() check.  As this old code is about bouncing any
buffer into DMA-able memory, it doesn't care about the DMA mask.

-- 
FTTC broadband for 0.8mile line: currently at 10.5Mbps down 400kbps up
according to speedtest.net.

Re: [PATCH 31/31] dma-mapping-common: skip kmemleak checks for page-less SG entries

2015-08-13 Thread Christoph Hellwig
On Wed, Aug 12, 2015 at 09:05:15AM -0700, Linus Torvalds wrote:
 [ Again, I'm responding to one random patch - this pattern was in
 other patches too.  ]
 
 A question: do we actually expect to mix page-less and pageful SG
 entries in the same SG list?
 
 How does that happen?

Both for DAX and the video buffer case people could do direct I/O
spanning the boundary between such a VMA and a normal one unless
we add special code to prevent that.  Right now I don't think it's
all that useful, but then again it doesn't seem harmful either
and adding those checks might add up.

[PATCH 1/5] dma-mapping: consolidate dma_{alloc, free}_{attrs, coherent}

2015-08-13 Thread Christoph Hellwig
The coherent DMA allocator works the same over all architectures supporting
dma_map operations.

This patch consolidates them and converges the minor differences:

 - the debug_dma helpers are now called from all architectures, including
   those that were previously missing them
 - dma_alloc_from_coherent and dma_release_from_coherent are now always
   called from the generic alloc/free routines instead of the ops;
   dma-mapping-common.h always includes dma-coherent.h to get the definitions
   for them, or the stubs if the architecture doesn't support this feature
 - checks for ->alloc / ->free presence are removed.  There is only one
   instance of dma_map_ops without them (mic_dma_ops) and that one
   is x86 only anyway.

Besides that, only x86 needs special treatment to replace a default device
if none is passed and to tweak the gfp_flags.  An optional arch hook is
provided for that.
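
The consolidated allocator then looks roughly like this (a sketch pieced
together from the per-arch versions removed below; the arch_dma_alloc_attrs()
name for the optional x86 hook is an assumption based on the changelog):

#define dma_alloc_coherent(d, s, h, f)	dma_alloc_attrs(d, s, h, f, NULL)

static inline void *dma_alloc_attrs(struct device *dev, size_t size,
				    dma_addr_t *dma_handle, gfp_t flag,
				    struct dma_attrs *attrs)
{
	struct dma_map_ops *ops = get_dma_ops(dev);
	void *cpu_addr;

	BUG_ON(!ops);

	/* try the per-device coherent area first */
	if (dma_alloc_from_coherent(dev, size, dma_handle, &cpu_addr))
		return cpu_addr;

	if (!arch_dma_alloc_attrs(&dev, &flag))
		return NULL;

	cpu_addr = ops->alloc(dev, size, dma_handle, flag, attrs);
	debug_dma_alloc_coherent(dev, size, *dma_handle, cpu_addr);
	return cpu_addr;
}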

Signed-off-by: Christoph Hellwig h...@lst.de
---
 arch/alpha/include/asm/dma-mapping.h  | 18 --
 arch/arm/include/asm/dma-mapping.h| 29 
 arch/arm/mm/dma-mapping.c | 11 --
 arch/arm64/include/asm/dma-mapping.h  | 33 --
 arch/h8300/include/asm/dma-mapping.h  | 26 --
 arch/hexagon/include/asm/dma-mapping.h| 33 --
 arch/ia64/include/asm/dma-mapping.h   | 25 -
 arch/microblaze/include/asm/dma-mapping.h | 31 -
 arch/mips/cavium-octeon/dma-octeon.c  |  8 -
 arch/mips/include/asm/dma-mapping.h   | 31 -
 arch/mips/loongson64/common/dma-swiotlb.c |  8 -
 arch/mips/mm/dma-default.c|  7 
 arch/mips/netlogic/common/nlm-dma.c   |  8 -
 arch/openrisc/include/asm/dma-mapping.h   | 30 
 arch/powerpc/include/asm/dma-mapping.h| 33 --
 arch/s390/include/asm/dma-mapping.h   | 31 -
 arch/sh/include/asm/dma-mapping.h | 37 
 arch/sparc/include/asm/dma-mapping.h  | 26 --
 arch/tile/include/asm/dma-mapping.h   | 27 --
 arch/unicore32/include/asm/dma-mapping.h  | 24 -
 arch/x86/include/asm/dma-mapping.h| 16 ++---
 arch/x86/kernel/pci-dma.c | 49 +-
 drivers/xen/swiotlb-xen.c |  6 
 include/asm-generic/dma-mapping-common.h  | 58 +++
 24 files changed, 70 insertions(+), 535 deletions(-)

diff --git a/arch/alpha/include/asm/dma-mapping.h 
b/arch/alpha/include/asm/dma-mapping.h
index dfa32f0..9fef5bd 100644
--- a/arch/alpha/include/asm/dma-mapping.h
+++ b/arch/alpha/include/asm/dma-mapping.h
@@ -12,24 +12,6 @@ static inline struct dma_map_ops *get_dma_ops(struct device *dev)
 
 #include <asm-generic/dma-mapping-common.h>
 
-#define dma_alloc_coherent(d,s,h,f)	dma_alloc_attrs(d,s,h,f,NULL)
-
-static inline void *dma_alloc_attrs(struct device *dev, size_t size,
-				    dma_addr_t *dma_handle, gfp_t gfp,
-				    struct dma_attrs *attrs)
-{
-	return get_dma_ops(dev)->alloc(dev, size, dma_handle, gfp, attrs);
-}
-
-#define dma_free_coherent(d,s,c,h)	dma_free_attrs(d,s,c,h,NULL)
-
-static inline void dma_free_attrs(struct device *dev, size_t size,
-				  void *vaddr, dma_addr_t dma_handle,
-				  struct dma_attrs *attrs)
-{
-	get_dma_ops(dev)->free(dev, size, vaddr, dma_handle, attrs);
-}
-
 static inline int dma_mapping_error(struct device *dev, dma_addr_t dma_addr)
 {
 	return get_dma_ops(dev)->mapping_error(dev, dma_addr);
diff --git a/arch/arm/include/asm/dma-mapping.h 
b/arch/arm/include/asm/dma-mapping.h
index b52101d..2ae3424 100644
--- a/arch/arm/include/asm/dma-mapping.h
+++ b/arch/arm/include/asm/dma-mapping.h
@@ -8,7 +8,6 @@
 #include <linux/dma-attrs.h>
 #include <linux/dma-debug.h>
 
-#include <asm-generic/dma-coherent.h>
 #include <asm/memory.h>
 
 #include <xen/xen.h>
@@ -209,21 +208,6 @@ extern int arm_dma_set_mask(struct device *dev, u64 dma_mask);
 extern void *arm_dma_alloc(struct device *dev, size_t size, dma_addr_t *handle,
 			   gfp_t gfp, struct dma_attrs *attrs);
 
-#define dma_alloc_coherent(d, s, h, f)	dma_alloc_attrs(d, s, h, f, NULL)
-
-static inline void *dma_alloc_attrs(struct device *dev, size_t size,
-				    dma_addr_t *dma_handle, gfp_t flag,
-				    struct dma_attrs *attrs)
-{
-	struct dma_map_ops *ops = get_dma_ops(dev);
-	void *cpu_addr;
-	BUG_ON(!ops);
-
-	cpu_addr = ops->alloc(dev, size, dma_handle, flag, attrs);
-	debug_dma_alloc_coherent(dev, size, *dma_handle, cpu_addr);
-	return cpu_addr;
-}
-
 /**
  * arm_dma_free - free memory allocated by arm_dma_alloc
  * @dev: valid struct device pointer, or NULL for ISA and EISA-like devices
@@ -241,19 +225,6 @@ static 

[PATCH 5/5] dma-mapping: consolidate dma_set_mask

2015-08-13 Thread Christoph Hellwig
Almost everyone implements dma_set_mask the same way, although sometimes
it is hidden in ->set_dma_mask methods.

Move this implementation to common code, including a callout to override
the post-check action, and remove the duplicate instances in the methods
as well.

Unfortunately some architectures overload it with unrelated semantics, such
as changing the dma_ops, so we still need to allow for an architecture
override for now.
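
The common version then looks roughly like this (a sketch reconstructed
from the per-arch set_mask implementations removed below; the exact name of
the arch override guard is not visible in this excerpt):

static inline int dma_set_mask(struct device *dev, u64 mask)
{
	struct dma_map_ops *ops = get_dma_ops(dev);

	if (ops->set_dma_mask)
		return ops->set_dma_mask(dev, mask);

	if (!dev->dma_mask || !dma_supported(dev, mask))
		return -EIO;
	*dev->dma_mask = mask;
	return 0;
}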

Signed-off-by: Christoph Hellwig h...@lst.de
---
 arch/alpha/include/asm/dma-mapping.h  |  5 -
 arch/alpha/kernel/pci-noop.c  | 10 --
 arch/alpha/kernel/pci_iommu.c | 11 ---
 arch/arm/common/dmabounce.c   |  9 -
 arch/arm/include/asm/dma-mapping.h|  5 -
 arch/arm/mm/dma-mapping.c | 16 
 arch/arm/xen/mm.c |  1 -
 arch/arm64/include/asm/dma-mapping.h  |  9 -
 arch/h8300/include/asm/dma-mapping.h  |  5 -
 arch/hexagon/include/asm/dma-mapping.h|  1 -
 arch/hexagon/kernel/dma.c | 11 ---
 arch/ia64/include/asm/dma-mapping.h   |  9 -
 arch/microblaze/include/asm/dma-mapping.h | 14 --
 arch/mips/include/asm/dma-mapping.h   | 16 
 arch/openrisc/include/asm/dma-mapping.h   |  9 -
 arch/powerpc/include/asm/dma-mapping.h|  4 +++-
 arch/powerpc/platforms/cell/iommu.c   |  3 ---
 arch/s390/include/asm/dma-mapping.h   |  2 --
 arch/s390/pci/pci_dma.c   | 10 --
 arch/sh/include/asm/dma-mapping.h | 14 --
 arch/sparc/include/asm/dma-mapping.h  |  5 +++--
 arch/tile/include/asm/dma-mapping.h   |  5 +++--
 arch/unicore32/include/asm/dma-mapping.h  | 10 --
 arch/x86/include/asm/dma-mapping.h|  2 --
 arch/x86/kernel/pci-dma.c | 11 ---
 drivers/xen/swiotlb-xen.c | 12 
 include/asm-generic/dma-mapping-common.h  | 16 
 include/xen/swiotlb-xen.h |  2 --
 28 files changed, 25 insertions(+), 202 deletions(-)

diff --git a/arch/alpha/include/asm/dma-mapping.h 
b/arch/alpha/include/asm/dma-mapping.h
index 9d763e5..72a8ca7 100644
--- a/arch/alpha/include/asm/dma-mapping.h
+++ b/arch/alpha/include/asm/dma-mapping.h
@@ -12,11 +12,6 @@ static inline struct dma_map_ops *get_dma_ops(struct device *dev)
 
 #include <asm-generic/dma-mapping-common.h>
 
-static inline int dma_set_mask(struct device *dev, u64 mask)
-{
-	return get_dma_ops(dev)->set_dma_mask(dev, mask);
-}
-
 #define dma_cache_sync(dev, va, size, dir)   ((void)0)
 
 #endif /* _ALPHA_DMA_MAPPING_H */
diff --git a/arch/alpha/kernel/pci-noop.c b/arch/alpha/kernel/pci-noop.c
index df24b76..2b1f4a1 100644
--- a/arch/alpha/kernel/pci-noop.c
+++ b/arch/alpha/kernel/pci-noop.c
@@ -166,15 +166,6 @@ static int alpha_noop_supported(struct device *dev, u64 mask)
 	return mask < 0x00ffUL ? 0 : 1;
 }
 
-static int alpha_noop_set_mask(struct device *dev, u64 mask)
-{
-	if (!dev->dma_mask || !dma_supported(dev, mask))
-		return -EIO;
-
-	*dev->dma_mask = mask;
-	return 0;
-}
-
 struct dma_map_ops alpha_noop_ops = {
.alloc  = alpha_noop_alloc_coherent,
.free   = alpha_noop_free_coherent,
@@ -182,7 +173,6 @@ struct dma_map_ops alpha_noop_ops = {
.map_sg = alpha_noop_map_sg,
.mapping_error  = alpha_noop_mapping_error,
.dma_supported  = alpha_noop_supported,
-   .set_dma_mask   = alpha_noop_set_mask,
 };
 
 struct dma_map_ops *dma_ops = alpha_noop_ops;
diff --git a/arch/alpha/kernel/pci_iommu.c b/arch/alpha/kernel/pci_iommu.c
index eddee77..8969bf2 100644
--- a/arch/alpha/kernel/pci_iommu.c
+++ b/arch/alpha/kernel/pci_iommu.c
@@ -939,16 +939,6 @@ static int alpha_pci_mapping_error(struct device *dev, dma_addr_t dma_addr)
 	return dma_addr == 0;
 }
 
-static int alpha_pci_set_mask(struct device *dev, u64 mask)
-{
-	if (!dev->dma_mask ||
-	    !pci_dma_supported(alpha_gendev_to_pci(dev), mask))
-		return -EIO;
-
-	*dev->dma_mask = mask;
-	return 0;
-}
-
 struct dma_map_ops alpha_pci_ops = {
.alloc  = alpha_pci_alloc_coherent,
.free   = alpha_pci_free_coherent,
@@ -958,7 +948,6 @@ struct dma_map_ops alpha_pci_ops = {
.unmap_sg   = alpha_pci_unmap_sg,
.mapping_error  = alpha_pci_mapping_error,
.dma_supported  = alpha_pci_supported,
-   .set_dma_mask   = alpha_pci_set_mask,
 };
 
 struct dma_map_ops *dma_ops = alpha_pci_ops;
diff --git a/arch/arm/common/dmabounce.c b/arch/arm/common/dmabounce.c
index 1143c4d..260f52a 100644
--- a/arch/arm/common/dmabounce.c
+++ b/arch/arm/common/dmabounce.c
@@ -440,14 +440,6 @@ static void dmabounce_sync_for_device(struct device *dev,

Re: [PATCH 2/5] dma-mapping: consolidate dma_{alloc, free}_noncoherent

2015-08-13 Thread Russell King - ARM Linux
On Thu, Aug 13, 2015 at 05:04:05PM +0200, Christoph Hellwig wrote:
 diff --git a/arch/arm/include/asm/dma-mapping.h 
 b/arch/arm/include/asm/dma-mapping.h
 index 2ae3424..ab521d5 100644
 --- a/arch/arm/include/asm/dma-mapping.h
 +++ b/arch/arm/include/asm/dma-mapping.h
 @@ -175,21 +175,6 @@ static inline int dma_mapping_error(struct device *dev, 
 dma_addr_t dma_addr)
   return dma_addr == DMA_ERROR_CODE;
  }
  
 -/*
 - * Dummy noncoherent implementation.  We don't provide a dma_cache_sync
 - * function so drivers using this API are highlighted with build warnings.
 - */

I'd like a similar comment to remain after this patch explaining that we
don't support non-coherent allocations and that it'll be highlighted by
the lack of dma_cache_sync, otherwise I'm sure we'll start to get patches
to add the thing.

-- 
FTTC broadband for 0.8mile line: currently at 10.5Mbps down 400kbps up
according to speedtest.net.

provide more common DMA API functions

2015-08-13 Thread Christoph Hellwig
Since 2009 we have a nice asm-generic header implementing lots of DMA API
functions for architectures using struct dma_map_ops, but unfortunately
it's still missing a lot of APIs that all architectures still have to
duplicate.

This series consolidates the remaining functions, although we still
need arch opt outs for two of them as a few architectures have very
non-standard implementations.


[PATCH 2/5] dma-mapping: consolidate dma_{alloc,free}_noncoherent

2015-08-13 Thread Christoph Hellwig
Most architectures do not support non-coherent allocations and either
define dma_{alloc,free}_noncoherent to their coherent versions or stub
them out.

Openrisc uses dma_{alloc,free}_attrs to implement them, and only Mips
implements them directly.

This patch moves the Openrisc version to common code, and handles the
DMA_ATTR_NON_CONSISTENT case in the mips dma_map_ops instance.

Note that actual non-coherent allocations require a dma_cache_sync
implementation, so if non-coherent allocations didn't work on
an architecture before this patch they still won't work after it.
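
The common helpers should then look roughly like this (a sketch modelled on
the Openrisc version the changelog says is being moved into
asm-generic/dma-mapping-common.h):

static inline void *dma_alloc_noncoherent(struct device *dev, size_t size,
					  dma_addr_t *dma_handle, gfp_t gfp)
{
	DEFINE_DMA_ATTRS(attrs);

	dma_set_attr(DMA_ATTR_NON_CONSISTENT, &attrs);
	return dma_alloc_attrs(dev, size, dma_handle, gfp, &attrs);
}

static inline void dma_free_noncoherent(struct device *dev, size_t size,
					 void *cpu_addr, dma_addr_t dma_handle)
{
	DEFINE_DMA_ATTRS(attrs);

	dma_set_attr(DMA_ATTR_NON_CONSISTENT, &attrs);
	dma_free_attrs(dev, size, cpu_addr, dma_handle, &attrs);
}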

Signed-off-by: Christoph Hellwig h...@lst.de
---
 arch/alpha/include/asm/dma-mapping.h  |  3 ---
 arch/arm/include/asm/dma-mapping.h| 15 ---
 arch/arm64/include/asm/dma-mapping.h  | 14 --
 arch/h8300/include/asm/dma-mapping.h  |  3 ---
 arch/hexagon/include/asm/dma-mapping.h|  3 ---
 arch/ia64/include/asm/dma-mapping.h   |  3 ---
 arch/microblaze/include/asm/dma-mapping.h |  3 ---
 arch/mips/include/asm/dma-mapping.h   |  6 --
 arch/mips/mm/dma-default.c| 20 +++-
 arch/openrisc/include/asm/dma-mapping.h   | 20 
 arch/powerpc/include/asm/dma-mapping.h|  3 ---
 arch/s390/include/asm/dma-mapping.h   |  3 ---
 arch/sh/include/asm/dma-mapping.h |  3 ---
 arch/sparc/include/asm/dma-mapping.h  |  3 ---
 arch/tile/include/asm/dma-mapping.h   |  3 ---
 arch/unicore32/include/asm/dma-mapping.h  |  3 ---
 arch/x86/include/asm/dma-mapping.h|  3 ---
 include/asm-generic/dma-mapping-common.h  | 18 ++
 18 files changed, 33 insertions(+), 96 deletions(-)

diff --git a/arch/alpha/include/asm/dma-mapping.h 
b/arch/alpha/include/asm/dma-mapping.h
index 9fef5bd..0552bf0 100644
--- a/arch/alpha/include/asm/dma-mapping.h
+++ b/arch/alpha/include/asm/dma-mapping.h
@@ -27,9 +27,6 @@ static inline int dma_set_mask(struct device *dev, u64 mask)
 	return get_dma_ops(dev)->set_dma_mask(dev, mask);
 }
 
-#define dma_alloc_noncoherent(d, s, h, f)  dma_alloc_coherent(d, s, h, f)
-#define dma_free_noncoherent(d, s, v, h)   dma_free_coherent(d, s, v, h)
-
 #define dma_cache_sync(dev, va, size, dir)   ((void)0)
 
 #endif /* _ALPHA_DMA_MAPPING_H */
diff --git a/arch/arm/include/asm/dma-mapping.h 
b/arch/arm/include/asm/dma-mapping.h
index 2ae3424..ab521d5 100644
--- a/arch/arm/include/asm/dma-mapping.h
+++ b/arch/arm/include/asm/dma-mapping.h
@@ -175,21 +175,6 @@ static inline int dma_mapping_error(struct device *dev, 
dma_addr_t dma_addr)
return dma_addr == DMA_ERROR_CODE;
 }
 
-/*
- * Dummy noncoherent implementation.  We don't provide a dma_cache_sync
- * function so drivers using this API are highlighted with build warnings.
- */
-static inline void *dma_alloc_noncoherent(struct device *dev, size_t size,
-   dma_addr_t *handle, gfp_t gfp)
-{
-   return NULL;
-}
-
-static inline void dma_free_noncoherent(struct device *dev, size_t size,
-   void *cpu_addr, dma_addr_t handle)
-{
-}
-
 extern int dma_supported(struct device *dev, u64 mask);
 
 extern int arm_dma_set_mask(struct device *dev, u64 dma_mask);
diff --git a/arch/arm64/include/asm/dma-mapping.h 
b/arch/arm64/include/asm/dma-mapping.h
index 5e11b3f..178e60b 100644
--- a/arch/arm64/include/asm/dma-mapping.h
+++ b/arch/arm64/include/asm/dma-mapping.h
@@ -118,19 +118,5 @@ static inline void dma_mark_clean(void *addr, size_t size)
 {
 }
 
-/*
- * There is no dma_cache_sync() implementation, so just return NULL here.
- */
-static inline void *dma_alloc_noncoherent(struct device *dev, size_t size,
- dma_addr_t *handle, gfp_t flags)
-{
-   return NULL;
-}
-
-static inline void dma_free_noncoherent(struct device *dev, size_t size,
-   void *cpu_addr, dma_addr_t handle)
-{
-}
-
 #endif /* __KERNEL__ */
 #endif /* __ASM_DMA_MAPPING_H */
diff --git a/arch/h8300/include/asm/dma-mapping.h 
b/arch/h8300/include/asm/dma-mapping.h
index 826aa9b..72465ce 100644
--- a/arch/h8300/include/asm/dma-mapping.h
+++ b/arch/h8300/include/asm/dma-mapping.h
@@ -20,9 +20,6 @@ static inline int dma_set_mask(struct device *dev, u64 mask)
return 0;
 }
 
-#define dma_alloc_noncoherent(d, s, h, f) dma_alloc_coherent(d, s, h, f)
-#define dma_free_noncoherent(d, s, v, h) dma_free_coherent(d, s, v, h)
-
 static inline int dma_mapping_error(struct device *dev, dma_addr_t dma_addr)
 {
return 0;
diff --git a/arch/hexagon/include/asm/dma-mapping.h 
b/arch/hexagon/include/asm/dma-mapping.h
index c20d3ca..58d2d8f 100644
--- a/arch/hexagon/include/asm/dma-mapping.h
+++ b/arch/hexagon/include/asm/dma-mapping.h
@@ -34,9 +34,6 @@ extern int bad_dma_address;
 
 extern struct dma_map_ops *dma_ops;
 
-#define dma_alloc_noncoherent(d, s, h, f) dma_alloc_coherent(d, s, h, f)
-#define dma_free_noncoherent(d, s, v, h) dma_free_coherent(d, s, v, h)
-
 

Re: [PATCH 2/5] dma-mapping: consolidate dma_{alloc,free}_noncoherent

2015-08-13 Thread Christoph Hellwig
On Thu, Aug 13, 2015 at 04:20:40PM +0100, Russell King - ARM Linux wrote:
  -/*
  - * Dummy noncoherent implementation.  We don't provide a dma_cache_sync
  - * function so drivers using this API are highlighted with build warnings.
  - */
 
 I'd like a similar comment to remain after this patch explaining that we
 don't support non-coherent allocations and that it'll be highlighted by
 the lack of dma_cache_sync, otherwise I'm sure we'll start to get patches
 to add the thing.

I'll keep a modified version of this comment in the ARM dma-mapping.h
in addition to an explanation near the new common dma_alloc_noncoherent
definition, thanks!


Re: [PATCH 5/5] dma-mapping: consolidate dma_set_mask

2015-08-13 Thread Christoph Hellwig
On Thu, Aug 13, 2015 at 04:25:05PM +0100, Russell King - ARM Linux wrote:
 On Thu, Aug 13, 2015 at 05:04:08PM +0200, Christoph Hellwig wrote:
  diff --git a/arch/arm/common/dmabounce.c b/arch/arm/common/dmabounce.c
  index 1143c4d..260f52a 100644
  --- a/arch/arm/common/dmabounce.c
  +++ b/arch/arm/common/dmabounce.c
  @@ -440,14 +440,6 @@ static void dmabounce_sync_for_device(struct device 
  *dev,
  arm_dma_ops.sync_single_for_device(dev, handle, size, dir);
   }
   
  -static int dmabounce_set_mask(struct device *dev, u64 dma_mask)
  -{
  -   if (dev->archdata.dmabounce)
  -   return 0;
  -
  -   return arm_dma_ops.set_dma_mask(dev, dma_mask);
 
 Are you sure about this?  A user of dmabounce gets to request any mask
 with the original code (even though it was never written back... which
 is a separate bug.)  After this, it seems that this will get limited
 by the dma_supported() check.  As this old code is about bouncing any
 buffer into DMA-able memory, it doesn't care about the DMA mask.

I think you're right.  With the default dma_supported implementation
it would be fine, but ARM uses a custom one.  I'll keep the arm
specific dma_set_mask implementation for the next round.
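
For context, the usual shape of such an arch opt-out in the common header is a guard along these lines (illustrative only; the HAVE_ARCH_DMA_SET_MASK macro name is an assumption, not something quoted from the series):

/* Hypothetical sketch of an arch opt-out for dma_set_mask() in the common
 * header: an architecture that needs custom behaviour (like ARM with
 * dmabounce) defines HAVE_ARCH_DMA_SET_MASK and supplies its own version. */
#ifndef HAVE_ARCH_DMA_SET_MASK
static inline int dma_set_mask(struct device *dev, u64 mask)
{
	struct dma_map_ops *ops = get_dma_ops(dev);

	if (ops->set_dma_mask)
		return ops->set_dma_mask(dev, mask);

	if (!dev->dma_mask || !dma_supported(dev, mask))
		return -EIO;

	*dev->dma_mask = mask;
	return 0;
}
#endif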

Re: [PATCH 2/3] powerpc/e6500: hw tablewalk: optimize a bit for tcd lock acquiring codes

2015-08-13 Thread Scott Wood
On Thu, 2015-08-13 at 19:51 +0800, Kevin Hao wrote:
 It makes no sense to put the instructions for calculating the lock
 value (cpu number + 1) and the clearing of the eq bit of cr1 inside the
 lbarx/stbcx loop. And when the lock is held by the other thread, the
 current lock value can never equal the lock value used by the current
 cpu, so we can skip comparing these two lock values in the lbz/bne
 loop.
 
 Signed-off-by: Kevin Hao haoke...@gmail.com
 ---
  arch/powerpc/mm/tlb_low_64e.S | 10 +-
  1 file changed, 5 insertions(+), 5 deletions(-)
 
 diff --git a/arch/powerpc/mm/tlb_low_64e.S b/arch/powerpc/mm/tlb_low_64e.S
 index 765b419883f2..e4185581c5a7 100644
 --- a/arch/powerpc/mm/tlb_low_64e.S
 +++ b/arch/powerpc/mm/tlb_low_64e.S
 @@ -308,11 +308,11 @@ BEGIN_FTR_SECTION   /* CPU_FTR_SMT */
*
* MAS6:IND should be already set based on MAS4
*/
 -1:   lbarx   r15,0,r11
   lhz r10,PACAPACAINDEX(r13)
 - cmpdi   r15,0
 - cmpdi   cr1,r15,1   /* set cr1.eq = 0 for non-recursive */
   addir10,r10,1
 + crclr   cr1*4+eq/* set cr1.eq = 0 for non-recursive */
 +1:   lbarx   r15,0,r11
 + cmpdi   r15,0
   bne 2f

You're optimizing the contended case at the expense of introducing stalls in 
the uncontended case.  Does it really matter if there are more instructions 
in the loop?  This change just means that you'll spin in the loop for more 
iterations (if it even does that -- I think the cycles per loop iteration 
might be the same before and after, due to load latency and pairing) while 
waiting for the other thread to release the lock.
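
For readers who don't follow the asm, the structural question is the classic spinlock one; here is a rough C11 analogy (tcd_lock() and the token scheme are hypothetical stand-ins, not the actual e6500 code):

#include <stdatomic.h>

typedef _Atomic unsigned char tcd_lock_t;

/* Analogy of the change under discussion: the per-cpu lock token is
 * invariant, so compute it once outside the acquisition loop; the retry
 * path then only does the atomic attempt, and the contended path spins
 * on plain loads until the holder releases the lock. */
static inline void tcd_lock(tcd_lock_t *lock, int cpu)
{
	unsigned char token = cpu + 1;	/* computed once, outside the loop */
	unsigned char expected;

	for (;;) {
		expected = 0;
		if (atomic_compare_exchange_weak(lock, &expected, token))
			return;		/* uncontended: acquired on first try */
		/* contended: wait with cheap loads before retrying the CAS */
		while (atomic_load_explicit(lock, memory_order_relaxed) != 0)
			;
	}
}

Whether hoisting the invariant work actually shortens the contended spin, as opposed to just shrinking the loop body, is exactly the benchmarking question raised below.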

Do you have any benchmark results for this patch?

-Scott


Re: RFC: prepare for struct scatterlist entries without page backing

2015-08-13 Thread Julian Calaby
Hi Christoph,

On Fri, Aug 14, 2015 at 12:35 AM, Christoph Hellwig h...@lst.de wrote:
 On Thu, Aug 13, 2015 at 09:37:37AM +1000, Julian Calaby wrote:
 I.e. ~90% of this patch set seems to be just mechanically dropping
 BUG_ON()s and converting open coded stuff to use accessor functions
 (which should be macros or get inlined, right?) - and the remaining
 bit is not flushing if we don't have a physical page somewhere.

 Which it was, 90%.  By lines changed, most of it actually is the diffs
 for the cache flushing.

I was talking in terms of changes made, not lines changed: by my
recollection, about a third of the patches didn't touch flush calls
and most of the lines changed looked like refactoring so that making
the flush call conditional would be easier.

I guess it smelled like you were doing lots of distinct changes in a
single patch and I got my numbers wrong.

 Would it make sense to split this patch set into a few bits: one to
 drop all the useless BUG_ON()s, one to convert all the open coded
 stuff to accessor functions, then another to do the actual page-less
 sg stuff?

 Without the ifs, the BUG_ON()s actually are useful to assert that we
 never feed in the sort of physical addresses we can't otherwise support,
 so I don't think that part is doable.

My point is that there are a couple of patches that only remove
BUG_ON()s, which implies that for that particular driver it doesn't
matter whether there's a physical page or not, so that code is
purely documentation.

Thanks,

-- 
Julian Calaby

Email: julian.cal...@gmail.com
Profile: http://www.google.com/profiles/julian.calaby/

Re: [PATCH v2 7/7] pmem, dax: have direct_access use __pmem annotation

2015-08-13 Thread Dan Williams
On Thu, Aug 13, 2015 at 9:51 AM, Ross Zwisler
ross.zwis...@linux.intel.com wrote:
 Update the annotation for the kaddr pointer returned by direct_access()
 so that it is a __pmem pointer.  This is consistent with the PMEM driver
 and with how this direct_access() pointer is used in the DAX code.

 Signed-off-by: Ross Zwisler ross.zwis...@linux.intel.com
 ---
  Documentation/filesystems/Locking |  3 ++-
  arch/powerpc/sysdev/axonram.c |  7 ---
  drivers/block/brd.c   |  4 ++--
  drivers/nvdimm/pmem.c |  4 ++--
  drivers/s390/block/dcssblk.c  | 10 +
  fs/block_dev.c|  2 +-
  fs/dax.c  | 44 
 +--
  include/linux/blkdev.h|  8 +++
  8 files changed, 45 insertions(+), 37 deletions(-)

 diff --git a/Documentation/filesystems/Locking 
 b/Documentation/filesystems/Locking
 index 6a34a0f..06d4434 100644
 --- a/Documentation/filesystems/Locking
 +++ b/Documentation/filesystems/Locking
 @@ -397,7 +397,8 @@ prototypes:
 int (*release) (struct gendisk *, fmode_t);
 int (*ioctl) (struct block_device *, fmode_t, unsigned, unsigned 
 long);
 int (*compat_ioctl) (struct block_device *, fmode_t, unsigned, 
 unsigned long);
 -   int (*direct_access) (struct block_device *, sector_t, void **, 
 unsigned long *);
 +   int (*direct_access) (struct block_device *, sector_t, void __pmem **,
 +   unsigned long *);

So this collides with the __pfn_t work.  I think we have a
reasonable chance of getting that into 4.3, so I'd wait to see if we
hit any major roadblocks with that set [1] before merging these.

[1]: https://lists.01.org/pipermail/linux-nvdimm/2015-August/001803.html

Re: [PATCH v3 4/6] powerpc/powernv: replace the hard coded boundary with gate

2015-08-13 Thread Gavin Shan
On Thu, Aug 13, 2015 at 10:11:09PM +0800, Wei Yang wrote:
At the moment the 64bit-prefetchable window can be at most 64GB, which is
currently read from the device tree. This means that in shared mode the maximum
supported VF BAR size is 64GB/256 = 256MB, and a VF BAR of that size could
exhaust the whole 64bit-prefetchable window. This is the reason for the design
decision to set a boundary of 64MB on the VF BAR size: since a 64MB VF BAR,
once expanded, would only occupy a quarter of the 64bit-prefetchable window,
this is affordable.

This patch replaces the magic limit of 64MB with (m64_segsize >> 1) and adds a
comment to explain the reason for it.

Signed-off-by: Wei Yang weiy...@linux.vnet.ibm.com

Reviewed-by: Gavin Shan gws...@linux.vent.ibm.com

---
 arch/powerpc/platforms/powernv/pci-ioda.c |   22 +-
 1 file changed, 17 insertions(+), 5 deletions(-)

diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c 
b/arch/powerpc/platforms/powernv/pci-ioda.c
index 4da0f50..3e8c0b4 100644
--- a/arch/powerpc/platforms/powernv/pci-ioda.c
+++ b/arch/powerpc/platforms/powernv/pci-ioda.c
@@ -2688,7 +2688,7 @@ static void pnv_pci_ioda_fixup_iov_resources(struct 
pci_dev *pdev)
   struct pnv_phb *phb;
   struct resource *res;
   int i;
-  resource_size_t size;
+  resource_size_t size, gate;
   struct pci_dn *pdn;
   int mul, total_vfs;

@@ -2704,6 +2704,17 @@ static void pnv_pci_ioda_fixup_iov_resources(struct 
pci_dev *pdev)

   total_vfs = pci_sriov_get_totalvfs(pdev);
   mul = phb-ioda.total_pe;
+  /*
+   * If bigger than or equal to half of M64 segment size, just round up
+   * power of two.
+   *
+   * Generally, one M64 BAR maps one IOV BAR. To avoid conflict with
+   * other devices, IOV BAR size is expanded to be (total_pe *
+   * VF_BAR_size).  When VF_BAR_size is half of M64 segment size , the
+   * expanded size would equal to half of the whole M64 Space size,
+   * which will exhaust the M64 Space and limit the system flexibility.
+   */

s/M64 Space/M64 space

+  gate = phb->ioda.m64_segsize >> 1;

   for (i = 0; i  PCI_SRIOV_NUM_BARS; i++) {
   res = pdev-resource[i + PCI_IOV_RESOURCES];
@@ -2718,10 +2729,11 @@ static void pnv_pci_ioda_fixup_iov_resources(struct 
pci_dev *pdev)

   size = pci_iov_resource_size(pdev, i + PCI_IOV_RESOURCES);

-  /* bigger than 64M */
-  if (size > (1 << 26)) {
-  dev_info(&pdev->dev, "PowerNV: VF BAR%d: %pR IOV size is bigger than 64M, roundup power2\n",
-   i, res);
+  /* bigger than or equal to gate */
+  if (size >= gate) {
+  dev_info(&pdev->dev, "PowerNV: VF BAR%d: %pR IOV size "
+  "is bigger than %lld, roundup power2\n",
+   i, res, gate);
   mul = roundup_pow_of_two(total_vfs);
   pdn-m64_single_mode = true;
   break;
-- 
1.7.9.5



Re: [PATCH v3 5/6] powerpc/powernv: boundary the total VF BAR size instead of the individual one

2015-08-13 Thread Gavin Shan
On Thu, Aug 13, 2015 at 10:11:10PM +0800, Wei Yang wrote:
Each VF can have at most 6 BARs. When the total BAR size exceeds the
gate, the expanded size will also exhaust the M64 window.

This patch enforces the boundary by checking the total VF BAR size instead of
each individual BAR.

Signed-off-by: Wei Yang weiy...@linux.vnet.ibm.com

Reviewed-by: Gavin Shan gws...@linux.vnet.ibm.com

---
 arch/powerpc/platforms/powernv/pci-ioda.c |   13 +++--
 1 file changed, 7 insertions(+), 6 deletions(-)

diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c 
b/arch/powerpc/platforms/powernv/pci-ioda.c
index 3e8c0b4..1e6ac86 100644
--- a/arch/powerpc/platforms/powernv/pci-ioda.c
+++ b/arch/powerpc/platforms/powernv/pci-ioda.c
@@ -2688,7 +2688,7 @@ static void pnv_pci_ioda_fixup_iov_resources(struct 
pci_dev *pdev)
   struct pnv_phb *phb;
   struct resource *res;
   int i;
-  resource_size_t size, gate;
+  resource_size_t size, gate, total_vf_bar_sz;
   struct pci_dn *pdn;
   int mul, total_vfs;

@@ -2715,6 +2715,7 @@ static void pnv_pci_ioda_fixup_iov_resources(struct 
pci_dev *pdev)
* which will exhaust the M64 Space and limit the system flexibility.
*/
   gate = phb->ioda.m64_segsize >> 1;
+  total_vf_bar_sz = 0;

   for (i = 0; i  PCI_SRIOV_NUM_BARS; i++) {
   res = pdev-resource[i + PCI_IOV_RESOURCES];
@@ -2727,13 +2728,13 @@ static void pnv_pci_ioda_fixup_iov_resources(struct 
pci_dev *pdev)
   return;
   }

-  size = pci_iov_resource_size(pdev, i + PCI_IOV_RESOURCES);
+  total_vf_bar_sz += pci_iov_resource_size(pdev,
+  i + PCI_IOV_RESOURCES);

   /* bigger than or equal to gate */
-  if (size >= gate) {
-  dev_info(&pdev->dev, "PowerNV: VF BAR%d: %pR IOV size "
-  "is bigger than %lld, roundup power2\n",
-   i, res, gate);
+  if (total_vf_bar_sz >= gate) {
+  dev_info(&pdev->dev, "PowerNV: VF BAR Total IOV size "
+  "is bigger than %lld, roundup power2\n", gate);

dev_info(&pdev->dev, "PowerNV: Total VF BAR size %lld "
 "is bigger than %lld, roundup power2\n",
 total_vf_bar_sz, gate);

   mul = roundup_pow_of_two(total_vfs);
   pdn-m64_single_mode = true;
   break;
-- 
1.7.9.5



Re: [PATCH v3 1/6] powerpc/powernv: don't enable SRIOV when VF BAR has non 64bit-prefetchable BAR

2015-08-13 Thread Gavin Shan
On Thu, Aug 13, 2015 at 10:11:06PM +0800, Wei Yang wrote:
On PHB_IODA2, we enable SRIOV devices by mapping the IOV BAR with M64 BARs. If
an SRIOV device's IOV BAR is not 64bit-prefetchable, it is not assigned from
the 64bit prefetchable window, which means an M64 BAR can't work on it.

This patch makes this explicit.

Signed-off-by: Wei Yang weiy...@linux.vnet.ibm.com

Reviewed-by: Gavin Shan gws...@linux.vnet.ibm.com

---
 arch/powerpc/platforms/powernv/pci-ioda.c |   25 +
 1 file changed, 9 insertions(+), 16 deletions(-)

diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c 
b/arch/powerpc/platforms/powernv/pci-ioda.c
index 5738d31..9ac324e 100644
--- a/arch/powerpc/platforms/powernv/pci-ioda.c
+++ b/arch/powerpc/platforms/powernv/pci-ioda.c
@@ -908,9 +908,6 @@ static int pnv_pci_vf_resource_shift(struct pci_dev *dev, 
int offset)
   if (!res-flags || !res-parent)
   continue;

-  if (!pnv_pci_is_mem_pref_64(res-flags))
-  continue;
-
   /*
* The actual IOV BAR range is determined by the start address
* and the actual size for num_vfs VFs BAR.  This check is to
@@ -939,9 +936,6 @@ static int pnv_pci_vf_resource_shift(struct pci_dev *dev, 
int offset)
   if (!res-flags || !res-parent)
   continue;

-  if (!pnv_pci_is_mem_pref_64(res-flags))
-  continue;
-
   size = pci_iov_resource_size(dev, i + PCI_IOV_RESOURCES);
   res2 = *res;
   res-start += size * offset;
@@ -1221,9 +1215,6 @@ static int pnv_pci_vf_assign_m64(struct pci_dev *pdev, 
u16 num_vfs)
   if (!res-flags || !res-parent)
   continue;

-  if (!pnv_pci_is_mem_pref_64(res-flags))
-  continue;
-
   for (j = 0; j  vf_groups; j++) {
   do {
   win = 
 find_next_zero_bit(phb-ioda.m64_bar_alloc,
@@ -1510,6 +1501,12 @@ int pnv_pci_sriov_enable(struct pci_dev *pdev, u16 
num_vfs)
   pdn = pci_get_pdn(pdev);

   if (phb-type == PNV_PHB_IODA2) {
+  if (!pdn-vfs_expanded) {
+  dev_info(pdev-dev, don't support this SRIOV device
+   with non 64bit-prefetchable IOV BAR\n);
+  return -ENOSPC;
+  }
+
   /* Calculate available PE for required VFs */
   mutex_lock(phb-ioda.pe_alloc_mutex);
   pdn-offset = bitmap_find_next_zero_area(
@@ -2774,9 +2771,10 @@ static void pnv_pci_ioda_fixup_iov_resources(struct 
pci_dev *pdev)
   if (!res-flags || res-parent)
   continue;
   if (!pnv_pci_is_mem_pref_64(res-flags)) {
-  dev_warn(pdev-dev,  non M64 VF BAR%d: %pR\n,
+  dev_warn(pdev-dev, Don't support SR-IOV with
+   non M64 VF BAR%d: %pR. \n,
i, res);
-  continue;
+  return;
   }

   size = pci_iov_resource_size(pdev, i + PCI_IOV_RESOURCES);
@@ -2795,11 +2793,6 @@ static void pnv_pci_ioda_fixup_iov_resources(struct 
pci_dev *pdev)
   res = pdev-resource[i + PCI_IOV_RESOURCES];
   if (!res-flags || res-parent)
   continue;
-  if (!pnv_pci_is_mem_pref_64(res-flags)) {
-  dev_warn(pdev-dev, Skipping expanding VF BAR%d: 
%pR\n,
-   i, res);
-  continue;
-  }

   dev_dbg(pdev-dev,  Fixing VF BAR%d: %pR to\n, i, res);
   size = pci_iov_resource_size(pdev, i + PCI_IOV_RESOURCES);
-- 
1.7.9.5



Re: [PATCH v3 3/6] powerpc/powernv: use one M64 BAR in Single PE mode for one VF BAR

2015-08-13 Thread Gavin Shan
On Thu, Aug 13, 2015 at 10:11:08PM +0800, Wei Yang wrote:
In the current implementation, when a VF BAR is bigger than 64MB, it uses 4 M64
BARs in Single PE mode to cover the number of VFs required to be enabled.
By doing so, several VFs end up in one VF Group, which leads to interference
between VFs in the same group.

This patch changes the design by using one M64 BAR in Single PE mode for
one VF BAR. This gives absolute isolation for VFs.

Signed-off-by: Wei Yang weiy...@linux.vnet.ibm.com
---
 arch/powerpc/include/asm/pci-bridge.h |6 +-
 arch/powerpc/platforms/powernv/pci-ioda.c |  163 +++--
 2 files changed, 62 insertions(+), 107 deletions(-)

diff --git a/arch/powerpc/include/asm/pci-bridge.h 
b/arch/powerpc/include/asm/pci-bridge.h
index 712add5..9d33ada 100644
--- a/arch/powerpc/include/asm/pci-bridge.h
+++ b/arch/powerpc/include/asm/pci-bridge.h
@@ -187,6 +187,7 @@ static inline int isa_vaddr_is_ioport(void __iomem 
*address)
  */
 struct iommu_table;

+#define MAX_M64_BAR  16

struct pnv_phb::m64_bar_idx is initialized to 15, and another macro is defined
here as 16. Both of them can be used as the maximal M64 BAR number, so they are
obviously duplicated. On the other hand, I don't think it's a good idea to have
the static m64_map, because @pdn is created for every PCI device, including VFs.
Non-PF pdns don't need m64_map (or other fields like m64_per_iov) at all, so it
is obviously wasting memory. It would be better to allocate it dynamically when
the PF's pdn is created, or in pnv_pci_ioda_fixup_iov_resources().

In the long run, it might be reasonable to move all SRIOV-related fields in
pci_dn to another data structure (struct pci_iov_dn?) and allocate that
dynamically.
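
A rough sketch of the dynamic allocation being suggested (purely illustrative and not part of the posted patch; it assumes m64_map becomes a flat "int *" in pci_dn and is set up once when the PF's IOV resources are fixed up):

/* Sketch only: per-PF allocation of the M64 window map, done once for PFs
 * instead of embedding a [PCI_SRIOV_NUM_BARS][MAX_M64_BAR] array in every
 * pci_dn.  The function name and the int * layout are hypothetical. */
static int pnv_pci_ioda_alloc_m64_map(struct pnv_phb *phb, struct pci_dn *pdn)
{
	int i, nr = PCI_SRIOV_NUM_BARS * phb->ioda.total_pe;

	/* One slot per (IOV BAR, M64 BAR) pair, allocated only for PFs. */
	pdn->m64_map = kmalloc_array(nr, sizeof(*pdn->m64_map), GFP_KERNEL);
	if (!pdn->m64_map)
		return -ENOMEM;

	for (i = 0; i < nr; i++)
		pdn->m64_map[i] = IODA_INVALID_M64;

	return 0;
}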

   int flags;
 #define PCI_DN_FLAG_IOV_VF0x01
@@ -214,10 +215,9 @@ struct pci_dn {
   u16 vfs_expanded;   /* number of VFs IOV BAR expanded */
   u16 num_vfs;/* number of VFs enabled*/
   int offset; /* PE# for the first VF PE */
-#define M64_PER_IOV 4
-  int m64_per_iov;
+  boolm64_single_mode;/* Use M64 BAR in Single Mode */
 #define IODA_INVALID_M64(-1)
-  int m64_wins[PCI_SRIOV_NUM_BARS][M64_PER_IOV];
+  int  m64_map[PCI_SRIOV_NUM_BARS][MAX_M64_BAR];
 #endif /* CONFIG_PCI_IOV */
 #endif
   struct list_head child_list;
diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c 
b/arch/powerpc/platforms/powernv/pci-ioda.c
index 67b8f72..4da0f50 100644
--- a/arch/powerpc/platforms/powernv/pci-ioda.c
+++ b/arch/powerpc/platforms/powernv/pci-ioda.c
@@ -1162,15 +1162,14 @@ static int pnv_pci_vf_release_m64(struct pci_dev *pdev)
   pdn = pci_get_pdn(pdev);

   for (i = 0; i  PCI_SRIOV_NUM_BARS; i++)
-  for (j = 0; j  M64_PER_IOV; j++) {
-  if (pdn-m64_wins[i][j] == IODA_INVALID_M64)
+  for (j = 0; j  MAX_M64_BAR; j++) {
+  if (pdn-m64_map[i][j] == IODA_INVALID_M64)
   continue;
   opal_pci_phb_mmio_enable(phb-opal_id,
-  OPAL_M64_WINDOW_TYPE, pdn-m64_wins[i][j], 0);
-  clear_bit(pdn-m64_wins[i][j], 
phb-ioda.m64_bar_alloc);
-  pdn-m64_wins[i][j] = IODA_INVALID_M64;
+  OPAL_M64_WINDOW_TYPE, pdn-m64_map[i][j], 0);
+  clear_bit(pdn-m64_map[i][j], phb-ioda.m64_bar_alloc);
+  pdn-m64_map[i][j] = IODA_INVALID_M64;
   }
-
   return 0;
 }

@@ -1187,8 +1186,7 @@ static int pnv_pci_vf_assign_m64(struct pci_dev *pdev, 
u16 num_vfs)
   inttotal_vfs;
   resource_size_tsize, start;
   intpe_num;
-  intvf_groups;
-  intvf_per_group;
+  intm64_bars;

   bus = pdev-bus;
   hose = pci_bus_to_host(bus);
@@ -1196,26 +1194,23 @@ static int pnv_pci_vf_assign_m64(struct pci_dev *pdev, 
u16 num_vfs)
   pdn = pci_get_pdn(pdev);
   total_vfs = pci_sriov_get_totalvfs(pdev);

-  /* Initialize the m64_wins to IODA_INVALID_M64 */
-  for (i = 0; i  PCI_SRIOV_NUM_BARS; i++)
-  for (j = 0; j  M64_PER_IOV; j++)
-  pdn-m64_wins[i][j] = IODA_INVALID_M64;
+  if (pdn-m64_single_mode)
+  m64_bars = num_vfs;
+  else
+  m64_bars = 1;
+
+  /* Initialize the m64_map to IODA_INVALID_M64 */
+  for (i = 0; i  PCI_SRIOV_NUM_BARS ; i++)
+  for (j = 0; j  MAX_M64_BAR; j++)
+  pdn-m64_map[i][j] = IODA_INVALID_M64;

This could be done in pnv_pci_ioda_fixup_iov_resources(). That way it would be
done only once if hotplug isn't considered. The code here will be called on
every attempt to enable the SRIOV capability, which isn't necessary, right?


-  if (pdn-m64_per_iov == M64_PER_IOV) {
-  vf_groups = (num_vfs = M64_PER_IOV) ? num_vfs: 

Re: [PATCH 09/10] Define PERF_PMU_TXN_READ interface

2015-08-13 Thread Sukadev Bhattiprolu
Peter Zijlstra [pet...@infradead.org] wrote:
| On Tue, Aug 11, 2015 at 09:14:00PM -0700, Sukadev Bhattiprolu wrote:
|  | +static void __perf_read_group_add(struct perf_event *leader, u64 
read_format, u64 *values)
|  |  {
|  | + struct perf_event *sub;
|  | + int n = 1; /* skip @nr */
|  
|  This n = 1 is to skip over the values[0] = 1 + nr_siblings in the
|  caller.
|  
|  Anyway, in __perf_read_group_add() we always start with n = 1, however
|  ...
|  | 
|  | + perf_event_read(leader, true);
|  | +
|  | + /*
|  | +  * Since we co-schedule groups, {enabled,running} times of siblings
|  | +  * will be identical to those of the leader, so we only publish one
|  | +  * set.
|  | +  */
|  | + if (read_format  PERF_FORMAT_TOTAL_TIME_ENABLED) {
|  | + values[n++] += leader-total_time_enabled +
|  | + atomic64_read(leader-child_total_time_enabled);
| 
| Note how this is an in-place addition,

Ah, yes, sorry I missed that. It makes sense now and my tests seem to
be running fine.

| 
|  | + }
|  | 
|  | + if (read_format  PERF_FORMAT_TOTAL_TIME_RUNNING) {
|  | + values[n++] += leader-total_time_running +
|  | + atomic64_read(leader-child_total_time_running);
| 
| and here,
| 
|  | + }
|  | 
|  | + /*
|  | +  * Write {count,id} tuples for every sibling.
|  | +  */
|  | + values[n++] += perf_event_count(leader);
| 
| and here,
| 
| 
|  |   if (read_format  PERF_FORMAT_ID)
|  |   values[n++] = primary_event_id(leader);
| 
| and this will always assign the same value.
| 
|  | + list_for_each_entry(sub, leader-sibling_list, group_entry) {
|  | + values[n++] += perf_event_count(sub);
|  | + if (read_format  PERF_FORMAT_ID)
|  | + values[n++] = primary_event_id(sub);
| 
| Same for these, therefore,
| 
|  | + }
|  | +}
|  | 
|  | +static int perf_read_group(struct perf_event *event,
|  | +u64 read_format, char __user *buf)
|  | +{
|  | + struct perf_event *leader = event-group_leader, *child;
|  | + struct perf_event_context *ctx = leader-ctx;
|  | + int ret = leader-read_size;

One other question: we return leader->read_size but allocate/copy_to_user
the sibling's event->read_size. We consistently use read_format from the
'event' being read, rather than from its 'group_leader', so we are OK in terms
of what we copy into values[] for each event in the group.

But can the leader's read_format (and hence its read_size) differ from a
sibling's read_size? If so, in the current code we return the event's
read_size, but in the new code we return the leader's read_size.

|  | + u64 *values;
|  | 
|  | + lockdep_assert_held(ctx-mutex);
|  | 
|  | + values = kzalloc(event-read_size);
|  | + if (!values)
|  | + return -ENOMEM;
|  | 
|  | + values[0] = 1 + leader-nr_siblings;
|  | 
|  | + /*
|  | +  * By locking the child_mutex of the leader we effectively
|  | +  * lock the child list of all siblings.. XXX explain how.
|  | +  */
|  | + mutex_lock(leader-child_mutex);
|  | 
|  | + __perf_read_group_add(leader, read_format, values);
|  
|  ... we don't copy_to_user() here,
|  
|  | + list_for_each_entry(child, leader-child_list, child_list)
|  | + __perf_read_group_add(child, read_format, values);
|  
|  so won't we overwrite the values[], if we always start at n = 1
|  in __perf_read_group_add()?
| 
| yes and no, we have to re-iterate the same values for each child as they
| all have the same group, but we add the time and count fields, we do not
| overwrite. The _add() suffix was supposed to be a hint ;-)
| 
|  | + mutex_unlock(leader-child_mutex);
|  | +
|  | + if (copy_to_user(buf, values, event-read_size))
|  | + ret = -EFAULT;
|  | +
|  | + kfree(values);
|  | 
|  |   return ret;
|  |  }
| 
| Where previously we would iterate the group and for each member
| iterate/sum all the child values together before copying the value out,
| we now, because we need to read groups together, need to first iterate
| the child list and sum whole groups.
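
For reference, the buffer being filled has the standard perf_event_open() group-read layout; the struct below only illustrates how values[] is indexed when read_format includes GROUP, TOTAL_TIME_ENABLED, TOTAL_TIME_RUNNING and ID, and is not code from the patch:

#include <linux/types.h>

/* Illustration of the group read buffer that __perf_read_group_add() fills
 * in place.  The summed fields accumulate across the leader and every entry
 * on its child_list; the id fields are simply rewritten on each pass. */
struct group_read_buf {
	__u64 nr;		/* values[0]: 1 + nr_siblings, written once */
	__u64 time_enabled;	/* values[1]: summed over leader and children */
	__u64 time_running;	/* values[2]: summed over leader and children */
	struct {
		__u64 value;	/* count, summed over leader and children */
		__u64 id;	/* identical on every pass */
	} cnt[];		/* leader first, then each sibling in order */
};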


Re: [PATCH 09/10] Define PERF_PMU_TXN_READ interface

2015-08-13 Thread Peter Zijlstra
On Thu, Aug 13, 2015 at 01:04:28PM -0700, Sukadev Bhattiprolu wrote:

 |  | +static int perf_read_group(struct perf_event *event,
 |  | +  u64 read_format, char __user *buf)
 |  | +{
 |  | +   struct perf_event *leader = event-group_leader, *child;
 |  | +   struct perf_event_context *ctx = leader-ctx;
 |  | +   int ret = leader-read_size;

 One other question, We return leader-read_size but allocate/copy_to_user
 the sibling's event-read_size. We consistently use read_format from the
 'event' being read, rather than its 'group_leader', so we are ok in terms
 of what we copy into values[] for each event in the group.
 
 But, can the leader's read_format (and hence its read_size) differ from
 its sibling's read_size? If so, in the current code, we return the event's
 read_size but in the new code, we return the leader's read_size.

Hmm, good spotting that. I'm fairly sure I didn't do that on purpose.

I think we should use event->read_size there too and have the lot
consistent. I don't think we require read_format to be uniform across
siblings.

Re: RFC: Reducing the number of non volatile GPRs in the ppc64 kernel

2015-08-13 Thread Anton Blanchard
Hi,

Here is another instruction trace from a kernel context switch trace.
Quite a lot of register and CR save/restore code.

Regards,
Anton

c02943d8 fsnotify+0x8 mfcrr12
c02943dc fsnotify+0xc std r20,-96(r1)
c02943e0 fsnotify+0x10 std r21,-88(r1)
c02943e4 fsnotify+0x14 rldicl. r9,r4,63,63
c02943e8 fsnotify+0x18 std r22,-80(r1)
c02943ec fsnotify+0x1c mflrr0
c02943f0 fsnotify+0x20 std r24,-64(r1)
c02943f4 fsnotify+0x24 std r25,-56(r1)
c02943f8 fsnotify+0x28 std r26,-48(r1)
c02943fc fsnotify+0x2c std r27,-40(r1)
c0294400 fsnotify+0x30 std r31,-8(r1)
c0294404 fsnotify+0x34 std r15,-136(r1)
c0294408 fsnotify+0x38 stw r12,8(r1)
c029440c fsnotify+0x3c std r16,-128(r1)
c0294410 fsnotify+0x40 mcrfcr4,cr0
c0294414 fsnotify+0x44 std r0,16(r1)
c0294418 fsnotify+0x48 std r17,-120(r1)
c029441c fsnotify+0x4c std r18,-112(r1)
c0294420 fsnotify+0x50 std r19,-104(r1)
c0294424 fsnotify+0x54 std r23,-72(r1)
c0294428 fsnotify+0x58 std r28,-32(r1)
c029442c fsnotify+0x5c std r29,-24(r1)
c0294430 fsnotify+0x60 std r30,-16(r1)
c0294434 fsnotify+0x64 stdur1,-272(r1)
c0294438 fsnotify+0x68 cmpwi   cr7,r6,1
c029443c fsnotify+0x6c rlwinm  r31,r4,4,1,31
c0294440 fsnotify+0x70 li  r9,0
c029 fsnotify+0x74 rotlwi  r31,r31,28
c0294448 fsnotify+0x78 mr  r24,r6
c029444c fsnotify+0x7c mr  r26,r4
c0294450 fsnotify+0x80 mr  r25,r3
c0294454 fsnotify+0x84 mr  r22,r5
c0294458 fsnotify+0x88 mr  r21,r7
c029445c fsnotify+0x8c mr  r20,r8
c0294460 fsnotify+0x90 std r9,120(r1)
c0294464 fsnotify+0x94 std r9,112(r1)
c0294468 fsnotify+0x98 clrldi  r27,r31,32
c029446c fsnotify+0x9c beq cr7,c0294888 fsnotify+0x4b8 
c0294888 fsnotify+0x4b8 ld  r29,0(r5)
c029488c fsnotify+0x4bc addir29,r29,-32
c0294890 fsnotify+0x4c0 beq c0294478 fsnotify+0xa8 
c0294478 fsnotify+0xa8 lwz r9,516(r25)
c029447c fsnotify+0xac and r10,r9,r31
c0294480 fsnotify+0xb0 cmpwi   r10,0
c0294484 fsnotify+0xb4 bne c02945d0 fsnotify+0x200 
c0294488 fsnotify+0xb8 cmpdi   cr7,r29,0
c029448c fsnotify+0xbc beq cr7,c02948c4 fsnotify+0x4f4 
c0294490 fsnotify+0xc0 lwz r9,264(r29)
c0294494 fsnotify+0xc4 and r10,r9,r31
c0294498 fsnotify+0xc8 cmpwi   r10,0
c029449c fsnotify+0xcc beq c02948c4 fsnotify+0x4f4 
c02948c4 fsnotify+0x4f4 li  r3,0
c02948c8 fsnotify+0x4f8 b   c02947cc fsnotify+0x3fc 
c02947cc fsnotify+0x3fc addir1,r1,272
c02947d0 fsnotify+0x400 ld  r0,16(r1)
c02947d4 fsnotify+0x404 lwz r12,8(r1)
c02947d8 fsnotify+0x408 ld  r15,-136(r1)
c02947dc fsnotify+0x40c ld  r16,-128(r1)
c02947e0 fsnotify+0x410 mtlrr0
c02947e4 fsnotify+0x414 ld  r17,-120(r1)
c02947e8 fsnotify+0x418 ld  r18,-112(r1)
c02947ec fsnotify+0x41c mtocrf  32,r12
c02947f0 fsnotify+0x420 mtocrf  16,r12
c02947f4 fsnotify+0x424 mtocrf  8,r12
c02947f8 fsnotify+0x428 ld  r19,-104(r1)
c02947fc fsnotify+0x42c ld  r20,-96(r1)
c0294800 fsnotify+0x430 ld  r21,-88(r1)
c0294804 fsnotify+0x434 ld  r22,-80(r1)
c0294808 fsnotify+0x438 ld  r23,-72(r1)
c029480c fsnotify+0x43c ld  r24,-64(r1)
c0294810 fsnotify+0x440 ld  r25,-56(r1)
c0294814 fsnotify+0x444 ld  r26,-48(r1)
c0294818 fsnotify+0x448 ld  r27,-40(r1)
c029481c fsnotify+0x44c ld  r28,-32(r1)
c0294820 fsnotify+0x450 ld  r29,-24(r1)
c0294824 fsnotify+0x454 ld  r30,-16(r1)
c0294828 fsnotify+0x458 ld  r31,-8(r1)
c029482c fsnotify+0x45c blr

[PATCH v2 7/7] pmem, dax: have direct_access use __pmem annotation

2015-08-13 Thread Ross Zwisler
Update the annotation for the kaddr pointer returned by direct_access()
so that it is a __pmem pointer.  This is consistent with the PMEM driver
and with how this direct_access() pointer is used in the DAX code.

Signed-off-by: Ross Zwisler ross.zwis...@linux.intel.com
---
 Documentation/filesystems/Locking |  3 ++-
 arch/powerpc/sysdev/axonram.c |  7 ---
 drivers/block/brd.c   |  4 ++--
 drivers/nvdimm/pmem.c |  4 ++--
 drivers/s390/block/dcssblk.c  | 10 +
 fs/block_dev.c|  2 +-
 fs/dax.c  | 44 +--
 include/linux/blkdev.h|  8 +++
 8 files changed, 45 insertions(+), 37 deletions(-)

diff --git a/Documentation/filesystems/Locking 
b/Documentation/filesystems/Locking
index 6a34a0f..06d4434 100644
--- a/Documentation/filesystems/Locking
+++ b/Documentation/filesystems/Locking
@@ -397,7 +397,8 @@ prototypes:
int (*release) (struct gendisk *, fmode_t);
int (*ioctl) (struct block_device *, fmode_t, unsigned, unsigned long);
int (*compat_ioctl) (struct block_device *, fmode_t, unsigned, unsigned 
long);
-   int (*direct_access) (struct block_device *, sector_t, void **, 
unsigned long *);
+   int (*direct_access) (struct block_device *, sector_t, void __pmem **,
+   unsigned long *);
int (*media_changed) (struct gendisk *);
void (*unlock_native_capacity) (struct gendisk *);
int (*revalidate_disk) (struct gendisk *);
diff --git a/arch/powerpc/sysdev/axonram.c b/arch/powerpc/sysdev/axonram.c
index ee90db1..a2be2a6 100644
--- a/arch/powerpc/sysdev/axonram.c
+++ b/arch/powerpc/sysdev/axonram.c
@@ -141,13 +141,14 @@ axon_ram_make_request(struct request_queue *queue, struct 
bio *bio)
  */
 static long
 axon_ram_direct_access(struct block_device *device, sector_t sector,
-  void **kaddr, unsigned long *pfn, long size)
+  void __pmem **kaddr, unsigned long *pfn, long size)
 {
struct axon_ram_bank *bank = device-bd_disk-private_data;
loff_t offset = (loff_t)sector  AXON_RAM_SECTOR_SHIFT;
+   void *addr = (void *)(bank-ph_addr + offset);
 
-   *kaddr = (void *)(bank-ph_addr + offset);
-   *pfn = virt_to_phys(*kaddr)  PAGE_SHIFT;
+   *kaddr = (void __pmem *)addr;
+   *pfn = virt_to_phys(addr)  PAGE_SHIFT;
 
return bank-size - offset;
 }
diff --git a/drivers/block/brd.c b/drivers/block/brd.c
index 5750b39..2691bb6 100644
--- a/drivers/block/brd.c
+++ b/drivers/block/brd.c
@@ -371,7 +371,7 @@ static int brd_rw_page(struct block_device *bdev, sector_t 
sector,
 
 #ifdef CONFIG_BLK_DEV_RAM_DAX
 static long brd_direct_access(struct block_device *bdev, sector_t sector,
-   void **kaddr, unsigned long *pfn, long size)
+   void __pmem **kaddr, unsigned long *pfn, long size)
 {
struct brd_device *brd = bdev-bd_disk-private_data;
struct page *page;
@@ -381,7 +381,7 @@ static long brd_direct_access(struct block_device *bdev, 
sector_t sector,
page = brd_insert_page(brd, sector);
if (!page)
return -ENOSPC;
-   *kaddr = page_address(page);
+   *kaddr = (void __pmem *)page_address(page);
*pfn = page_to_pfn(page);
 
/*
diff --git a/drivers/nvdimm/pmem.c b/drivers/nvdimm/pmem.c
index ade9eb9..68f6a6a 100644
--- a/drivers/nvdimm/pmem.c
+++ b/drivers/nvdimm/pmem.c
@@ -92,7 +92,7 @@ static int pmem_rw_page(struct block_device *bdev, sector_t 
sector,
 }
 
 static long pmem_direct_access(struct block_device *bdev, sector_t sector,
- void **kaddr, unsigned long *pfn, long size)
+ void __pmem **kaddr, unsigned long *pfn, long size)
 {
struct pmem_device *pmem = bdev-bd_disk-private_data;
size_t offset = sector  9;
@@ -101,7 +101,7 @@ static long pmem_direct_access(struct block_device *bdev, 
sector_t sector,
return -ENODEV;
 
/* FIXME convert DAX to comprehend that this mapping has a lifetime */
-   *kaddr = (void __force *) pmem-virt_addr + offset;
+   *kaddr = pmem-virt_addr + offset;
*pfn = (pmem-phys_addr + offset)  PAGE_SHIFT;
 
return pmem-size - offset;
diff --git a/drivers/s390/block/dcssblk.c b/drivers/s390/block/dcssblk.c
index da21281..2c5a397 100644
--- a/drivers/s390/block/dcssblk.c
+++ b/drivers/s390/block/dcssblk.c
@@ -29,7 +29,7 @@ static int dcssblk_open(struct block_device *bdev, fmode_t 
mode);
 static void dcssblk_release(struct gendisk *disk, fmode_t mode);
 static void dcssblk_make_request(struct request_queue *q, struct bio *bio);
 static long dcssblk_direct_access(struct block_device *bdev, sector_t secnum,
-void **kaddr, unsigned long *pfn, long size);
+void __pmem **kaddr, unsigned long *pfn, long size);
 
 static char 

[PATCH v2 0/7] dax: I/O path enhancements

2015-08-13 Thread Ross Zwisler
The goal of this series is to enhance the DAX I/O path so that all operations
that store data (I/O writes, zeroing blocks, punching holes, etc.) properly
synchronize the stores to media using the PMEM API.  This ensures that the data
DAX is writing is durable on media before the operation completes.
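
As a concrete picture of what "properly synchronize the stores" means here, the write paths end up following a store-then-persist pattern along these lines (a minimal sketch using the PMEM API helper names this series builds on; it is illustrative, not a quote from the patches):

#include <linux/pmem.h>

/* Sketch: copy data into a persistent-memory mapping and make it durable.
 * memcpy_to_pmem()/wmb_pmem() are assumed to be the baseline PMEM API entry
 * points; clear_pmem()/wb_cache_pmem() added by this series are used the
 * same way for zeroing and flushing already-written ranges. */
static void dax_copy_and_persist(void __pmem *dst, const void *src, size_t n)
{
	memcpy_to_pmem(dst, src, n);	/* stores routed through the PMEM API */
	wmb_pmem();			/* order/flush so the data is durable on media */
}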

Patches 1-4 are a few random cleanups.

Changes from v1:
 - Removed patches to PMEM for the read flush _DSM flag.  These are different
   enough that they deserve their own series, and they have a separate baseline
   which is currently moving (Dan's memremap() series).
 - Added clear_pmem() PMEM API to zero DAX memory and flush it in one call.
   (Dave)
 - Open coded flushing in arch_wb_cache_pmem() instead of adding a generic
   clwb_flush_range().  This allowed me to avoid having extra memory barriers
   and instead rely completely on arch_wmb_pmem() for ordering. (Dave)
 - Moved the arch implementation of the PMEM API into its own arch header
   (Christoph).

Ross Zwisler (7):
  brd: make rd_size static
  pmem, x86: move x86 PMEM API to new pmem.h header
  pmem: remove layer when calling arch_has_wmb_pmem()
  pmem, x86: clean up conditional pmem includes
  pmem: add wb_cache_pmem() and clear_pmem()
  dax: update I/O path to do proper PMEM flushing
  pmem, dax: have direct_access use __pmem annotation

 Documentation/filesystems/Locking |   3 +-
 MAINTAINERS   |   1 +
 arch/powerpc/sysdev/axonram.c |   7 ++-
 arch/x86/include/asm/cacheflush.h |  71 --
 arch/x86/include/asm/pmem.h   | 123 ++
 drivers/block/brd.c   |   6 +-
 drivers/nvdimm/pmem.c |   4 +-
 drivers/s390/block/dcssblk.c  |  10 ++--
 fs/block_dev.c|   2 +-
 fs/dax.c  |  73 ++
 include/linux/blkdev.h|   8 +--
 include/linux/pmem.h  |  66 
 12 files changed, 247 insertions(+), 127 deletions(-)
 create mode 100644 arch/x86/include/asm/pmem.h

-- 
2.1.0


RE: [PATCH V2] QorIQ/TMU: add thermal management support based on TMU

2015-08-13 Thread Hongtao Jia
Hi Eduardo,

In a previous mail I asked questions about including header files in the device tree.
Don't bother, I have already figured out the solution.

Another question is about CPU cooling:
I found that there is no explicit call to register a CPU cooling
device in the of-thermal style drivers.

Samsung does it in the cpufreq driver: drivers/cpufreq/exynos-cpufreq.c

Should all of-thermal drivers do it the same way?
Or is there any recommendation for how to register the CPU cooling device?
(I enabled CONFIG_CPUFREQ_DT and still got no cooling device registered.)
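
For what it's worth, the pattern used by DT-based cpufreq drivers at this point is to register the cooling device from the cpufreq side; a rough sketch with the current of_cpufreq_cooling_register() signature (the helper name and placement below are hypothetical, not from any particular driver):

#include <linux/cpu_cooling.h>
#include <linux/cpufreq.h>
#include <linux/of.h>

static struct thermal_cooling_device *cdev;

/* Hypothetical example: once a cpufreq policy is ready, register the CPUs
 * it covers as a cooling device if the CPU node has #cooling-cells, so
 * of-thermal can bind it through the cooling-maps in the device tree. */
static void example_register_cpu_cooling(struct cpufreq_policy *policy)
{
	struct device_node *np = of_get_cpu_node(policy->cpu, NULL);

	if (np && of_find_property(np, "#cooling-cells", NULL))
		cdev = of_cpufreq_cooling_register(np, policy->cpus);

	of_node_put(np);
}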

Thanks.

---
Best Regards,
Hongtao


 -Original Message-
 From: Linuxppc-dev [mailto:linuxppc-dev-
 bounces+b38951=freescale@lists.ozlabs.org] On Behalf Of Hongtao Jia
 Sent: Friday, August 07, 2015 4:15 PM
 To: Eduardo Valentin
 Cc: Wood Scott-B07421; linuxppc-dev@lists.ozlabs.org; linux-
 p...@vger.kernel.org
 Subject: RE: [PATCH V2] QorIQ/TMU: add thermal management support based
 on TMU
 
 Thanks for your comments.
 Please see my questions inline.
 
 Thanks.
 ---
 Best Regards,
 Hongtao
 
 
  -Original Message-
  From: Eduardo Valentin [mailto:edubez...@gmail.com]
  Sent: Thursday, August 06, 2015 3:43 AM
  To: Jia Hongtao-B38951
  Cc: linux...@vger.kernel.org; linuxppc-dev@lists.ozlabs.org; Wood
  Scott-
  B07421
  Subject: Re: [PATCH V2] QorIQ/TMU: add thermal management support
  based on TMU
 
  On Thu, Jul 30, 2015 at 08:13:09AM +, Hongtao Jia wrote:
   - Any specific reason why not using OF thermal?
   - No, actually.
  
   I'd like to use OF thermal after some clarification.
  
   Regarding to cooling-maps. For some cases there should be more
   than one cpus as cooling device and they are independent.
   1. Let's say 4. So we need to provide 4 maps like map0-map3. Right?
 
  That would depend on the amount of sensors you have. Do you have one
  sensor per cpu? if the answer is yes, then you probably want to have
  four different map entries, yes, but one on each thermal zone of each
  cpu temperature sensor. if the answer is no, then you would need to
  have all the maps in the same thermal zone.
 
   2. cooling-max-level may vary depend on switch settings or firmware.
  Is that
  OK if I do not provide cooling-min-level and cooling-max-level
  property?
 
  That is already achievable by using the cooling-device property of a
  cooling map.
 
  Please have a look in the example section of the
  Documentation/devicetree/bindings/thermal/thermal.txt
 
 Yes, I read this file.
 So in my understanding:
 There is no need to provide cooling-min-level and cooling-max-level
 property.
 The THERMAL_NO_LIMIT value in the cooling device node will tell the driver to
 parse the min and max state automatically, right?
 
 Talking about THERMAL_NO_LIMIT, I need to #include <dt-bindings/thermal/thermal.h>
 to provide the definition. But I got a compile error when building the dtb file.
 I did some research and used "make t1040qds.dtb" in order to involve the
 preprocessor.
 But with simply adding #include <dt-bindings/thermal/thermal.h> to
 t1040si-post.dtsi at line 35 I still got an error like this:
 Error: arch/powerpc/boot/dts/fsl/t1040si-post.dtsi:35.1-9 syntax error
 FATAL ERROR: Unable to parse input tree
 
 Could you help me out here.
 Thanks.
 
 
  Let me know if you need further clarification.
 
 
  BR,
 
  Eduardo Valentin
 
  
   Thanks.
   -Hongtao
  
  
-Original Message-
From: Eduardo Valentin [mailto:edubez...@gmail.com]
Sent: Thursday, July 30, 2015 2:56 PM
To: Jia Hongtao-B38951
Cc: linux...@vger.kernel.org; linuxppc-dev@lists.ozlabs.org; Wood
Scott-
B07421
Subject: Re: [PATCH V2] QorIQ/TMU: add thermal management support
based on TMU
   
On Wed, Jul 29, 2015 at 02:19:39PM +0800, Jia Hongtao wrote:
 It supports one critical trip point and one passive trip point.
 The cpufreq is used as the cooling device to throttle CPUs when
 the passive trip is crossed.

 Signed-off-by: Jia Hongtao hongtao@freescale.com
 ---
 This patch based on:
 http://patchwork.ozlabs.org/patch/482987/

 Changes for V2:
 * Add tmu-range parse.
 * Use default trend hook.
 * Using latest thermal_zone_bind_cooling_device API.
 * Add calibration check during initialization.
 * Disable/enalbe device when suspend/resume.

  drivers/thermal/Kconfig |  11 ++
  drivers/thermal/Makefile|   1 +
  drivers/thermal/qoriq_thermal.c | 406
 
  3 files changed, 418 insertions(+)  create mode 100644
 drivers/thermal/qoriq_thermal.c

 diff --git a/drivers/thermal/Kconfig b/drivers/thermal/Kconfig
 index
 118938e..a200745 100644
 --- a/drivers/thermal/Kconfig
 +++ b/drivers/thermal/Kconfig
 @@ -180,6 +180,17 @@ config IMX_THERMAL
 cpufreq is used as the cooling device to throttle CPUs when
  the
 passive trip is crossed.

 

Re: [PATCH v3 2/6] powerpc/powernv: simplify the calculation of iov resource alignment

2015-08-13 Thread Wei Yang
On Fri, Aug 14, 2015 at 11:04:58AM +1000, Gavin Shan wrote:
On Thu, Aug 13, 2015 at 10:11:07PM +0800, Wei Yang wrote:
The alignment of IOV BAR on PowerNV platform is the total size of the IOV
BAR. No matter whether the IOV BAR is extended with number of
roundup_pow_of_two(total_vfs) or number of max PE number (256), the total
size could be calculated by (vfs_expanded * VF_BAR_size).

This patch simplifies the pnv_pci_iov_resource_alignment() by removing the
first case.

Signed-off-by: Wei Yang weiy...@linux.vnet.ibm.com
Reviewed-by: Gavin Shan gws...@linux.vnet.ibm.com
---
 arch/powerpc/platforms/powernv/pci-ioda.c |   14 +-
 1 file changed, 9 insertions(+), 5 deletions(-)

diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c 
b/arch/powerpc/platforms/powernv/pci-ioda.c
index 9ac324e..67b8f72 100644
--- a/arch/powerpc/platforms/powernv/pci-ioda.c
+++ b/arch/powerpc/platforms/powernv/pci-ioda.c
@@ -2987,12 +2987,16 @@ static resource_size_t 
pnv_pci_iov_resource_alignment(struct pci_dev *pdev,
int resno)
 {
  struct pci_dn *pdn = pci_get_pdn(pdev);
- resource_size_t align, iov_align;
-
- iov_align = resource_size(pdev-resource[resno]);
- if (iov_align)
- return iov_align;
+ resource_size_t align;

+ /*
+  * On PowerNV platform, IOV BAR is mapped by M64 BAR to enable the
+  * SR-IOV. While from hardware perspective, the range mapped by M64
+  * BAR should be size aligned.
+  *
+  * This function return the total IOV BAR size if expanded or just the
+  * individual size if not.
+  */

s/return/returns


Thanks :-)

  align = pci_iov_resource_size(pdev, resno);
  if (pdn-vfs_expanded)
  return pdn-vfs_expanded * align;
-- 
1.7.9.5


-- 
Richard Yang
Help you, Help me


Re: [PATCH v3 6/6] powerpc/powernv: allocate sparse PE# when using M64 BAR in Single PE mode

2015-08-13 Thread Wei Yang
On Fri, Aug 14, 2015 at 11:03:00AM +1000, Gavin Shan wrote:
On Thu, Aug 13, 2015 at 10:11:11PM +0800, Wei Yang wrote:
When the M64 BAR is set to Single PE mode, the PE#s assigned to VFs could be
sparse.

This patch restructures the code to allocate sparse PE#s for VFs when the M64
BAR is set to Single PE mode.

Signed-off-by: Wei Yang weiy...@linux.vnet.ibm.com
---
 arch/powerpc/include/asm/pci-bridge.h |2 +-
 arch/powerpc/platforms/powernv/pci-ioda.c |   59 
 +++--
 2 files changed, 41 insertions(+), 20 deletions(-)

diff --git a/arch/powerpc/include/asm/pci-bridge.h 
b/arch/powerpc/include/asm/pci-bridge.h
index 9d33ada..b026ef8 100644
--- a/arch/powerpc/include/asm/pci-bridge.h
+++ b/arch/powerpc/include/asm/pci-bridge.h
@@ -214,7 +214,7 @@ struct pci_dn {
 #ifdef CONFIG_PCI_IOV
  u16 vfs_expanded;   /* number of VFs IOV BAR expanded */
  u16 num_vfs;/* number of VFs enabled*/
- int offset; /* PE# for the first VF PE */
+ int pe_num_map[MAX_M64_BAR];/* PE# for the first VF PE or array */

Same question as to m64_map. pdn for non-PF doesn't need it.


Same as above, I prefer the dynamic version.

  boolm64_single_mode;/* Use M64 BAR in Single Mode */
 #define IODA_INVALID_M64(-1)
  int  m64_map[PCI_SRIOV_NUM_BARS][MAX_M64_BAR];
diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c 
b/arch/powerpc/platforms/powernv/pci-ioda.c
index 1e6ac86..7633538 100644
--- a/arch/powerpc/platforms/powernv/pci-ioda.c
+++ b/arch/powerpc/platforms/powernv/pci-ioda.c
@@ -1232,7 +1232,7 @@ static int pnv_pci_vf_assign_m64(struct pci_dev *pdev, 
u16 num_vfs)

  /* Map the M64 here */
  if (pdn-m64_single_mode) {
- pe_num = pdn-offset + j;
+ pe_num = pdn-pe_num_map[j];
  rc = opal_pci_map_pe_mmio_window(phb-opal_id,
  pe_num, OPAL_M64_WINDOW_TYPE,
  pdn-m64_map[i][j], 0);
@@ -1336,7 +1336,7 @@ void pnv_pci_sriov_disable(struct pci_dev *pdev)
  struct pnv_phb*phb;
  struct pci_dn *pdn;
  struct pci_sriov  *iov;
- u16 num_vfs;
+ u16 num_vfs, i;

  bus = pdev-bus;
  hose = pci_bus_to_host(bus);
@@ -1350,14 +1350,17 @@ void pnv_pci_sriov_disable(struct pci_dev *pdev)

  if (phb-type == PNV_PHB_IODA2) {
  if (!pdn-m64_single_mode)
- pnv_pci_vf_resource_shift(pdev, -pdn-offset);
+ pnv_pci_vf_resource_shift(pdev, -pdn-pe_num_map[0]);

  /* Release M64 windows */
  pnv_pci_vf_release_m64(pdev);

  /* Release PE numbers */
- bitmap_clear(phb-ioda.pe_alloc, pdn-offset, num_vfs);
- pdn-offset = 0;
+ if (pdn-m64_single_mode) {
+ for (i = 0; i  num_vfs; i++)
+ pnv_ioda_free_pe(phb, pdn-pe_num_map[i]);
+ } else
+ bitmap_clear(phb-ioda.pe_alloc, pdn-pe_num_map[0], 
num_vfs);
  }
 }

@@ -1383,7 +1386,10 @@ static void pnv_ioda_setup_vf_PE(struct pci_dev *pdev, 
u16 num_vfs)

  /* Reserve PE for each VF */
  for (vf_index = 0; vf_index  num_vfs; vf_index++) {
- pe_num = pdn-offset + vf_index;
+ if (pdn-m64_single_mode)
+ pe_num = pdn-pe_num_map[vf_index];
+ else
+ pe_num = pdn-pe_num_map[0] + vf_index;

  pe = phb-ioda.pe_array[pe_num];
  pe-pe_number = pe_num;
@@ -1425,6 +1431,7 @@ int pnv_pci_sriov_enable(struct pci_dev *pdev, u16 
num_vfs)
  struct pnv_phb*phb;
  struct pci_dn *pdn;
  intret;
+ u16i;

  bus = pdev-bus;
  hose = pci_bus_to_host(bus);
@@ -1448,19 +1455,30 @@ int pnv_pci_sriov_enable(struct pci_dev *pdev, u16 
num_vfs)
  }

  /* Calculate available PE for required VFs */
- mutex_lock(phb-ioda.pe_alloc_mutex);
- pdn-offset = bitmap_find_next_zero_area(
- phb-ioda.pe_alloc, phb-ioda.total_pe,
- 0, num_vfs, 0);
- if (pdn-offset = phb-ioda.total_pe) {
+ if (pdn-m64_single_mode) {
+ for (i = 0; i  num_vfs; i++)
+ pdn-pe_num_map[i] = IODA_INVALID_PE;
+ for (i = 0; i  num_vfs; i++) {
+ pdn-pe_num_map[i] = pnv_ioda_alloc_pe(phb);
+ if (pdn-pe_num_map[i] == IODA_INVALID_PE) {
+ ret = -EBUSY;
+ goto m64_failed;
+ }
+ }
+ } else {
+ mutex_lock(phb-ioda.pe_alloc_mutex);
+   

Re: [PATCH] book3s_hv_rmhandlers:Pass the correct trap argument to kvmhv_commence_exit

2015-08-13 Thread Sam Bobroff
On Thu, May 21, 2015 at 01:57:04PM +0530, Gautham R. Shenoy wrote:
 In guest_exit_cont we call kvmhv_commence_exit which expects the trap
 number as the argument. However r3 doesn't contain the trap number at
 this point and as a result we would be calling the function with a
 spurious trap number.
 
 Fix this by copying r12 into r3 before calling kvmhv_commence_exit as
 r12 contains the trap number
 
 Signed-off-by: Gautham R. Shenoy e...@linux.vnet.ibm.com

Hi Gautham,

I agree with your logic: r3 is quite clearly corrupted in that path. So:

Reviewed-by: Sam Bobroff sam.bobr...@au1.ibm.com

Just one comment: Do you have a case of this causing some visible problem due
to the corrupted trap number? (I'll test the patch if you do.)

Cheers,
Sam.


Re: [V3] powerpc/irq: Enable some more exceptions in /proc/interrupts interface

2015-08-13 Thread Michael Ellerman
On Thu, 2015-08-06 at 18:54 +0530, Anshuman Khandual wrote:
 On 08/04/2015 03:27 PM, Michael Ellerman wrote:
  On Mon, 2015-13-07 at 08:16:06 UTC, Anshuman Khandual wrote:
  This patch enables facility unavailable exceptions for generic facility,
  FPU, ALTIVEC and VSX in /proc/interrupts listing by incrementing their
  newly added IRQ statistical counters as and when these exceptions happen.
  This also adds couple of helper functions which will be called from within
  the interrupt handler context to update their statistics. Similarly this
  patch also enables alignment and program check exceptions as well.
  
  ...
  
  diff --git a/arch/powerpc/kernel/exceptions-64s.S 
  b/arch/powerpc/kernel/exceptions-64s.S
  index 0a0399c2..a86180c 100644
  --- a/arch/powerpc/kernel/exceptions-64s.S
  +++ b/arch/powerpc/kernel/exceptions-64s.S
  @@ -1158,6 +1158,7 @@ BEGIN_FTR_SECTION
   END_FTR_SECTION_IFSET(CPU_FTR_TM)
   #endif
 bl  load_up_fpu
  +  bl  fpu_unav_exceptions_count
  
  Is it safe to call C code here?
 
 Hmm, is it not? I had that question but was not really sure. I don't
 completely understand the difference between 'fast_exception_return' and
 'ret_from_except'.

If you're not really sure it's correct, please say so in the change log!

I'd rather you didn't send me patches with possibly subtle bugs in core code.

cheers




Re: [PATCH v4 02/11] cxl: Drop commands if the PCI channel is not in normal state

2015-08-13 Thread Ian Munsie
Acked-by: Ian Munsie imun...@au1.ibm.com

Excerpts from Daniel Axtens's message of 2015-08-13 14:11:20 +1000:
 +/* Only warn if we detached while the link was OK.

Only because mpe is sure to pick this up (I personally don't mind) -
block comments should start with /* on a line by itself.

 +/* If the adapter has gone down, we can assume that we
...
 +/* We could be asked to terminate when the hw is down. That
...
 +/* We could be asked to remove when the hw is down. Again, if
...
 +/* If the adapter has gone away, we can't get any meaningful
...
 +/* Config space IO is based on phb-cfg_addr, which is based on

Ditto.

Cheers,
-Ian


Re: RFC: Reducing the number of non volatile GPRs in the ppc64 kernel

2015-08-13 Thread Michael Ellerman
On Wed, 2015-08-05 at 14:03 +1000, Anton Blanchard wrote:
 Hi,
 
 While looking at traces of kernel workloads, I noticed places where gcc
 used a large number of non volatiles. Some of these functions
 did very little work, and we spent most of our time saving the
 non volatiles to the stack and reading them back.
 
 It made me wonder if we have the right ratio of volatile to non
 volatile GPRs. Since the kernel is completely self contained, we could
 potentially change that ratio.
 
 Attached is a quick hack to gcc and the kernel to decrease the number
 of non volatile GPRs to 8. I'm not sure if this is a good idea (and if
 the volatile to non volatile ratio is right), but this gives us
 something to play with.

OK, interesting idea. Can't say I'd ever thought of that.

I'm thinking we'd want some pretty solid analysis of the resulting code-gen and
real world perf before we made a switch like that.

Presumably it's going to hurt our null syscall, due to the added save/restores,
but hopefully help with paths that do actual work.

If the caller is actually using the non-volatiles then presumably it will be a
wash, because the caller will have to do the save anyway. Though maybe it would
still be a win because the caller can do the saves & restores when it needs to
rather than all in a block.

I'm also not clear on how it would affect folks who build modules separately
from the kernel. We'd have to make sure they had the right GCC, or things would
go badly wrong, unless it can be done with command-line flags? I don't know how
much we care about that but distros presumably do.

cheers



[PATCH 1/1] powerpc/xmon: Paged output for paca display

2015-08-13 Thread Sam Bobroff
The paca display is already more than 24 lines, which can be problematic
if you have an old school 80x24 terminal, or more likely you are on a
virtual terminal which does not scroll for whatever reason.

This adds an optional letter to the "dp" and "dpa" xmon commands
("dpp" and "dppa"), which enables a per-page display (with 16-line
pages): the first page will be displayed and, if there was data that
didn't fit, a message will indicate that the user can press enter to
display the next page. The intent is that this feels similar to the
way the memory display functions work.

This is implemented by running over the entire output both for the
initial command and for each subsequent page: the visible part is
clipped out by checking line numbers. Handling the empty command as
"more" is done by writing a special command into a static buffer that
indicates where to move the sliding visibility window. This is similar
to the approach used for the memory dump commands except that the
state data is encoded into the last_cmd string, rather than a set of
static variables. The memory dump commands could probably be rewritten
to make use of the same buffer and remove their other static
variables.

Sample output:

0:mon dpp1
paca for cpu 0x1 @ cfdc0480:
 possible = yes
 present  = yes
 online   = yes
 lock_token   = 0x8000  (0x8)
 paca_index   = 0x1 (0xa)
 kernel_toc   = 0xc0eb2400  (0x10)
 kernelbase   = 0xc000  (0x18)
 kernel_msr   = 0xb0001032  (0x20)
 emergency_sp = 0xc0003ffe8000  (0x28)
 mc_emergency_sp  = 0xc0003ffe4000  (0x2e0)
 in_mce   = 0x0 (0x2e8)
 data_offset  = 0x7f17  (0x30)
 hw_cpu_id= 0x8 (0x38)
 cpu_start= 0x1 (0x3a)
 kexec_state  = 0x0 (0x3b)
[Enter for next page]
0:mon
 __current= 0xc0007e696620  (0x290)
 kstack   = 0xc0007e6ebe30  (0x298)
 stab_rr  = 0xb (0x2a0)
 saved_r1 = 0xc0007ef37860  (0x2a8)
 trap_save= 0x0 (0x2b8)
 soft_enabled = 0x0 (0x2ba)
 irq_happened = 0x1 (0x2bb)
 io_sync  = 0x0 (0x2bc)
 irq_work_pending = 0x0 (0x2bd)
 nap_state_lost   = 0x0 (0x2be)
0:mon

(Based on a similar patch by Michael Ellerman m...@ellerman.id.au
[v2] powerpc/xmon: Allow limiting the size of the paca display.
This patch is an alternative and cannot coexist with the original.)

Signed-off-by: Sam Bobroff sam.bobr...@au1.ibm.com
---

 arch/powerpc/xmon/xmon.c | 82 
 1 file changed, 62 insertions(+), 20 deletions(-)

diff --git a/arch/powerpc/xmon/xmon.c b/arch/powerpc/xmon/xmon.c
index e599259..9157286 100644
--- a/arch/powerpc/xmon/xmon.c
+++ b/arch/powerpc/xmon/xmon.c
@@ -72,6 +72,7 @@ static int xmon_gate;
 
 static unsigned long in_xmon __read_mostly = 0;
 
+static char last_cmd_buf[128];
 static unsigned long adrs;
 static int size = 1;
 #define MAX_DUMP (128 * 1024)
@@ -204,8 +205,8 @@ Commands:\n\
   dldump the kernel log buffer\n
 #ifdef CONFIG_PPC64
   \
-  dp[#]dump paca for current cpu, or cpu #\n\
-  dpa  dump paca for all possible cpus\n
+  dp[p][#] dump paca for current cpu, or cpu # (p = paged)\n\
+  dp[p]a   dump paca for all possible cpus (p = paged)\n
 #endif
   \
   dr   dump stream of raw bytes\n\
@@ -2070,7 +2071,17 @@ static void xmon_rawdump (unsigned long adrs, long ndump)
 }
 
 #ifdef CONFIG_PPC64
-static void dump_one_paca(int cpu)
+static bool line_visible(unsigned long start, unsigned long count,
+                        unsigned long *line) {
+       bool rv = (!count
+                  || ((*line >= start) && (*line < (start + count))));
+
+       (*line)++;
+       return rv;
+}
+
+static void dump_one_paca(int cpu, unsigned long start,
+ unsigned long count, unsigned long *line)
 {
struct paca_struct *p;
 
@@ -2084,15 +2095,22 @@ static void dump_one_paca(int cpu)
 
p = paca[cpu];
 
-       printf("paca for cpu 0x%x @ %p:\n", cpu, p);
+#define VPRINT(...) do { \
+       if (line_visible(start, count, line)) \
+               printf(__VA_ARGS__); \
+} while (0)
+       VPRINT("paca for cpu 0x%x @ %p:\n", cpu, p);
 
-       printf(" %-*s = %s\n", 16, "possible", cpu_possible(cpu) ? "yes" : "no");
-       printf(" %-*s = %s\n", 16, "present", cpu_present(cpu) ? "yes" : "no");
-       printf(" %-*s = %s\n", 16, "online", cpu_online(cpu) ? "yes" : "no");
+       VPRINT(" %-*s = %s\n", 16, "possible", cpu_possible(cpu) ? "yes" : "no");
+       VPRINT(" %-*s = %s\n", 16, "present", cpu_present(cpu) ? "yes" : "no");
+       VPRINT(" %-*s = %s\n", 16, "online", cpu_online(cpu) ? "yes" : "no");
+#undef VPRINT
 
-#define DUMP(paca, name, format) \
-   printf( %-*s = %#-*format\t(0x%lx)\n, 16, #name, 18, paca-name, \
-   

[PATCH v2 2/2] powerpc/mpc85xx:Add SCFG device tree support of T104x

2015-08-13 Thread Dongsheng Wang
From: Wang Dongsheng dongsheng.w...@freescale.com

Signed-off-by: Wang Dongsheng dongsheng.w...@freescale.com
---
*V2*
No changes.
diff --git a/arch/powerpc/boot/dts/fsl/t1040si-post.dtsi b/arch/powerpc/boot/dts/fsl/t1040si-post.dtsi
index 9e9f7e2..9770d02 100644
--- a/arch/powerpc/boot/dts/fsl/t1040si-post.dtsi
+++ b/arch/powerpc/boot/dts/fsl/t1040si-post.dtsi
@@ -484,6 +484,11 @@
                        reg        = <0xea000 0x4000>;
                };
 
+               scfg: global-utilities@fc000 {
+                       compatible = "fsl,t1040-scfg";
+                       reg = <0xfc000 0x1000>;
+               };
+
 /include/ elo3-dma-0.dtsi
 /include/ elo3-dma-1.dtsi
 /include/ qoriq-espi-0.dtsi
-- 
2.1.0.27.g96db324


Re: [PATCH v3 3/6] powerpc/powernv: use one M64 BAR in Single PE mode for one VF BAR

2015-08-13 Thread Wei Yang
On Fri, Aug 14, 2015 at 10:52:21AM +1000, Gavin Shan wrote:
On Thu, Aug 13, 2015 at 10:11:08PM +0800, Wei Yang wrote:
In the current implementation, when the VF BAR is bigger than 64MB, it uses 4 M64
BARs in Single PE mode to cover the number of VFs required to be enabled.
By doing so, several VFs would be in one VF Group, which leads to interference
between VFs in the same group.

This patch changes the design by using one M64 BAR in Single PE mode for
one VF BAR. This gives absolute isolation for VFs.

Signed-off-by: Wei Yang weiy...@linux.vnet.ibm.com
---
 arch/powerpc/include/asm/pci-bridge.h |6 +-
 arch/powerpc/platforms/powernv/pci-ioda.c |  163 
 +++--
 2 files changed, 62 insertions(+), 107 deletions(-)

diff --git a/arch/powerpc/include/asm/pci-bridge.h b/arch/powerpc/include/asm/pci-bridge.h
index 712add5..9d33ada 100644
--- a/arch/powerpc/include/asm/pci-bridge.h
+++ b/arch/powerpc/include/asm/pci-bridge.h
@@ -187,6 +187,7 @@ static inline int isa_vaddr_is_ioport(void __iomem 
*address)
  */
 struct iommu_table;

+#define MAX_M64_BAR  16

struct pnv_phb::m64_bar_idx is initialized to 15. Another macro is defined here
as 16. Both of them can be used as the maximal M64 BAR number. Obviously, they're
duplicated. On the other hand, I don't think it's a good idea to have the static
m64_map, because @pdn is created for every PCI device, including VFs. Non-PF
pdns don't need m64_map, or other fields like m64_per_iov, at all. It's obviously
wasting memory. So it should be allocated dynamically when the PF's pdn is created,
or in pnv_pci_ioda_fixup_iov_resources().


I prefer the dynamic one.

Alexey,

I changed it to the static definition based on your comments. So do you have some
concern about the dynamic version?

In the long run, it might be reasonable to move all SRIOV related fields in pci_dn
to another data struct (struct pci_iov_dn?) and allocate that dynamically.
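
For what it's worth, a rough sketch of what such a split might look like (the
struct and field layout below are hypothetical, just to illustrate the idea,
not a proposal of the final layout):

/* Hypothetical: SR-IOV state split out of pci_dn, allocated only for PFs */
struct pci_iov_dn {
        u16     vfs_expanded;
        u16     num_vfs;
        bool    m64_single_mode;
        int     *pe_num_map;                    /* one PE# per VF in single mode */
        int     (*m64_map)[PCI_SRIOV_NUM_BARS]; /* allocated at fixup time */
};

The PF's pdn would then only carry a pointer, which stays NULL for VFs and for
non-SRIOV devices, so the per-device footprint remains small.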

  int flags;
 #define PCI_DN_FLAG_IOV_VF   0x01
@@ -214,10 +215,9 @@ struct pci_dn {
  u16 vfs_expanded;   /* number of VFs IOV BAR expanded */
  u16 num_vfs;/* number of VFs enabled*/
  int offset; /* PE# for the first VF PE */
-#define M64_PER_IOV 4
-       int     m64_per_iov;
+       bool    m64_single_mode;        /* Use M64 BAR in Single Mode */
 #define IODA_INVALID_M64        (-1)
-       int     m64_wins[PCI_SRIOV_NUM_BARS][M64_PER_IOV];
+       int     m64_map[PCI_SRIOV_NUM_BARS][MAX_M64_BAR];
 #endif /* CONFIG_PCI_IOV */
 #endif
  struct list_head child_list;
diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c
index 67b8f72..4da0f50 100644
--- a/arch/powerpc/platforms/powernv/pci-ioda.c
+++ b/arch/powerpc/platforms/powernv/pci-ioda.c
@@ -1162,15 +1162,14 @@ static int pnv_pci_vf_release_m64(struct pci_dev *pdev)
        pdn = pci_get_pdn(pdev);
 
        for (i = 0; i < PCI_SRIOV_NUM_BARS; i++)
-               for (j = 0; j < M64_PER_IOV; j++) {
-                       if (pdn->m64_wins[i][j] == IODA_INVALID_M64)
+               for (j = 0; j < MAX_M64_BAR; j++) {
+                       if (pdn->m64_map[i][j] == IODA_INVALID_M64)
                                continue;
                        opal_pci_phb_mmio_enable(phb->opal_id,
-                               OPAL_M64_WINDOW_TYPE, pdn->m64_wins[i][j], 0);
-                       clear_bit(pdn->m64_wins[i][j], phb->ioda.m64_bar_alloc);
-                       pdn->m64_wins[i][j] = IODA_INVALID_M64;
+                               OPAL_M64_WINDOW_TYPE, pdn->m64_map[i][j], 0);
+                       clear_bit(pdn->m64_map[i][j], phb->ioda.m64_bar_alloc);
+                       pdn->m64_map[i][j] = IODA_INVALID_M64;
                }
-
        return 0;
 }

@@ -1187,8 +1186,7 @@ static int pnv_pci_vf_assign_m64(struct pci_dev *pdev, u16 num_vfs)
        int                    total_vfs;
        resource_size_t        size, start;
        int                    pe_num;
-       int                    vf_groups;
-       int                    vf_per_group;
+       int                    m64_bars;
 
        bus = pdev->bus;
        hose = pci_bus_to_host(bus);
@@ -1196,26 +1194,23 @@ static int pnv_pci_vf_assign_m64(struct pci_dev *pdev, u16 num_vfs)
        pdn = pci_get_pdn(pdev);
        total_vfs = pci_sriov_get_totalvfs(pdev);
 
-       /* Initialize the m64_wins to IODA_INVALID_M64 */
-       for (i = 0; i < PCI_SRIOV_NUM_BARS; i++)
-               for (j = 0; j < M64_PER_IOV; j++)
-                       pdn->m64_wins[i][j] = IODA_INVALID_M64;
+       if (pdn->m64_single_mode)
+               m64_bars = num_vfs;
+       else
+               m64_bars = 1;
+
+       /* Initialize the m64_map to IODA_INVALID_M64 */
+       for (i = 0; i < PCI_SRIOV_NUM_BARS ; i++)
+               for (j = 0; j < MAX_M64_BAR; j++)
+                       pdn->m64_map[i][j] = IODA_INVALID_M64;

It would be done in pnv_pci_ioda_fixup_iov_resources(). That means it will
be done only once if hotplug isn't considered. The code here will be called
on every attempt to enable 

Re: [PATCH v2] powerpc/xmon: Allow limiting the size of the paca display

2015-08-13 Thread Sam Bobroff
On Wed, Aug 12, 2015 at 09:55:25PM +1000, Michael Ellerman wrote:
 The paca display is already more than 24 lines, which can be problematic
 if you have an old school 80x24 terminal, or more likely you are on a
 virtual terminal which does not scroll for whatever reason.
 
 We'd like to expand the paca display even more, so add a way to limit
 the number of lines that are displayed.
 
 This adds a third form of 'dp' which is 'dp # #', where the first number
 is the cpu, and the second is the number of lines to display.
 
 Example output:
 
   5:mon dp 3 6
   paca for cpu 0x3 @ cfe00c00:
possible = yes
present  = yes
online   = yes
lock_token   = 0x8000  (0xa)
paca_index   = 0x3 (0x8)

Michael,

This patch inspired me to do the additional work to make the output paged, more
like the memory dump commands.

I'll post it shortly as "powerpc/xmon: Paged output for paca display".

Cheers,
Sam.


Re: [PATCH 3/3] powerpc/e6500: hw tablewalk: order the memory access when acquire/release tcd lock

2015-08-13 Thread Scott Wood
On Thu, 2015-08-13 at 19:51 +0800, Kevin Hao wrote:
 I didn't find anything unusual. But I think we do need to order the
 load/store of esel_next when we acquire/release the tcd lock. For acquire,
 add a data dependency to order the loads of lock and esel_next.
 For release, even though there is already an isync here, it doesn't
 guarantee any memory access ordering. So we still need an lwsync to order
 the two stores to lock and esel_next.

I was going to say that esel_next is just a hint and it doesn't really matter 
if we occasionally get the wrong value, unless it happens often enough to 
cause more performance degradation than the lwsync causes.  However, with the 
A-008139 workaround we do need to read the same value from esel_next both 
times.  It might be less costly to save/restore an additional register 
instead of lwsync, though.
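
For reference, a minimal sketch of the release ordering being discussed (the
types and field names are hypothetical, and the real code lives in the TLB
miss assembly rather than C):

/* Hypothetical sketch: publish the esel hint before dropping the lock.
 * The lwsync orders the first store before the second as observed by
 * other CPUs; without it the stores may be seen out of order.
 */
static inline void tcd_unlock(struct tlb_core_data *tcd, unsigned long esel)
{
        tcd->esel_next = esel;
        asm volatile("lwsync" : : : "memory");
        tcd->lock = 0;
}

Whether that lwsync is worth it, versus keeping the value in a spare register
across the critical section for the A-008139 case, is exactly the trade-off
above.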

-Scott


Re: [PATCH 29/31] parisc: handle page-less SG entries

2015-08-13 Thread James Bottomley
On Thu, 2015-08-13 at 20:30 -0700, Dan Williams wrote:
 On Thu, Aug 13, 2015 at 7:31 AM, Christoph Hellwig h...@lst.de wrote:
  On Wed, Aug 12, 2015 at 09:01:02AM -0700, Linus Torvalds wrote:
  I'm assuming that anybody who wants to use the page-less
  scatter-gather lists always does so on memory that isn't actually
  virtually mapped at all, or only does so on sane architectures that
  are cache coherent at a physical level, but I'd like that assumption
  *documented* somewhere.
 
  It's temporarily mapped by kmap-like helpers.  That code isn't in
  this series. The most recent version of it is here:
 
  https://git.kernel.org/cgit/linux/kernel/git/djbw/nvdimm.git/commit/?h=pfnid=de8237c99fdb4352be2193f3a7610e902b9bb2f0
 
  note that it's not doing the cache flushing it would have to do yet, but
  it's also only enabled for x86 at the moment.
 
 For virtually tagged caches I assume we would temporarily map with
 kmap_atomic_pfn_t(), similar to how drm_clflush_pages() implements
 powerpc support.  However with DAX we could end up with multiple
 virtual aliases for a page-less pfn.

At least on some PA architectures, you have to be very careful.
Improperly managed, multiple aliases will cause the system to crash
(actually a machine check in the cache chequerboard). For the most
temperamental systems, we need the cache line flushed and the alias
mapping ejected from the TLB cache before we access the same page at an
inequivalent alias.

James



Re: [PATCH v3 6/6] powerpc/powernv: allocate sparse PE# when using M64 BAR in Single PE mode

2015-08-13 Thread Gavin Shan
On Thu, Aug 13, 2015 at 10:11:11PM +0800, Wei Yang wrote:
When the M64 BAR is set to Single PE mode, the PE# assigned to a VF could be
sparse.

This patch restructures the code to allocate sparse PE# for VFs when the M64
BAR is set to Single PE mode.

Signed-off-by: Wei Yang weiy...@linux.vnet.ibm.com
---
 arch/powerpc/include/asm/pci-bridge.h |2 +-
 arch/powerpc/platforms/powernv/pci-ioda.c |   59 +++--
 2 files changed, 41 insertions(+), 20 deletions(-)

diff --git a/arch/powerpc/include/asm/pci-bridge.h b/arch/powerpc/include/asm/pci-bridge.h
index 9d33ada..b026ef8 100644
--- a/arch/powerpc/include/asm/pci-bridge.h
+++ b/arch/powerpc/include/asm/pci-bridge.h
@@ -214,7 +214,7 @@ struct pci_dn {
 #ifdef CONFIG_PCI_IOV
   u16 vfs_expanded;   /* number of VFs IOV BAR expanded */
   u16 num_vfs;/* number of VFs enabled*/
-  int offset; /* PE# for the first VF PE */
+  int pe_num_map[MAX_M64_BAR];/* PE# for the first VF PE or array */

Same question as for m64_map: the pdn for a non-PF doesn't need it.

   boolm64_single_mode;/* Use M64 BAR in Single Mode */
 #define IODA_INVALID_M64(-1)
   int  m64_map[PCI_SRIOV_NUM_BARS][MAX_M64_BAR];
diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c
index 1e6ac86..7633538 100644
--- a/arch/powerpc/platforms/powernv/pci-ioda.c
+++ b/arch/powerpc/platforms/powernv/pci-ioda.c
@@ -1232,7 +1232,7 @@ static int pnv_pci_vf_assign_m64(struct pci_dev *pdev, u16 num_vfs)
 
                /* Map the M64 here */
                if (pdn->m64_single_mode) {
-                       pe_num = pdn->offset + j;
+                       pe_num = pdn->pe_num_map[j];
                        rc = opal_pci_map_pe_mmio_window(phb->opal_id,
                                        pe_num, OPAL_M64_WINDOW_TYPE,
                                        pdn->m64_map[i][j], 0);
@@ -1336,7 +1336,7 @@ void pnv_pci_sriov_disable(struct pci_dev *pdev)
        struct pnv_phb        *phb;
        struct pci_dn         *pdn;
        struct pci_sriov      *iov;
-       u16                    num_vfs;
+       u16                    num_vfs, i;
 
        bus = pdev->bus;
        hose = pci_bus_to_host(bus);
@@ -1350,14 +1350,17 @@ void pnv_pci_sriov_disable(struct pci_dev *pdev)
 
        if (phb->type == PNV_PHB_IODA2) {
                if (!pdn->m64_single_mode)
-                       pnv_pci_vf_resource_shift(pdev, -pdn->offset);
+                       pnv_pci_vf_resource_shift(pdev, -pdn->pe_num_map[0]);
 
                /* Release M64 windows */
                pnv_pci_vf_release_m64(pdev);
 
                /* Release PE numbers */
-               bitmap_clear(phb->ioda.pe_alloc, pdn->offset, num_vfs);
-               pdn->offset = 0;
+               if (pdn->m64_single_mode) {
+                       for (i = 0; i < num_vfs; i++)
+                               pnv_ioda_free_pe(phb, pdn->pe_num_map[i]);
+               } else
+                       bitmap_clear(phb->ioda.pe_alloc, pdn->pe_num_map[0], num_vfs);
        }
 }

@@ -1383,7 +1386,10 @@ static void pnv_ioda_setup_vf_PE(struct pci_dev *pdev, u16 num_vfs)
 
        /* Reserve PE for each VF */
        for (vf_index = 0; vf_index < num_vfs; vf_index++) {
-               pe_num = pdn->offset + vf_index;
+               if (pdn->m64_single_mode)
+                       pe_num = pdn->pe_num_map[vf_index];
+               else
+                       pe_num = pdn->pe_num_map[0] + vf_index;
 
                pe = &phb->ioda.pe_array[pe_num];
                pe->pe_number = pe_num;
@@ -1425,6 +1431,7 @@ int pnv_pci_sriov_enable(struct pci_dev *pdev, u16 num_vfs)
        struct pnv_phb        *phb;
        struct pci_dn         *pdn;
        int                    ret;
+       u16                    i;
 
        bus = pdev->bus;
        hose = pci_bus_to_host(bus);
@@ -1448,19 +1455,30 @@ int pnv_pci_sriov_enable(struct pci_dev *pdev, u16 num_vfs)
        }
 
        /* Calculate available PE for required VFs */
-       mutex_lock(&phb->ioda.pe_alloc_mutex);
-       pdn->offset = bitmap_find_next_zero_area(
-               phb->ioda.pe_alloc, phb->ioda.total_pe,
-               0, num_vfs, 0);
-       if (pdn->offset >= phb->ioda.total_pe) {
+       if (pdn->m64_single_mode) {
+               for (i = 0; i < num_vfs; i++)
+                       pdn->pe_num_map[i] = IODA_INVALID_PE;
+               for (i = 0; i < num_vfs; i++) {
+                       pdn->pe_num_map[i] = pnv_ioda_alloc_pe(phb);
+                       if (pdn->pe_num_map[i] == IODA_INVALID_PE) {
+                               ret = -EBUSY;
+                               goto m64_failed;
+                       }
+               }
+       } else {
+               mutex_lock(&phb->ioda.pe_alloc_mutex);
+               pdn->pe_num_map[0] = 

Re: [PATCH v3 2/6] powerpc/powernv: simplify the calculation of iov resource alignment

2015-08-13 Thread Gavin Shan
On Thu, Aug 13, 2015 at 10:11:07PM +0800, Wei Yang wrote:
The alignment of the IOV BAR on the PowerNV platform is the total size of the
IOV BAR. No matter whether the IOV BAR is extended to hold
roundup_pow_of_two(total_vfs) copies of the VF BAR or the maximum PE number
(256) of them, the total size can be calculated as (vfs_expanded * VF_BAR_size).
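
To make the arithmetic concrete, a small worked example against the simplified
code (the numbers are made up for illustration):

/* Hypothetical example: a VF BAR of 8MB on a PHB whose IOV BAR was expanded
 * to 256 PEs must be aligned to 256 * 8MB = 2GB, i.e. the total size of the
 * expanded IOV BAR.
 */
align = pci_iov_resource_size(pdev, resno);     /* 8MB        */
if (pdn->vfs_expanded)                          /* 256        */
        return pdn->vfs_expanded * align;       /* 2GB total  */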

This patch simplifies the pnv_pci_iov_resource_alignment() by removing the
first case.

Signed-off-by: Wei Yang weiy...@linux.vnet.ibm.com
Reviewed-by: Gavin Shan gws...@linux.vnet.ibm.com
---
 arch/powerpc/platforms/powernv/pci-ioda.c |   14 +-
 1 file changed, 9 insertions(+), 5 deletions(-)

diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c
index 9ac324e..67b8f72 100644
--- a/arch/powerpc/platforms/powernv/pci-ioda.c
+++ b/arch/powerpc/platforms/powernv/pci-ioda.c
@@ -2987,12 +2987,16 @@ static resource_size_t pnv_pci_iov_resource_alignment(struct pci_dev *pdev,
                                                      int resno)
 {
        struct pci_dn *pdn = pci_get_pdn(pdev);
-       resource_size_t align, iov_align;
-
-       iov_align = resource_size(&pdev->resource[resno]);
-       if (iov_align)
-               return iov_align;
+       resource_size_t align;
 
+       /*
+        * On PowerNV platform, IOV BAR is mapped by M64 BAR to enable the
+        * SR-IOV. While from hardware perspective, the range mapped by M64
+        * BAR should be size aligned.
+        *
+        * This function return the total IOV BAR size if expanded or just the
+        * individual size if not.
+        */

s/return/returns

        align = pci_iov_resource_size(pdev, resno);
        if (pdn->vfs_expanded)
                return pdn->vfs_expanded * align;
-- 
1.7.9.5



Re: [PATCH] kvm:powerpc:Fix return statements for wrapper functions in the file book3s_64_mmu_hv.c

2015-08-13 Thread Michael Ellerman
On Wed, 2015-08-12 at 21:06 +0200, Alexander Graf wrote:
 
 On 10.08.15 17:27, Nicholas Krause wrote:
  This fixes the wrapper functions kvm_unmap_hva_hv and kvm_unmap_hva_range_hv
  to return the return value of the function kvm_handle_hva or
  kvm_handle_hva_range that they are wrapped to call internally, rather than
  always making the caller of these wrapper functions think they ran
  successfully by returning the value of zero directly.
  
  Signed-off-by: Nicholas Krause xerofo...@gmail.com
 
 Paul, could you please take on this one?

Paul's away for a while, can you take it directly?

cheers



Re: [PATCH 29/31] parisc: handle page-less SG entries

2015-08-13 Thread Dan Williams
On Thu, Aug 13, 2015 at 7:31 AM, Christoph Hellwig h...@lst.de wrote:
 On Wed, Aug 12, 2015 at 09:01:02AM -0700, Linus Torvalds wrote:
 I'm assuming that anybody who wants to use the page-less
 scatter-gather lists always does so on memory that isn't actually
 virtually mapped at all, or only does so on sane architectures that
 are cache coherent at a physical level, but I'd like that assumption
 *documented* somewhere.

 It's temporarily mapped by kmap-like helpers.  That code isn't in
 this series. The most recent version of it is here:

 https://git.kernel.org/cgit/linux/kernel/git/djbw/nvdimm.git/commit/?h=pfnid=de8237c99fdb4352be2193f3a7610e902b9bb2f0

 note that it's not doing the cache flushing it would have to do yet, but
 it's also only enabled for x86 at the moment.

For virtually tagged caches I assume we would temporarily map with
kmap_atomic_pfn_t(), similar to how drm_clflush_pages() implements
powerpc support.  However with DAX we could end up with multiple
virtual aliases for a page-less pfn.

Re: [PATCH 29/31] parisc: handle page-less SG entries

2015-08-13 Thread David Miller
From: James Bottomley james.bottom...@hansenpartnership.com
Date: Thu, 13 Aug 2015 20:59:20 -0700

 On Thu, 2015-08-13 at 20:30 -0700, Dan Williams wrote:
 On Thu, Aug 13, 2015 at 7:31 AM, Christoph Hellwig h...@lst.de wrote:
  On Wed, Aug 12, 2015 at 09:01:02AM -0700, Linus Torvalds wrote:
  I'm assuming that anybody who wants to use the page-less
  scatter-gather lists always does so on memory that isn't actually
  virtually mapped at all, or only does so on sane architectures that
  are cache coherent at a physical level, but I'd like that assumption
  *documented* somewhere.
 
  It's temporarily mapped by kmap-like helpers.  That code isn't in
  this series. The most recent version of it is here:
 
  https://git.kernel.org/cgit/linux/kernel/git/djbw/nvdimm.git/commit/?h=pfnid=de8237c99fdb4352be2193f3a7610e902b9bb2f0
 
  note that it's not doing the cache flushing it would have to do yet, but
  it's also only enabled for x86 at the moment.
 
 For virtually tagged caches I assume we would temporarily map with
 kmap_atomic_pfn_t(), similar to how drm_clflush_pages() implements
 powerpc support.  However with DAX we could end up with multiple
 virtual aliases for a page-less pfn.
 
 At least on some PA architectures, you have to be very careful.
 Improperly managed, multiple aliases will cause the system to crash
 (actually a machine check in the cache chequerboard). For the most
 temperamental systems, we need the cache line flushed and the alias
 mapping ejected from the TLB cache before we access the same page at an
 inequivalent alias.

Also, I want to mention that on sparc64 we manage the cache aliasing
state in the page struct.

Until a page is mapped into userspace, we just record the most recent
cpu to store into that page with kernel side mappings.  Once the page
ends up being mapped or the cpu doing kernel side stores changes, we
actually perform the cache flush.
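
Roughly, the idea is something like the following sketch (hypothetical helpers
and fields, not sparc64's actual code):

/* Hypothetical sketch of deferring D-cache flushes: remember which CPU
 * dirtied the page; only flush once the page is about to become visible
 * through a user mapping (or a different CPU starts storing to it).
 */
void note_kernel_store(struct page *page, int cpu)
{
        if (!page_mapped(page))
                page->last_dirty_cpu = cpu;     /* just record it for later */
        else
                flush_dcache_page(page);        /* aliasing is possible, flush now */
}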

Generally speaking, I think that all actual physical memory the kernel
operates on should have a struct page backing it.  So this whole
discussion of operating on physical memory in scatter lists without
backing page structs feels really foreign to me.

Re: [PATCH V2] QorIQ/TMU: add thermal management support based on TMU

2015-08-13 Thread Eduardo Valentin
Hello Hongtao,

On Fri, Aug 14, 2015 at 03:15:22AM +, Hongtao Jia wrote:
 Hi Eduardo,
 
 In previous mail I asked questions about including header files in device 
 tree.
 Don't bother, I have already figured out the solution.
 
 Another questions is about cpu cooling:
 I found out that there is no explicit calling for registering cpu cooling
 device in the of-thermal style drivers.

Your understanding is correct.

 
 And Samsung did it in cpufreq driver: drivers/cpufreq/exynos-cpufreq.c
 

Yes.

 Should all the of-thermal driver use the same way?

of-thermal won't handle the cooling device registering. It is typically
registered by the cpufreq driver. Have a look in
drivers/cpufreq/cpufreq-dt.c

 Or is there any recommendation for registering cpu cooling device?
 (I enabled the CONFIG_CPUFREQ_DT and still got no cooling device registered)

If your system supports using cpufreq-dt, then it will handle
registering the cpucooling for you, if you configures the cooling dt
properties in your DT files.
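
For example, something along these lines is what cpufreq-dt effectively does
for you (a sketch only; check the exact of_cpufreq_cooling_register() signature
in your tree, see drivers/thermal/cpu_cooling.c):

/* Sketch: register the CPUs as a cooling device once cpufreq is up,
 * provided the cpu node carries #cooling-cells.
 */
struct device_node *np = of_get_cpu_node(0, NULL);
struct thermal_cooling_device *cdev = NULL;

if (np && of_find_property(np, "#cooling-cells", NULL))
        cdev = of_cpufreq_cooling_register(np, cpu_present_mask);

So with CONFIG_CPUFREQ_DT enabled and the cooling properties present in the DT,
the TMU driver itself would typically not register a cooling device.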

How does your DT entry look like?

BR,

Eduardo 
 
 Thanks.
 
 ---
 Best Regards,
 Hongtao
 
 
  -Original Message-
  From: Linuxppc-dev [mailto:linuxppc-dev-
  bounces+b38951=freescale@lists.ozlabs.org] On Behalf Of Hongtao Jia
  Sent: Friday, August 07, 2015 4:15 PM
  To: Eduardo Valentin
  Cc: Wood Scott-B07421; linuxppc-dev@lists.ozlabs.org; linux-
  p...@vger.kernel.org
  Subject: RE: [PATCH V2] QorIQ/TMU: add thermal management support based
  on TMU
  
  Thanks for your comments.
  Please see my questions inline.
  
  Thanks.
  ---
  Best Regards,
  Hongtao
  
  
   -Original Message-
   From: Eduardo Valentin [mailto:edubez...@gmail.com]
   Sent: Thursday, August 06, 2015 3:43 AM
   To: Jia Hongtao-B38951
   Cc: linux...@vger.kernel.org; linuxppc-dev@lists.ozlabs.org; Wood
   Scott-
   B07421
   Subject: Re: [PATCH V2] QorIQ/TMU: add thermal management support
   based on TMU
  
   On Thu, Jul 30, 2015 at 08:13:09AM +, Hongtao Jia wrote:
- Any specific reason why not using OF thermal?
- No, actually.
   
I'd like to use OF thermal after some clarification.
   
Regarding cooling-maps: in some cases there may be more than one CPU acting
as a cooling device, and they are independent.
1. Let's say 4. So we need to provide 4 maps, like map0-map3. Right?
  
   That would depend on the amount of sensors you have. Do you have one
   sensor per cpu? if the answer is yes, then you probably want to have
   four different map entries, yes, but one on each thermal zone of each
   cpu temperature sensor. if the answer is no, then you would need to
   have all the maps in the same thermal zone.
  
2. cooling-max-level may vary depending on switch settings or firmware.
   Is it
   OK if I do not provide the cooling-min-level and cooling-max-level
   properties?
  
   That is already achievable by using the cooling-device property of a
   cooling map.
  
   Please have a look in the example section of the
   Documentation/devicetree/bindings/thermal/thermal.txt
  
  Yes, I read this file.
  So in my understanding:
  There is no need to provide the cooling-min-level and cooling-max-level
  properties.
  A THERMAL_NO_LIMIT value in the cooling device entry tells the driver to
  determine the min and max state automatically, right?
  
  Talking about THERMAL_NO_LIMIT, I need to #include
  <dt-bindings/thermal/thermal.h> to provide the definition. But I got a
  compile error when building the dtb file.
  I did some research and used "make t1040qds.dtb" in order to involve the
  preprocessor.
  But with simply adding #include <dt-bindings/thermal/thermal.h> to
  t1040si-post.dtsi at line 35 I still got an error like this:
  Error: arch/powerpc/boot/dts/fsl/t1040si-post.dtsi:35.1-9 syntax error
  FATAL ERROR: Unable to parse input tree
  
  Could you help me out here.
  Thanks.
  
  
   Let me know if you need further clarification.
  
  
   BR,
  
   Eduardo Valentin
  
   
Thanks.
-Hongtao
   
   
 -Original Message-
 From: Eduardo Valentin [mailto:edubez...@gmail.com]
 Sent: Thursday, July 30, 2015 2:56 PM
 To: Jia Hongtao-B38951
 Cc: linux...@vger.kernel.org; linuxppc-dev@lists.ozlabs.org; Wood
 Scott-
 B07421
 Subject: Re: [PATCH V2] QorIQ/TMU: add thermal management support
 based on TMU

 On Wed, Jul 29, 2015 at 02:19:39PM +0800, Jia Hongtao wrote:
  It supports one critical trip point and one passive trip point.
  The cpufreq is used as the cooling device to throttle CPUs when
  the passive trip is crossed.
 
  Signed-off-by: Jia Hongtao hongtao@freescale.com
  ---
  This patch based on:
  http://patchwork.ozlabs.org/patch/482987/
 
  Changes for V2:
  * Add tmu-range parse.
  * Use default trend hook.
  * Using latest thermal_zone_bind_cooling_device API.
  * Add calibration check during initialization.
   * Disable/enable device when suspend/resume.
 
   

[PATCH v2 1/2] powerpc/85xx: Add binding for SCFG

2015-08-13 Thread Dongsheng Wang
From: Wang Dongsheng dongsheng.w...@freescale.com

SCFG provides SoC specific configuration and status registers for
the chip. Add this for powerpc platform.

Signed-off-by: Wang Dongsheng dongsheng.w...@freescale.com
---
*V2*
- Remove scfg description in board.txt and create scfg.txt for scfg.
- Change "fsl,<board>-scfg" to "fsl,<chip>-scfg"

diff --git a/Documentation/devicetree/bindings/powerpc/fsl/scfg.txt 
b/Documentation/devicetree/bindings/powerpc/fsl/scfg.txt
new file mode 100644
index 000..0532c46
--- /dev/null
+++ b/Documentation/devicetree/bindings/powerpc/fsl/scfg.txt
@@ -0,0 +1,18 @@
+Freescale Supplement configuration unit (SCFG)
+
+SCFG is the supplemental configuration unit, that provides SoC specific
+configuration and status registers for the chip. Such as getting PEX port
+status.
+
+Required properties:
+
+- compatible: should be "fsl,<chip>-scfg"
+- reg: should contain base address and length of SCFG memory-mapped
+  registers
+
+Example:
+
+       scfg: global-utilities@fc000 {
+               compatible = "fsl,t1040-scfg";
+               reg = <0xfc000 0x1000>;
+       };
-- 
2.1.0.27.g96db324


Re: [PATCH v3] powerpc: Add an inline function to update POWER8 HID0

2015-08-13 Thread Sam Bobroff
On Wed, Aug 05, 2015 at 12:38:31PM +0530, Gautham R. Shenoy wrote:
 Section 3.7 of Version 1.2 of the Power8 Processor User's Manual
 prescribes that updates to HID0 be preceded by a SYNC instruction and
 followed by an ISYNC instruction (Page 91).
 
 Create an inline function named update_power8_hid0() which follows this
 recipe and invoke it from the static split core path.
 
 Signed-off-by: Gautham R. Shenoy e...@linux.vnet.ibm.com

Hi Gautham,

I've tested this on a Power 8 machine and verified that it is able to change
split modes and that when doing so the new code is used.

Reviewed-by: Sam Bobroff sam.bobr...@au1.ibm.com
Tested-by: Sam Bobroff sam.bobr...@au1.ibm.com


Re: [PATCH v4 01/11] cxl: Convert MMIO read/write macros to inline functions

2015-08-13 Thread Ian Munsie
Acked-by: Ian Munsie imun...@au1.ibm.com


Re: [PATCH v4 03/11] cxl: Allocate and release the SPA with the AFU

2015-08-13 Thread Ian Munsie
Excerpts from Daniel Axtens's message of 2015-08-13 14:11:21 +1000:
 Previously the SPA was allocated and freed upon entering and leaving
 AFU-directed mode. This causes some issues for error recovery - contexts
 hold a pointer inside the SPA, and they may persist after the AFU has
 been detached.
 
 We would ideally like to allocate the SPA when the AFU is allocated, and
 not release it until the AFU is released. However, we don't know how big the
 SPA needs to be until we read the AFU descriptor.
 
 Therefore, restructure the code:
 
  - Allocate the SPA only once, on the first attach.
 
  - Release the SPA only when the entire AFU is being released (not
detached). Guard the release with a NULL check, so we don't free
if it was never allocated (e.g. dedicated mode)

This is certainly an improvement, though in the long run I wonder if we
should consider making the contexts increase the refcount of the AFU so
that we can be sure that the AFU structure will outlive the contexts?
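
If we went that way, a minimal sketch might look like this (the kref field and
release callback below are assumptions for illustration, not existing cxl code):

/* Hypothetical: each context pins the AFU so the AFU outlives it */
static int cxl_context_pin_afu(struct cxl_context *ctx, struct cxl_afu *afu)
{
        kref_get(&afu->kref);           /* assumes a kref added to struct cxl_afu */
        ctx->afu = afu;
        return 0;
}

static void cxl_context_unpin_afu(struct cxl_context *ctx)
{
        kref_put(&ctx->afu->kref, cxl_afu_release);     /* hypothetical release fn */
}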

That would be a more significant rework though, and this patch is needed
either way and solves an immediate problem, so:

Acked-by: Ian Munsie imun...@au1.ibm.com


RE: [PATCH] video/fsl: Fix the sleep function for FSL DIU module

2015-08-13 Thread Wang Dongsheng
Hi Tabi,

 -Original Message-
 From: Timur Tabi [mailto:ti...@tabi.org]
 Sent: Tuesday, March 25, 2014 11:55 PM
 To: Wang Dongsheng-B40534
 Cc: Wood Scott-B07421; Jin Zhengxiong-R64188; Li Yang-Leo-R58472; linuxppc-
 d...@lists.ozlabs.org; linux-fb...@vger.kernel.org
 Subject: Re: [PATCH] video/fsl: Fix the sleep function for FSL DIU module
 
 On 03/25/2014 02:56 AM, Dongsheng Wang wrote:
  From: Jason Jin jason@freescale.com
 
  For deep sleep, the diu module will power off; when waking up from
  deep sleep, the registers need to be reinitialized.
 
  Signed-off-by: Jason Jin jason@freescale.com
  Signed-off-by: Wang Dongsheng dongsheng.w...@freescale.com
 
  diff --git a/drivers/video/fsl-diu-fb.c b/drivers/video/fsl-diu-fb.c
  index e8758b9..7ec780c 100644
  --- a/drivers/video/fsl-diu-fb.c
  +++ b/drivers/video/fsl-diu-fb.c
  @@ -1628,9 +1628,18 @@ static int fsl_diu_suspend(struct platform_device *ofdev, pm_message_t state)
    static int fsl_diu_resume(struct platform_device *ofdev)
    {
        struct fsl_diu_data *data;
   +    struct mfb_info *mfbi;
  
  You don't need this, if ...
  
   +    int i;
  
        data = dev_get_drvdata(&ofdev->dev);
   -    enable_lcdc(data->fsl_diu_info);
   +    fsl_diu_enable_interrupts(data);
   +    update_lcdc(data->fsl_diu_info);
   +
   +    for (i = 0; i < NUM_AOIS; i++) {
   +            mfbi = &data->mfb[i];
   +            if (mfbi->count)
  
  ... you do this:
  
    if (data->mfb[i].count)
  
  Also, 'i' should be an 'unsigned int'.
  
   +            fsl_diu_enable_panel(&data->fsl_diu_info[i]);
   +    }
  
        return 0;
    }
 
 
 Other than that, this seems okay.
 

Thanks, send v2 to update this patch.

Regards,
-Dongsheng

Re: [PATCH v4 05/11] cxl: Clean up adapter MMIO unmap path.

2015-08-13 Thread Ian Munsie
Acked-by: Ian Munsie imun...@au1.ibm.com


Re: [PATCH v4 04/11] cxl: Make IRQ release idempotent

2015-08-13 Thread Ian Munsie
Acked-by: Ian Munsie imun...@au1.ibm.com


Re: RFC: prepare for struct scatterlist entries without page backing

2015-08-13 Thread Boaz Harrosh
On 08/13/2015 05:40 PM, Christoph Hellwig wrote:
 On Wed, Aug 12, 2015 at 03:42:47PM +0300, Boaz Harrosh wrote:
 The support I have suggested and submitted for zone-less sections.
 (In my add_persistent_memory() patchset)

 Would work perfectly well and transparent for all such multimedia cases.
 (All hacks removed). In fact I have loaded pmem (with-pages) on a VRAM
 a few times and it is great easy fun. (I wanted to experiment with cached
 memory over a pcie)
 
 And everyone agree that it was both buggy and incomplete.
 

What? No one ever said anything about bugs. This is the first I've ever heard
of it. I was always under the impression that no one had even tried it out.

I've been running these page-backed NVDIMMs for more than a year, with RDMA to
peers and swap-out to disks. So it's not that bad, I would say.

 Dan has done a respin of the page backed nvdimm work with most of
 these comments addressed.
 

I would love some comments. All I got so far is silence. (And I do not
like Dan's patches; comments will come next week.)

 I have to say I hate both pfn-based I/O [1] and page backed nvdimms with
 passion, so we're looking into the lesser evil with an open mind.
 
 [1] not the SGL part posted here, which I think is quite sane.  The bio
 side is much worse, though.
 

What can I say, I like the page-backed nvdimms. And the long term for me
is 2M pages. I hope we can sit down one day soon and you can explain to me
what's evil about it. I would really, really like to understand.

Thanks though
Boaz
