Re: [PATCH 1/3] powerpc: Don't build powernv for other platform defconfigs

2014-09-24 Thread Stephen Rothwell
Hi Michael,

On Wed, 24 Sep 2014 15:57:10 +1000 Michael Ellerman m...@ellerman.id.au wrote:

 Because powernv arrived after these other platforms, the defconfigs
 didn't have PPC_POWERNV disabled, and being default y it gets turned on.

Well, that raises the question of why PPC_POWERNV is default y at all?

-- 
Cheers,
Stephen Rothwells...@canb.auug.org.au


signature.asc
Description: PGP signature
___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

Re: [PATCH 3/3] powerpc/kdump: crash_dump.c needs to include io.h

2014-09-24 Thread Stephen Rothwell
Hi Michael,

On Wed, 24 Sep 2014 15:57:12 +1000 Michael Ellerman m...@ellerman.id.au wrote:

 For __ioremap().

So does that mean that you really want this patch before 2/3 so that you
don't introduce an unnecessary bisection breakage in ppc64_defconfig?

-- 
Cheers,
Stephen Rothwells...@canb.auug.org.au


signature.asc
Description: PGP signature
___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

Re: [PATCH 1/2] powerpc: Add VM_FAULT_HWPOISON handling to powerpc page fault handler

2014-09-24 Thread Stephen Rothwell
Hi Anton,

On Wed, 24 Sep 2014 10:27:06 +1000 Anton Blanchard an...@samba.org wrote:

 - if (user_mode(regs)) {
 - current-thread.trap_nr = BUS_ADRERR;
 - info.si_signo = SIGBUS;
 - info.si_errno = 0;
 - info.si_code = BUS_ADRERR;
 - info.si_addr = (void __user *)address;
 - force_sig_info(SIGBUS, info, current);
 - return MM_FAULT_RETURN;
 + if (!user_mode(regs))
 + return MM_FAULT_ERR(SIGBUS);
 +
 + current-thread.trap_nr = BUS_ADRERR;
 + info.si_signo = SIGBUS;
 + info.si_errno = 0;
 + info.si_code = BUS_ADRERR;
 + info.si_addr = (void __user *)address;

If you had done this as 2 patches (one to remove the indent and a
second to fix the actual problem), it would have been much easier to
review ...

-- 
Cheers,
Stephen Rothwells...@canb.auug.org.au


signature.asc
Description: PGP signature
___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

[PATCH 1/3] powerpc: Simplify do_sigbus

2014-09-24 Thread Anton Blanchard
Exit out early for a kernel fault, avoiding indenting of
most of the function.

Signed-off-by: Anton Blanchard an...@samba.org
---
 arch/powerpc/mm/fault.c | 20 ++--
 1 file changed, 10 insertions(+), 10 deletions(-)

diff --git a/arch/powerpc/mm/fault.c b/arch/powerpc/mm/fault.c
index 51ab9e7..abc8c81 100644
--- a/arch/powerpc/mm/fault.c
+++ b/arch/powerpc/mm/fault.c
@@ -120,16 +120,16 @@ static int do_sigbus(struct pt_regs *regs, unsigned long 
address)
 
up_read(current-mm-mmap_sem);
 
-   if (user_mode(regs)) {
-   current-thread.trap_nr = BUS_ADRERR;
-   info.si_signo = SIGBUS;
-   info.si_errno = 0;
-   info.si_code = BUS_ADRERR;
-   info.si_addr = (void __user *)address;
-   force_sig_info(SIGBUS, info, current);
-   return MM_FAULT_RETURN;
-   }
-   return MM_FAULT_ERR(SIGBUS);
+   if (!user_mode(regs))
+   return MM_FAULT_ERR(SIGBUS);
+
+   current-thread.trap_nr = BUS_ADRERR;
+   info.si_signo = SIGBUS;
+   info.si_errno = 0;
+   info.si_code = BUS_ADRERR;
+   info.si_addr = (void __user *)address;
+   force_sig_info(SIGBUS, info, current);
+   return MM_FAULT_RETURN;
 }
 
 static int mm_fault_error(struct pt_regs *regs, unsigned long addr, int fault)
-- 
1.9.1

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

[PATCH 2/3] powerpc: Add VM_FAULT_HWPOISON handling to powerpc page fault handler

2014-09-24 Thread Anton Blanchard
do_page_fault was missing knowledge of HWPOISON, and we would oops
if userspace tried to access a poisoned page:

kernel BUG at arch/powerpc/mm/fault.c:180!

Signed-off-by: Anton Blanchard an...@samba.org
---
 arch/powerpc/mm/fault.c | 17 +++--
 1 file changed, 11 insertions(+), 6 deletions(-)

diff --git a/arch/powerpc/mm/fault.c b/arch/powerpc/mm/fault.c
index abc8c81..588b6cc 100644
--- a/arch/powerpc/mm/fault.c
+++ b/arch/powerpc/mm/fault.c
@@ -114,7 +114,8 @@ static int store_updates_sp(struct pt_regs *regs)
 #define MM_FAULT_CONTINUE  -1
 #define MM_FAULT_ERR(sig)  (sig)
 
-static int do_sigbus(struct pt_regs *regs, unsigned long address)
+static int do_sigbus(struct pt_regs *regs, unsigned long address,
+unsigned int fault)
 {
siginfo_t info;
 
@@ -128,6 +129,13 @@ static int do_sigbus(struct pt_regs *regs, unsigned long 
address)
info.si_errno = 0;
info.si_code = BUS_ADRERR;
info.si_addr = (void __user *)address;
+#ifdef CONFIG_MEMORY_FAILURE
+   if (fault  (VM_FAULT_HWPOISON|VM_FAULT_HWPOISON_LARGE)) {
+   pr_err(MCE: Killing %s:%d due to hardware memory corruption 
fault at %lx\n,
+   current-comm, current-pid, address);
+   info.si_code = BUS_MCEERR_AR;
+   }
+#endif
force_sig_info(SIGBUS, info, current);
return MM_FAULT_RETURN;
 }
@@ -170,11 +178,8 @@ static int mm_fault_error(struct pt_regs *regs, unsigned 
long addr, int fault)
return MM_FAULT_RETURN;
}
 
-   /* Bus error. x86 handles HWPOISON here, we'll add this if/when
-* we support the feature in HW
-*/
-   if (fault  VM_FAULT_SIGBUS)
-   return do_sigbus(regs, addr);
+   if (fault  (VM_FAULT_SIGBUS|VM_FAULT_HWPOISON|VM_FAULT_HWPOISON_LARGE))
+   return do_sigbus(regs, addr, fault);
 
/* We don't understand the fault code, this is fatal */
BUG();
-- 
1.9.1

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

[PATCH 3/3] powerpc: Fill in si_addr_lsb siginfo field

2014-09-24 Thread Anton Blanchard
Fill in the si_addr_lsb siginfo field so the hwpoison code can
pass to userspace the length of memory that has been corrupted.

Signed-off-by: Anton Blanchard an...@samba.org
---
 arch/powerpc/mm/fault.c | 8 
 1 file changed, 8 insertions(+)

diff --git a/arch/powerpc/mm/fault.c b/arch/powerpc/mm/fault.c
index 588b6cc..24b3f49 100644
--- a/arch/powerpc/mm/fault.c
+++ b/arch/powerpc/mm/fault.c
@@ -33,6 +33,7 @@
 #include linux/magic.h
 #include linux/ratelimit.h
 #include linux/context_tracking.h
+#include linux/hugetlb.h
 
 #include asm/firmware.h
 #include asm/page.h
@@ -118,6 +119,7 @@ static int do_sigbus(struct pt_regs *regs, unsigned long 
address,
 unsigned int fault)
 {
siginfo_t info;
+   unsigned int lsb = 0;
 
up_read(current-mm-mmap_sem);
 
@@ -135,7 +137,13 @@ static int do_sigbus(struct pt_regs *regs, unsigned long 
address,
current-comm, current-pid, address);
info.si_code = BUS_MCEERR_AR;
}
+
+   if (fault  VM_FAULT_HWPOISON_LARGE)
+   lsb = hstate_index_to_shift(VM_FAULT_GET_HINDEX(fault));
+   if (fault  VM_FAULT_HWPOISON)
+   lsb = PAGE_SHIFT;
 #endif
+   info.si_addr_lsb = lsb;
force_sig_info(SIGBUS, info, current);
return MM_FAULT_RETURN;
 }
-- 
1.9.1

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

Re: [PATCH 1/2] powerpc: Add VM_FAULT_HWPOISON handling to powerpc page fault handler

2014-09-24 Thread Anton Blanchard

Hi Stephen,

 If you had done this as 2 patches (one to remove the indent and a
 second to fix the actual problem), it would have been much easier to
 review ...

Good idea, I separated it out and resubmitted.

Anton
___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

[PATCH] powerpc/8xx: Remove Kconfig symbol FADS

2014-09-24 Thread Paul Bolle
Commit 39eb56da2b53 (pcmcia: Remove m8xx_pcmcia driver) removed the
only driver that used CONFIG_FADS. Setting the Kconfig symbol FADS is
pointless since that commit. Remove it.

Signed-off-by: Paul Bolle pebo...@tiscali.nl
---
Done on top of next-20140923. Tested with git grep only.

Another cleanup might be to remove MPC8XXFADS (or FADS) from the 8xx
Machine Type choice. Is there any reason left to pick FADS as a
machine type?

 arch/powerpc/platforms/8xx/Kconfig | 4 
 1 file changed, 4 deletions(-)

diff --git a/arch/powerpc/platforms/8xx/Kconfig 
b/arch/powerpc/platforms/8xx/Kconfig
index 247fdea0c8be..831f2e718b06 100644
--- a/arch/powerpc/platforms/8xx/Kconfig
+++ b/arch/powerpc/platforms/8xx/Kconfig
@@ -1,6 +1,3 @@
-config FADS
-   bool
-
 config CPM1
bool
select CPM
@@ -13,7 +10,6 @@ choice
 
 config MPC8XXFADS
bool FADS
-   select FADS
 
 config MPC86XADS
bool MPC86XADS
-- 
1.9.3

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

Re: [PATCH v4 04/11] drivers: base: support cpu cache information interface to userspace via sysfs

2014-09-24 Thread Greg Kroah-Hartman
On Wed, Sep 17, 2014 at 12:00:48PM -0700, Greg Kroah-Hartman wrote:
 On Wed, Sep 17, 2014 at 06:25:10PM +0100, Sudeep Holla wrote:
  Hi Greg,
  
  On 03/09/14 18:00, Sudeep Holla wrote:
  From: Sudeep Holla sudeep.ho...@arm.com
  
  This patch adds initial support for providing processor cache information
  to userspace through sysfs interface. This is based on already existing
  implementations(x86, ia64, s390 and powerpc) and hence the interface is
  intended to be fully compatible.
  
  The main purpose of this generic support is to avoid further code
  duplication to support new architectures and also to unify all the existing
  different implementations.
  
  This implementation maintains the hierarchy of cache objects which reflects
  the system's cache topology. Cache devices are instantiated as needed as
  CPUs come online. The cache information is replicated per-cpu even if they 
  are
  shared. A per-cpu array of cache information maintained is used mainly for
  sysfs-related book keeping.
  
  It also implements the shared_cpu_map attribute, which is essential for
  enabling both kernel and user-space to discover the system's overall cache
  topology.
  
  This patch also add the missing ABI documentation for the cacheinfo sysfs
  interface already, which is well defined and widely used.
  
  
  Can you review the first 4 patches in this series please ?
 
 It's in my todo queue, which is really long at the moment due to me
 going to conferences (at one right now...)  Will be working on this
 soon, thanks for your patience.

Based on the review comments, I think you are going to change at least
the first patch, right?  Please resend the latest version of this
series, with all of the accumulated tested-by and acked lines and
resend.

thanks,

greg k-h
___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

[PATCH 1/5] char: hw_random: Remove .owner field for driver

2014-09-24 Thread Kiran Padwal
There is no need to init .owner field.

Based on the patch from Peter Griffin peter.grif...@linaro.org
mmc: remove .owner field for drivers using module_platform_driver

This patch removes the superfluous .owner field for drivers which
use the module_platform_driver API, as this is overridden in
platform_driver_register anyway.

Signed-off-by: Kiran Padwal kiran.pad...@smartplayin.com
---
 drivers/char/hw_random/atmel-rng.c  |1 -
 drivers/char/hw_random/bcm2835-rng.c|1 -
 drivers/char/hw_random/bcm63xx-rng.c|1 -
 drivers/char/hw_random/exynos-rng.c |1 -
 drivers/char/hw_random/msm-rng.c|1 -
 drivers/char/hw_random/n2-drv.c |1 -
 drivers/char/hw_random/octeon-rng.c |1 -
 drivers/char/hw_random/omap3-rom-rng.c  |1 -
 drivers/char/hw_random/pasemi-rng.c |1 -
 drivers/char/hw_random/ppc4xx-rng.c |1 -
 drivers/char/hw_random/timeriomem-rng.c |1 -
 11 files changed, 11 deletions(-)

diff --git a/drivers/char/hw_random/atmel-rng.c 
b/drivers/char/hw_random/atmel-rng.c
index 851bc7e..25a4de2 100644
--- a/drivers/char/hw_random/atmel-rng.c
+++ b/drivers/char/hw_random/atmel-rng.c
@@ -128,7 +128,6 @@ static struct platform_driver atmel_trng_driver = {
.remove = atmel_trng_remove,
.driver = {
.name   = atmel-trng,
-   .owner  = THIS_MODULE,
 #ifdef CONFIG_PM
.pm = atmel_trng_pm_ops,
 #endif /* CONFIG_PM */
diff --git a/drivers/char/hw_random/bcm2835-rng.c 
b/drivers/char/hw_random/bcm2835-rng.c
index e900961..7192ec2 100644
--- a/drivers/char/hw_random/bcm2835-rng.c
+++ b/drivers/char/hw_random/bcm2835-rng.c
@@ -99,7 +99,6 @@ MODULE_DEVICE_TABLE(of, bcm2835_rng_of_match);
 static struct platform_driver bcm2835_rng_driver = {
.driver = {
.name = bcm2835-rng,
-   .owner = THIS_MODULE,
.of_match_table = bcm2835_rng_of_match,
},
.probe  = bcm2835_rng_probe,
diff --git a/drivers/char/hw_random/bcm63xx-rng.c 
b/drivers/char/hw_random/bcm63xx-rng.c
index 36581ea..ba6a65a 100644
--- a/drivers/char/hw_random/bcm63xx-rng.c
+++ b/drivers/char/hw_random/bcm63xx-rng.c
@@ -162,7 +162,6 @@ static struct platform_driver bcm63xx_rng_driver = {
.remove = bcm63xx_rng_remove,
.driver = {
.name   = bcm63xx-rng,
-   .owner  = THIS_MODULE,
},
 };
 
diff --git a/drivers/char/hw_random/exynos-rng.c 
b/drivers/char/hw_random/exynos-rng.c
index 9f8277c..beaa157 100644
--- a/drivers/char/hw_random/exynos-rng.c
+++ b/drivers/char/hw_random/exynos-rng.c
@@ -169,7 +169,6 @@ static UNIVERSAL_DEV_PM_OPS(exynos_rng_pm_ops, 
exynos_rng_runtime_suspend,
 static struct platform_driver exynos_rng_driver = {
.driver = {
.name   = exynos-rng,
-   .owner  = THIS_MODULE,
.pm = exynos_rng_pm_ops,
},
.probe  = exynos_rng_probe,
diff --git a/drivers/char/hw_random/msm-rng.c b/drivers/char/hw_random/msm-rng.c
index 148521e..cea1c70 100644
--- a/drivers/char/hw_random/msm-rng.c
+++ b/drivers/char/hw_random/msm-rng.c
@@ -185,7 +185,6 @@ static struct platform_driver msm_rng_driver = {
.remove = msm_rng_remove,
.driver = {
.name = KBUILD_MODNAME,
-   .owner = THIS_MODULE,
.of_match_table = of_match_ptr(msm_rng_of_match),
}
 };
diff --git a/drivers/char/hw_random/n2-drv.c b/drivers/char/hw_random/n2-drv.c
index 292a588..843d6f6 100644
--- a/drivers/char/hw_random/n2-drv.c
+++ b/drivers/char/hw_random/n2-drv.c
@@ -750,7 +750,6 @@ MODULE_DEVICE_TABLE(of, n2rng_match);
 static struct platform_driver n2rng_driver = {
.driver = {
.name = n2rng,
-   .owner = THIS_MODULE,
.of_match_table = n2rng_match,
},
.probe  = n2rng_probe,
diff --git a/drivers/char/hw_random/octeon-rng.c 
b/drivers/char/hw_random/octeon-rng.c
index b5cc342..be1c3f6 100644
--- a/drivers/char/hw_random/octeon-rng.c
+++ b/drivers/char/hw_random/octeon-rng.c
@@ -117,7 +117,6 @@ static int __exit octeon_rng_remove(struct platform_device 
*pdev)
 static struct platform_driver octeon_rng_driver = {
.driver = {
.name   = octeon_rng,
-   .owner  = THIS_MODULE,
},
.probe  = octeon_rng_probe,
.remove = __exit_p(octeon_rng_remove),
diff --git a/drivers/char/hw_random/omap3-rom-rng.c 
b/drivers/char/hw_random/omap3-rom-rng.c
index 6f2eaff..a405cdc 100644
--- a/drivers/char/hw_random/omap3-rom-rng.c
+++ b/drivers/char/hw_random/omap3-rom-rng.c
@@ -126,7 +126,6 @@ static int omap3_rom_rng_remove(struct platform_device 
*pdev)
 static struct platform_driver omap3_rom_rng_driver = {
.driver = {
.name   = omap3-rom-rng,
-   .owner  = 

[PATCHv8 3/6] ppc/cell: trivial: replace get_unused_fd() by get_unused_fd_flags(0)

2014-09-24 Thread Yann Droneaud
This patch replaces calls to get_unused_fd() with equivalent call to
get_unused_fd_flags(0) to preserve current behavior for existing code.

In a further patch, get_unused_fd() will be removed so that new code
starts using get_unused_fd_flags(), with the hope O_CLOEXEC could be
used, either by default or chosen by userspace.

Link: http://lkml.kernel.org/r/cover.1411562410.git.ydrone...@opteya.com
Cc: Al Viro v...@zeniv.linux.org.uk
Cc: Andrew Morton a...@linux-foundation.org
Cc: triv...@kernel.org
Signed-off-by: Yann Droneaud ydrone...@opteya.com
---
 arch/powerpc/platforms/cell/spufs/inode.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/arch/powerpc/platforms/cell/spufs/inode.c 
b/arch/powerpc/platforms/cell/spufs/inode.c
index 87ba7cf99cd7..51effcec30d8 100644
--- a/arch/powerpc/platforms/cell/spufs/inode.c
+++ b/arch/powerpc/platforms/cell/spufs/inode.c
@@ -301,7 +301,7 @@ static int spufs_context_open(struct path *path)
int ret;
struct file *filp;
 
-   ret = get_unused_fd();
+   ret = get_unused_fd_flags(0);
if (ret  0)
return ret;
 
@@ -518,7 +518,7 @@ static int spufs_gang_open(struct path *path)
int ret;
struct file *filp;
 
-   ret = get_unused_fd();
+   ret = get_unused_fd_flags(0);
if (ret  0)
return ret;
 
-- 
1.9.3

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

[PATCHv8 0/6] Getting rid of get_unused_fd() / enable close-on-exec

2014-09-24 Thread Yann Droneaud
 not expect to have to
  call fcntl(fd, F_SETFD, 0) to make it available across
  exec().

  If file descriptor created by a subsystem is not tied to
  the current process resources, it's likely legal to use it
  in a different process context, thus O_CLOEXEC must not be
  the default.

  If one, as a subsystem maintainer, cannot tell for sure
  that no userspace program ever relies on current behavior, e.g.
  file descriptor being inherited across exec(), then the
  default flag *must* be kept 0 to not break applications.

- This subsystem cannot be turned to use O_CLOEXEC by default:

If O_CLOEXEC cannot be made the default, it would be worth
considering extending the API with a (set of) function(s) taking
a flag parameter so that userspace can atomically request
close-on-exec if it needs it (and it should need it!).

- Background:

One might want to read "Secure File Descriptor Handling" [2] by
Ulrich Drepper, who is responsible for adding the O_CLOEXEC flag on
open(), and similar flags on other syscalls.

One might also want to read PEP-446 "Make newly created file
descriptors non-inheritable" [3] by Victor Stinner since it has
a lot more background information on file descriptor leaking.

One would also like to read Excuse me son, but your code is
leaking !!! [4] by Dan Walsh for advice.

[1] http://lwn.net/Articles/412131/
[2] http://udrepper.livejournal.com/20407.html
[3] http://www.python.org/dev/peps/pep-0446/
[4] http://danwalsh.livejournal.com/53603.html

- Statistics:

In linux-next, tag next-20140924, there are currently:

- 33 calls to fd_install()
   with one call part of anon_inode_getfd()
- 26 calls to get_unused_fd_flags()
   with one call part of anon_inode_getfd()
   with another part of get_unused_fd() macro
- 11 calls to anon_inode_getfd()
-  8 calls to anon_inode_getfile()
   with one call part of anon_inode_getfd()
-  6 calls to get_unused_fd()

Changes from patchset v7 [PATCHSETv7]

- Rebased on top of latest -next
- Simplified commit message for trivial patches
- Proofread commit messages
- Added CC: linux-...@vger.kernel.org

Changes from patchset v6 [PATCHSETv6]

- Rebased on top of latest -next
- Added Cc: triv...@kernel.org for the first trivials
  patches.

Changes from patchset v5 [PATCHSETv5]

- perf: introduce a flag to enable close-on-exec in
  perf_event_open()
  DROPPED: applied upstream, commit a21b0b354d4a.

Changes from patchset v4 [PATCHSETv4]:

- rewrote cover letter following discussion with perf
  maintainer. Thanks to Peter Zijlstra.

- modified a bit some commit messages.

- events: use get_unused_fd_flags(0) instead of get_unused_fd()
  DROPPED: replaced by following patch.

- perf: introduce a flag to enable close-on-exec in
  perf_event_open()
  NEW: instead of hard coding the flags to 0, this patch
   allows userspace to specify close-on-exec flag.

- fanotify: use get_unused_fd_flags(0) instead of get_unused_fd()
  DROPPED: replaced by following patch.

- fanotify: enable close-on-exec on events' fd when requested in
fanotify_init()
  NEW: instead of hard coding the flags to 0, this patch
   enable close-on-exec if userspace request it.

Changes from patchset v3 [PATCHSETv3]:

- industrialio: use anon_inode_getfd() with O_CLOEXEC flag
  DROPPED: applied upstream, commit a646fbf0fd11.

Changes from patchset v2 [PATCHSETv2]:

- android/sw_sync: use get_unused_fd_flags(O_CLOEXEC) instead
  of get_unused_fd()
  DROPPED: applied upstream, commit 45acea57335e.

- android/sync: use get_unused_fd_flags(O_CLOEXEC) instead of
  get_unused_fd()
  DROPPED: applied upstream, commit 9c6cd3b39048.

- vfio: use get_unused_fd_flags(0) instead of get_unused_fd()
  DROPPED: applied upstream, commit a5d550703d2c.
  Additionally subsystem maintainer applied another patch on top
  to set the flags to O_CLOEXEC, commit 5d042fbdbb2d.

- industrialio: use anon_inode_getfd() with O_CLOEXEC flag
  NEW: propose to use O_CLOEXEC as default flag.

Changes from patchset v1 [PATCHSETv1]:

- explicitly added subsystem maintainers as mail recipients.

- infiniband: use get_unused_fd_flags(0) instead of
  get_unused_fd()
  DROPPED: subsystem maintainer applied another patch
   using get_unused_fd_flags(O_CLOEXEC) as suggested,
   commit da183c7af844.

- android/sw_sync: use get_unused_fd_flags(0) instead of
  get_unused_fd()
  MODIFIED: use get_unused_fd_flags(O_CLOEXEC) as suggested.

- android/sync: use get_unused_fd_flags(0) instead of
  get_unused_fd()
  MODIFIED: use get_unused_fd_flags(O_CLOEXEC) as suggested.

- xfs: use get_unused_fd_flags(0) instead of get_unused_fd()
  DROPPED: applied as is by subsystem maintainer, commit 862a62937e76.

- sctp: use get_unused_fd_flags(0) instead of get_unused_fd()
  DROPPED: applied as is by subsystem maintainer, commit 8a59bd3e9b29.

Links:

[PATCHSETv7]
  http://lkml.kernel.org/r/cover.1401630396.git.ydrone...@opteya.com

[PATCHSETv6]
  http://lkml.kernel.org/r/cover.1394532336

[PATCH v2 2/4] Simplify catalog_read()

2014-09-24 Thread Sukadev Bhattiprolu
catalog_read() implements the read interface for the sysfs file

/sys/bus/event_source/devices/hv_24x7/interface/catalog

It essentially takes a buffer, an offset and count as parameters
to the read() call.  It makes a hypervisor call to read a specific
page from the catalog and copy the required bytes into the given
buffer. Each call to catalog_read() returns at most one 4K page.

Given these requirements, we should be able to simplify the
catalog_read().

Signed-off-by: Sukadev Bhattiprolu suka...@linux.vnet.ibm.com
---
 arch/powerpc/perf/hv-24x7.c | 92 +
 1 file changed, 10 insertions(+), 82 deletions(-)

diff --git a/arch/powerpc/perf/hv-24x7.c b/arch/powerpc/perf/hv-24x7.c
index 2f2215c..9427ef7 100644
--- a/arch/powerpc/perf/hv-24x7.c
+++ b/arch/powerpc/perf/hv-24x7.c
@@ -75,86 +75,6 @@ static struct attribute_group format_group = {
 
 static struct kmem_cache *hv_page_cache;
 
-/*
- * read_offset_data - copy data from one buffer to another while treating the
- *source buffer as a small view on the total avaliable
- *source data.
- *
- * @dest: buffer to copy into
- * @dest_len: length of @dest in bytes
- * @requested_offset: the offset within the source data we want. Must be  0
- * @src: buffer to copy data from
- * @src_len: length of @src in bytes
- * @source_offset: the offset in the sorce data that (src,src_len) refers to.
- * Must be  0
- *
- * returns the number of bytes copied.
- *
- * The following ascii art shows the various buffer possitioning we need to
- * handle, assigns some arbitrary varibles to points on the buffer, and then
- * shows how we fiddle with those values to get things we care about (copy
- * start in src and copy len)
- *
- * s = @src buffer
- * d = @dest buffer
- * '.' areas in d are written to.
- *
- *   u
- *   x wv  z
- * d   |.|
- * s |--|
- *
- *  u
- *   x w   z v
- * d   |--|
- * s |--|
- *
- *   x wu,z,v
- * d   ||
- * s |--|
- *
- *   x,wu,v,z
- * d |..|
- * s |--|
- *
- *   xu
- *   wvz
- * d ||
- * s |--|
- *
- *   x  z   w  v
- * d|--|
- * s |--|
- *
- * x = source_offset
- * w = requested_offset
- * z = source_offset + src_len
- * v = requested_offset + dest_len
- *
- * w_offset_in_s = w - x = requested_offset - source_offset
- * z_offset_in_s = z - x = src_len
- * v_offset_in_s = v - x = request_offset + dest_len - src_len
- */
-static ssize_t read_offset_data(void *dest, size_t dest_len,
-   loff_t requested_offset, void *src,
-   size_t src_len, loff_t source_offset)
-{
-   size_t w_offset_in_s = requested_offset - source_offset;
-   size_t z_offset_in_s = src_len;
-   size_t v_offset_in_s = requested_offset + dest_len - src_len;
-   size_t u_offset_in_s = min(z_offset_in_s, v_offset_in_s);
-   size_t copy_len = u_offset_in_s - w_offset_in_s;
-
-   if (requested_offset  0 || source_offset  0)
-   return -EINVAL;
-
-   if (z_offset_in_s = w_offset_in_s)
-   return 0;
-
-   memcpy(dest, src + w_offset_in_s, copy_len);
-   return copy_len;
-}
-
 static unsigned long h_get_24x7_catalog_page_(unsigned long phys_4096,
  unsigned long version,
  unsigned long index)
@@ -185,6 +105,8 @@ static ssize_t catalog_read(struct file *filp, struct 
kobject *kobj,
ssize_t ret = 0;
size_t catalog_len = 0, catalog_page_len = 0, page_count = 0;
loff_t page_offset = 0;
+   loff_t offset_in_page;
+   size_t copy_len;
uint64_t catalog_version_num = 0;
void *page = kmem_cache_alloc(hv_page_cache, GFP_USER);
struct hv_24x7_catalog_page_0 *page_0 = page;
@@ -203,6 +125,7 @@ static ssize_t catalog_read(struct file *filp, struct 
kobject *kobj,
 
page_offset = offset / 4096;
page_count  = count  / 4096;
+   offset_in_page = count % 4096;
 
if (page_offset = catalog_page_len)
goto e_free;
@@ -216,8 +139,13 @@ static ssize_t catalog_read(struct file *filp, struct 
kobject *kobj,
}
}
 
-   ret = read_offset_data(buf, count, offset,
-   page, 4096, page_offset * 4096);
+   copy_len = 4096 - offset_in_page;
+   if (copy_len  count)
+   copy_len = count;
+
+   memcpy(buf, page+offset_in_page, copy_len);
+   ret = copy_len;
+
 e_free:
if (hret)
pr_err(h_get_24x7_catalog_page(ver=%lld, page=%lld) failed:
-- 
1.8.3.1


[PATCH v2 1/4] powerpc/perf/hv-24x7: use kmem_cache instead of aligned stack allocations

2014-09-24 Thread Sukadev Bhattiprolu
From: Cody P Schafer c...@linux.vnet.ibm.com

Ian pointed out the use of __aligned(4096) caused rather large stack
consumption in single_24x7_request(), so use the kmem_cache
hv_page_cache (which we've already got set up for other allocations)
instead of allocating locally.

CC: Sukadev Bhattiprolu suka...@linux.vnet.ibm.com
CC: Haren Myneni hb...@us.ibm.com
CC: Cody P Schafer d...@codyps.com
Reported-by: Ian Munsie imun...@au1.ibm.com
Signed-off-by: Cody P Schafer c...@linux.vnet.ibm.com
---
 arch/powerpc/perf/hv-24x7.c | 52 -
 1 file changed, 37 insertions(+), 15 deletions(-)

diff --git a/arch/powerpc/perf/hv-24x7.c b/arch/powerpc/perf/hv-24x7.c
index 70d4f74..2f2215c 100644
--- a/arch/powerpc/perf/hv-24x7.c
+++ b/arch/powerpc/perf/hv-24x7.c
@@ -294,7 +294,7 @@ static unsigned long single_24x7_request(u8 domain, u32 
offset, u16 ix,
 u16 lpar, u64 *res,
 bool success_expected)
 {
-   unsigned long ret;
+   unsigned long ret = -ENOMEM;
 
/*
 * request_buffer and result_buffer are not required to be 4k aligned,
@@ -304,7 +304,27 @@ static unsigned long single_24x7_request(u8 domain, u32 
offset, u16 ix,
struct reqb {
struct hv_24x7_request_buffer buf;
struct hv_24x7_request req;
-   } __packed __aligned(4096) request_buffer = {
+   } __packed * request_buffer;
+   struct resb {
+   struct hv_24x7_data_result_buffer buf;
+   struct hv_24x7_result res;
+   struct hv_24x7_result_element elem;
+   __be64 result;
+   } __packed * result_buffer;
+
+   BUILD_BUG_ON(sizeof(*request_buffer)  4096);
+   BUILD_BUG_ON(sizeof(*result_buffer)  4096);
+
+   request_buffer = kmem_cache_alloc(hv_page_cache, GFP_USER);
+
+   if (!request_buffer)
+   goto out_reqb;
+
+   result_buffer = kmem_cache_zalloc(hv_page_cache, GFP_USER);
+   if (!result_buffer)
+   goto out_resb;
+
+   *request_buffer = (struct reqb) {
.buf = {
.interface_version = HV_24X7_IF_VERSION_CURRENT,
.num_requests = 1,
@@ -320,28 +340,30 @@ static unsigned long single_24x7_request(u8 domain, u32 
offset, u16 ix,
}
};
 
-   struct resb {
-   struct hv_24x7_data_result_buffer buf;
-   struct hv_24x7_result res;
-   struct hv_24x7_result_element elem;
-   __be64 result;
-   } __packed __aligned(4096) result_buffer = {};
-
ret = plpar_hcall_norets(H_GET_24X7_DATA,
-   virt_to_phys(request_buffer), sizeof(request_buffer),
-   virt_to_phys(result_buffer),  sizeof(result_buffer));
+   virt_to_phys(request_buffer), sizeof(*request_buffer),
+   virt_to_phys(result_buffer),  sizeof(*result_buffer));
 
if (ret) {
if (success_expected)
pr_err_ratelimited(hcall failed: %d %#x %#x %d = 
0x%lx (%ld) detail=0x%x failing ix=%x\n,
domain, offset, ix, lpar,
ret, ret,
-   result_buffer.buf.detailed_rc,
-   result_buffer.buf.failing_request_ix);
-   return ret;
+   result_buffer-buf.detailed_rc,
+   result_buffer-buf.failing_request_ix);
+   goto out_hcall;
}
 
-   *res = be64_to_cpu(result_buffer.result);
+   *res = be64_to_cpu(result_buffer-result);
+   kfree(result_buffer);
+   kfree(request_buffer);
+   return ret;
+
+out_hcall:
+   kfree(result_buffer);
+out_resb:
+   kfree(request_buffer);
+out_reqb:
return ret;
 }
 
-- 
1.8.3.1

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

[PATCH v2 0/4] powerpc/perf: Miscellaneous fixes

2014-09-24 Thread Sukadev Bhattiprolu
Miscellaneous fixes for perf and 24x7 counters in powerpc.

Patches 1,3,4 were submitted earlier as a part of the parametrized
events for 24x7 counters. But they are not directly related to the
parametrized events.

Patch 2 simplifies and fixes a bug in catalog_read() which causes the
catalog file to not read the first page.

Changelog[v2]
Rebase to perf/core tree.

Cody P Schafer (3):
  powerpc/perf/hv-24x7: use kmem_cache instead of aligned stack
allocations
  perf Documentation: sysfs events/ interfaces
  perf Documentation: remove duplicated docs for powerpc cpu specific
events

Sukadev Bhattiprolu (1):
  Simplify catalog_read()

 .../testing/sysfs-bus-event_source-devices-events  | 611 ++---
 arch/powerpc/perf/hv-24x7.c| 144 ++---
 2 files changed, 96 insertions(+), 659 deletions(-)

-- 
1.8.3.1

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

[PATCH v2 3/4] perf Documentation: sysfs events/ interfaces

2014-09-24 Thread Sukadev Bhattiprolu
From: Cody P Schafer c...@linux.vnet.ibm.com

Add documentation for the event, event.scale, and event.unit
files in sysfs.

event.scale and event.unit were undocumented.
event was previously documented only for specific powerpc pmu events.

CC: Sukadev Bhattiprolu suka...@linux.vnet.ibm.com
CC: Haren Myneni hb...@us.ibm.com
CC: Cody P Schafer d...@codyps.com
Signed-off-by: Cody P Schafer c...@linux.vnet.ibm.com
---
 .../testing/sysfs-bus-event_source-devices-events  | 60 ++
 1 file changed, 60 insertions(+)

diff --git a/Documentation/ABI/testing/sysfs-bus-event_source-devices-events 
b/Documentation/ABI/testing/sysfs-bus-event_source-devices-events
index 7b40a3c..a5226f0 100644
--- a/Documentation/ABI/testing/sysfs-bus-event_source-devices-events
+++ b/Documentation/ABI/testing/sysfs-bus-event_source-devices-events
@@ -599,3 +599,63 @@ Description:   POWER-systems specific performance 
monitoring events
Further, multiple terms like 'event=0x' can be specified
and separated with comma. All available terms are defined in
the /sys/bus/event_source/devices/dev/format file.
+
+What: /sys/bus/event_source/devices/pmu/events/event
+Date: 2014/02/24
+Contact:   Linux kernel mailing list linux-ker...@vger.kernel.org
+Description:   Per-pmu performance monitoring events specific to the running 
system
+
+   Each file (except for some of those with a '.' in them, '.unit'
+   and '.scale') in the 'events' directory describes a single
+   performance monitoring event supported by the pmu. The name
+   of the file is the name of the event.
+
+   File contents:
+
+   term[=value][,term[=value]]...
+
+   Where term is one of the terms listed under
+   /sys/bus/event_source/devices/pmu/format/ and value is
+   a number is base-16 format with a '0x' prefix (lowercase only).
+   If a term is specified alone (without an assigned value), it
+   is implied that 0x1 is assigned to that term.
+
+   Examples (each of these lines would be in a seperate file):
+
+   event=0x2abc
+   event=0x423,inv,cmask=0x3
+   domain=0x1,offset=0x8,starting_index=0x
+
+   Each of the assignments indicates a value to be assigned to a
+   particular set of bits (as defined by the format file
+   corresponding to the term) in the perf_event structure passed
+   to the perf_open syscall.
+
+What: /sys/bus/event_source/devices/pmu/events/event.unit
+Date: 2014/02/24
+Contact:   Linux kernel mailing list linux-ker...@vger.kernel.org
+Description:   Perf event units
+
+   A string specifying the English plural numerical unit that 
event
+   (once multiplied by event.scale) represents.
+
+   Example:
+
+   Joules
+
+What: /sys/bus/event_source/devices/pmu/events/event.scale
+Date: 2014/02/24
+Contact:   Linux kernel mailing list linux-ker...@vger.kernel.org
+Description:   Perf event scaling factors
+
+   A string representing a floating point value expressed in
+   scientific notation to be multiplied by the event count
+   recieved from the kernel to match the unit specified in the
+   event.unit file.
+
+   Example:
+
+   2.3283064365386962890625e-10
+
+   This is provided to avoid performing floating point arithmetic
+   in the kernel.
-- 
1.8.3.1

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

[PATCH v2 4/4] perf Documentation: remove duplicated docs for powerpc cpu specific events

2014-09-24 Thread Sukadev Bhattiprolu
From: Cody P Schafer c...@linux.vnet.ibm.com

Listing specific events doesn't actually help us at all here because:
 - these events actually vary between different ppc processors, they
   aren't guaranteed to be present.
 - the documentation of the (generic) file contents is now superseded by the
   docs for arbitrary event file contents.

CC: Sukadev Bhattiprolu suka...@linux.vnet.ibm.com
CC: Haren Myneni hb...@us.ibm.com
CC: Cody P Schafer d...@codyps.com
Signed-off-by: Cody P Schafer c...@linux.vnet.ibm.com
---
 .../testing/sysfs-bus-event_source-devices-events  | 573 -
 1 file changed, 573 deletions(-)

diff --git a/Documentation/ABI/testing/sysfs-bus-event_source-devices-events 
b/Documentation/ABI/testing/sysfs-bus-event_source-devices-events
index a5226f0..20979f8 100644
--- a/Documentation/ABI/testing/sysfs-bus-event_source-devices-events
+++ b/Documentation/ABI/testing/sysfs-bus-event_source-devices-events
@@ -27,579 +27,6 @@ Description:Generic performance monitoring events
basename.
 
 
-What:  /sys/devices/cpu/events/PM_1PLUS_PPC_CMPL
-   /sys/devices/cpu/events/PM_BRU_FIN
-   /sys/devices/cpu/events/PM_BR_MPRED
-   /sys/devices/cpu/events/PM_CMPLU_STALL
-   /sys/devices/cpu/events/PM_CMPLU_STALL_BRU
-   /sys/devices/cpu/events/PM_CMPLU_STALL_DCACHE_MISS
-   /sys/devices/cpu/events/PM_CMPLU_STALL_DFU
-   /sys/devices/cpu/events/PM_CMPLU_STALL_DIV
-   /sys/devices/cpu/events/PM_CMPLU_STALL_ERAT_MISS
-   /sys/devices/cpu/events/PM_CMPLU_STALL_FXU
-   /sys/devices/cpu/events/PM_CMPLU_STALL_IFU
-   /sys/devices/cpu/events/PM_CMPLU_STALL_LSU
-   /sys/devices/cpu/events/PM_CMPLU_STALL_REJECT
-   /sys/devices/cpu/events/PM_CMPLU_STALL_SCALAR
-   /sys/devices/cpu/events/PM_CMPLU_STALL_SCALAR_LONG
-   /sys/devices/cpu/events/PM_CMPLU_STALL_STORE
-   /sys/devices/cpu/events/PM_CMPLU_STALL_THRD
-   /sys/devices/cpu/events/PM_CMPLU_STALL_VECTOR
-   /sys/devices/cpu/events/PM_CMPLU_STALL_VECTOR_LONG
-   /sys/devices/cpu/events/PM_CYC
-   /sys/devices/cpu/events/PM_GCT_NOSLOT_BR_MPRED
-   /sys/devices/cpu/events/PM_GCT_NOSLOT_BR_MPRED_IC_MISS
-   /sys/devices/cpu/events/PM_GCT_NOSLOT_CYC
-   /sys/devices/cpu/events/PM_GCT_NOSLOT_IC_MISS
-   /sys/devices/cpu/events/PM_GRP_CMPL
-   /sys/devices/cpu/events/PM_INST_CMPL
-   /sys/devices/cpu/events/PM_LD_MISS_L1
-   /sys/devices/cpu/events/PM_LD_REF_L1
-   /sys/devices/cpu/events/PM_RUN_CYC
-   /sys/devices/cpu/events/PM_RUN_INST_CMPL
-   /sys/devices/cpu/events/PM_IC_DEMAND_L2_BR_ALL
-   /sys/devices/cpu/events/PM_GCT_UTIL_7_TO_10_SLOTS
-   /sys/devices/cpu/events/PM_PMC2_SAVED
-   /sys/devices/cpu/events/PM_VSU0_16FLOP
-   /sys/devices/cpu/events/PM_MRK_LSU_DERAT_MISS
-   /sys/devices/cpu/events/PM_MRK_ST_CMPL
-   /sys/devices/cpu/events/PM_NEST_PAIR3_ADD
-   /sys/devices/cpu/events/PM_L2_ST_DISP
-   /sys/devices/cpu/events/PM_L2_CASTOUT_MOD
-   /sys/devices/cpu/events/PM_ISEG
-   /sys/devices/cpu/events/PM_MRK_INST_TIMEO
-   /sys/devices/cpu/events/PM_L2_RCST_DISP_FAIL_ADDR
-   /sys/devices/cpu/events/PM_LSU1_DC_PREF_STREAM_CONFIRM
-   /sys/devices/cpu/events/PM_IERAT_WR_64K
-   /sys/devices/cpu/events/PM_MRK_DTLB_MISS_16M
-   /sys/devices/cpu/events/PM_IERAT_MISS
-   /sys/devices/cpu/events/PM_MRK_PTEG_FROM_LMEM
-   /sys/devices/cpu/events/PM_FLOP
-   /sys/devices/cpu/events/PM_THRD_PRIO_4_5_CYC
-   /sys/devices/cpu/events/PM_BR_PRED_TA
-   /sys/devices/cpu/events/PM_EXT_INT
-   /sys/devices/cpu/events/PM_VSU_FSQRT_FDIV
-   /sys/devices/cpu/events/PM_MRK_LD_MISS_EXPOSED_CYC
-   /sys/devices/cpu/events/PM_LSU1_LDF
-   /sys/devices/cpu/events/PM_IC_WRITE_ALL
-   /sys/devices/cpu/events/PM_LSU0_SRQ_STFWD
-   /sys/devices/cpu/events/PM_PTEG_FROM_RL2L3_MOD
-   /sys/devices/cpu/events/PM_MRK_DATA_FROM_L31_SHR
-   /sys/devices/cpu/events/PM_DATA_FROM_L21_MOD
-   /sys/devices/cpu/events/PM_VSU1_SCAL_DOUBLE_ISSUED
-   /sys/devices/cpu/events/PM_VSU0_8FLOP
-   /sys/devices/cpu/events/PM_POWER_EVENT1
-   /sys/devices/cpu/events/PM_DISP_CLB_HELD_BAL
-   /sys/devices/cpu/events/PM_VSU1_2FLOP
-   /sys/devices/cpu/events/PM_LWSYNC_HELD
-   /sys/devices/cpu/events/PM_PTEG_FROM_DL2L3_SHR
-   /sys/devices/cpu/events/PM_INST_FROM_L21_MOD
-

[PATCH v4 00/10] Add support for parameterized events from sysfs

2014-09-24 Thread Sukadev Bhattiprolu
What this patchset does:

 - the first patch (override sysfs in tools/perf via SYSFS_PATH) was sent out
   previously, but needed a resend anyhow. Having it is useful for testing the
   later changes to tools/perf.
 - the second patch is a bugfix to the powerpc hv-24x7 code which was
   previously sent out, which is a good idea to have when testing these patches
   on POWER8 hardware.

 - document perf sysfs and the changes to add parameterized events
   - semi-notably: removes the growing list of specific POWER cpu events and
 begins documenting them generically, much like the docs for
 /sys/modules/MODULENAME do for modules.
 - tools/perf changes to support parameterized events
 - export some parameterized events from the powerpc pmus hv_24x7 and hv_gpci

Description of event parameters from the documentation patch:

Event parameters are a basic way for partial events to be specified in
sysfs with per-event names given to the fields that need to be filled in
when using a particular event.

It is intended for supporting cases where the single 'cpu' parameter is
insufficient. For example, POWER 8 has events for physical
sockets/cores/cpus that are accessible from within virtual machines. To
keep using the single 'cpu' parameter we'd need to perform a mapping
between Linux's cpus and the physical machine's cpus (in this case
Linux is running under a hypervisor). This isn't possible because
bindings between our cpus and physical cpus may not be fixed, and we
probably won't have a cpu on each physical cpu.

Description of the sysfs contents when events are parameterized (copied from an
included patch):

Examples:

domain=0x1,offset=0x8,starting_index=phys_cpu

In the case of the last example, a value replacing phys_cpu
would need to be provided by the user selecting the particular
event. This is referred to as event parameterization. All
non-numerical values indicate an event parameter.

Notes on how perf-list displays parameterized events (and how to use them,
again culled from an included patch):

PARAMETERIZED EVENTS


Some pmu events listed by 'perf-list' will be displayed with '?' in
them. For example:

  hv_gpci/dtbp_ptitc,phys_processor_idx=?/

This means that when provided as an event, a value for
phys_processor_idx must also be supplied. For example:

  perf stat -e 'hv_gpci/dtbp_ptitc,phys_processor_idx=0x2/' ...

Changelog[v4]
- [Jiri Olsa] Rebase to perf/core tree (fix small merge conflict)

Changelog[v3]
- [Jiri Olsa] Changed the way event parameters are specified. If
  the event file specifies 'param=val', make the usage 'param=123'
  rather than 'val=123'. (patch 1,2/10)
- Shortened event names using PHYS and VCPU (patch 4/10)
- Print help message if invalid parameter is specified or required
  parameter is missing.
- Moved 3 patches that are unrelated to parametrized events into
  a separate patchset.
- Reordered patches so code changes come first.
Changelog[v2]
- [Joe Perches, David Laight] Use beNN_to_cpu() instead of guessing
  the size from type.
- Use kmem_cache_free() to free page allocated with kmem_cache_alloc().
- Rebase to recent kernel

Cody P Schafer (10):
  tools/perf: support parsing parameterized events
  tools/perf: extend format_alias() to include event parameters
  perf: provide sysfs_show for struct perf_pmu_events_attr
  powerpc/perf/hv-24x7: parse catalog and populate sysfs with events
  perf: add PMU_EVENT_ATTR_STRING() helper
  powerpc/perf/{hv-gpci,hv-common}: generate requests with counters
annotated
  powerpc/perf/hv-gpci: add the remaining gpci requests
  perf Documentation: add event parameters
  tools/perf: Document parameterized and symbolic events
  powerpc/perf/hv-24x7: Document sysfs event description entries

 .../testing/sysfs-bus-event_source-devices-events  |   6 +
 .../testing/sysfs-bus-event_source-devices-hv_24x7 |  22 +
 arch/powerpc/perf/hv-24x7-catalog.h|  25 +
 arch/powerpc/perf/hv-24x7-domains.h|  28 +
 arch/powerpc/perf/hv-24x7.c| 787 -
 arch/powerpc/perf/hv-24x7.h|  12 +-
 arch/powerpc/perf/hv-common.c  |  10 +-
 arch/powerpc/perf/hv-gpci-requests.h   | 262 +++
 arch/powerpc/perf/hv-gpci.c|   8 +
 arch/powerpc/perf/hv-gpci.h|  37 +-
 arch/powerpc/perf/req-gen/_begin.h |  13 +
 arch/powerpc/perf/req-gen/_clear.h |   5 +
 arch/powerpc/perf/req-gen/_end.h   |   4 +
 arch/powerpc/perf/req-gen/_request-begin.h |  15 +
 arch/powerpc/perf/req-gen/_request-end.h   |   8 +
 arch/powerpc/perf/req-gen/perf.h  

[PATCH v4 01/10] tools/perf: support parsing parameterized events

2014-09-24 Thread Sukadev Bhattiprolu
From: Cody P Schafer c...@linux.vnet.ibm.com

Enable event specification like:

pmu/event_name,param1=0x1,param2=0x4/

Assuming that

/sys/bus/event_source/devices/pmu/events/event_name

Contains something like

param2=foo,bar=1,param1=baz

Changelog[v4]:
[Jiri Olsa] Merge to recent perf-core and fix a small conflict.

Changelog[v3]:
[Jiri Olsa] If the sysfs event file specifies 'param=val', make the
usage 'hv_24x7/event,param=123/' rather than 'hv_24x7/event,val=123/'.

CC: Haren Myneni hb...@us.ibm.com
CC: Cody P Schafer d...@codyps.com
Signed-off-by: Sukadev Bhattiprolu suka...@linux.vnet.ibm.com
Signed-off-by: Cody P Schafer c...@linux.vnet.ibm.com

Conflicts:
tools/perf/util/pmu.c
---
 tools/perf/util/parse-events.h |  1 +
 tools/perf/util/pmu.c  | 65 +++---
 2 files changed, 55 insertions(+), 11 deletions(-)

diff --git a/tools/perf/util/parse-events.h b/tools/perf/util/parse-events.h
index df094b4..9d7d2d5 100644
--- a/tools/perf/util/parse-events.h
+++ b/tools/perf/util/parse-events.h
@@ -59,6 +59,7 @@ struct parse_events_term {
int type_val;
int type_term;
struct list_head list;
+   bool used;
 };
 
 struct parse_events_evlist {
diff --git a/tools/perf/util/pmu.c b/tools/perf/util/pmu.c
index 22a4ad5..67e59b9 100644
--- a/tools/perf/util/pmu.c
+++ b/tools/perf/util/pmu.c
@@ -511,31 +511,68 @@ static void pmu_format_value(unsigned long *format, __u64 
value, __u64 *v,
 }
 
 /*
+ * Term is a string term, and might be a param-term. Try to look up it's value
+ * in the remaining terms.
+ * - We have a term like base-or-format-term=param-term,
+ * - We need to find the value supplied for param-term (with param-term named
+ *   in a config string) later on in the term list.
+ */
+static int pmu_resolve_param_term(struct parse_events_term *term,
+ struct list_head *head_terms,
+ __u64 *value)
+{
+   struct parse_events_term *t;
+
+   list_for_each_entry(t, head_terms, list) {
+   if (t-type_val == PARSE_EVENTS__TERM_TYPE_NUM) {
+   if (!strcmp(t-config, term-config)) {
+   t-used = true;
+   *value = t-val.num;
+   return 0;
+   }
+   }
+   }
+
+   if (verbose)
+   printf(Required parameter '%s' not specified\n, term-config);
+
+   return -1;
+}
+
+/*
  * Setup one of config[12] attr members based on the
  * user input data - term parameter.
  */
 static int pmu_config_term(struct list_head *formats,
   struct perf_event_attr *attr,
   struct parse_events_term *term,
+  struct list_head *head_terms,
   bool zero)
 {
struct perf_pmu_format *format;
__u64 *vp;
+   __u64 val;
+
+   /*
+* If this is a parameter we've already used for parameterized-eval,
+* skip it in normal eval.
+*/
+   if (term-used)
+   return 0;
 
/*
-* Support only for hardcoded and numnerial terms.
 * Hardcoded terms should be already in, so nothing
 * to be done for them.
 */
if (parse_events__is_hardcoded_term(term))
return 0;
 
-   if (term-type_val != PARSE_EVENTS__TERM_TYPE_NUM)
-   return -EINVAL;
-
format = pmu_find_format(formats, term-config);
-   if (!format)
+   if (!format) {
+   if (verbose)
+   printf(Invalid event/parameter '%s'\n, term-config);
return -EINVAL;
+   }
 
switch (format-value) {
case PERF_PMU_FORMAT_VALUE_CONFIG:
@@ -552,11 +589,16 @@ static int pmu_config_term(struct list_head *formats,
}
 
/*
-* XXX If we ever decide to go with string values for
-* non-hardcoded terms, here's the place to translate
-* them into value.
+* Either directly use a numeric term, or try to translate string terms
+* using event parameters.
 */
-   pmu_format_value(format-bits, term-val.num, vp, zero);
+   if (term-type_val == PARSE_EVENTS__TERM_TYPE_NUM)
+   val = term-val.num;
+   else
+   if (pmu_resolve_param_term(term, head_terms, val))
+   return -EINVAL;
+
+   pmu_format_value(format-bits, val, vp, zero);
return 0;
 }
 
@@ -567,9 +609,10 @@ int perf_pmu__config_terms(struct list_head *formats,
 {
struct parse_events_term *term;
 
-   list_for_each_entry(term, head_terms, list)
-   if (pmu_config_term(formats, attr, term, zero))
+   list_for_each_entry(term, head_terms, list) {
+   if (pmu_config_term(formats, attr, term, head_terms, zero))
return 

[PATCH v4 02/10] tools/perf: extend format_alias() to include event parameters

2014-09-24 Thread Sukadev Bhattiprolu
From: Cody P Schafer c...@linux.vnet.ibm.com

This causes `perf list pmu` to show parameters for parameterized events
as follows:

  pmu/event_name,param1=?,param2=?/ [Kernel PMU event]

An example:

  
hv_gpci/dispatch_timebase_by_processor_processor_time_in_timebase_cycles,phys_processor_idx=?/
 [Kernel PMU event]

Changelog[v6]
[Jiri Olsa] If the parameter for an event in sysfs is 'param=val',
have perf-list show the event as 'param=?' rather than 'val=?'.

CC: Haren Myneni hb...@us.ibm.com
CC: Cody P Schafer d...@codyps.com
Signed-off-by: Cody P Schafer c...@linux.vnet.ibm.com
Signed-off-by: Sukadev Bhattiprolu suka...@linux.vnet.ibm.com
---
 tools/perf/util/pmu.c | 26 +-
 1 file changed, 25 insertions(+), 1 deletion(-)

diff --git a/tools/perf/util/pmu.c b/tools/perf/util/pmu.c
index 67e59b9..a05dd9d 100644
--- a/tools/perf/util/pmu.c
+++ b/tools/perf/util/pmu.c
@@ -760,10 +760,33 @@ void perf_pmu__set_format(unsigned long *bits, long from, 
long to)
set_bit(b, bits);
 }
 
+static int sub_non_neg(int a, int b)
+{
+   if (b  a)
+   return 0;
+   return a - b;
+}
+
 static char *format_alias(char *buf, int len, struct perf_pmu *pmu,
  struct perf_pmu_alias *alias)
 {
-   snprintf(buf, len, %s/%s/, pmu-name, alias-name);
+   struct parse_events_term *term;
+   int used = snprintf(buf, len, %s/%s, pmu-name, alias-name);
+
+   list_for_each_entry(term, alias-terms, list)
+   if (term-type_val == PARSE_EVENTS__TERM_TYPE_STR)
+   used += snprintf(buf + used, sub_non_neg(len, used),
+   ,%s=?, term-config);
+
+   if (sub_non_neg(len, used)  0) {
+   buf[used] = '/';
+   used++;
+   }
+   if (sub_non_neg(len, used)  0) {
+   buf[used] = '\0';
+   used++;
+   } else
+   buf[len - 1] = '\0';
return buf;
 }
 
@@ -814,6 +837,7 @@ void print_pmu_events(const char *event_glob, bool 
name_only)
if (is_cpu  !name_only)
aliases[j] = format_alias_or(buf, sizeof(buf),
  pmu, alias);
+
aliases[j] = strdup(aliases[j]);
j++;
}
-- 
1.8.3.1

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

[PATCH v4 04/10] powerpc/perf/hv-24x7: parse catalog and populate sysfs with events

2014-09-24 Thread Sukadev Bhattiprolu
From: Cody P Schafer c...@linux.vnet.ibm.com

Retrieves and parses the 24x7 catalog on POWER systems that supply it
(right now, only POWER 8). Events are exposed via sysfs in the standard
fashion, and are all parameterized.

Catalog is (at the moment) only parsed on boot. It needs re-parsing
when some hypervisor events occur. At that point we'll also need to
prevent old events from continuing to function (counter that is passed
in via spare space in the config values?).

Changelog[v6]
[Sukadev Bhattiprolu] Use PHYS and VCPU in place of PHYSICAL and
VIRTUAL_PROCESSOR to shorten the names of the domains and hence,
events;

Changelog[v2]
[Joe Perches, David Laight] Use beNN_to_cpu() instead of guessing
the size from type.
Use kmem_cache_free() to free page allocated with kmem_cache_alloc().

CC: Sukadev Bhattiprolu suka...@linux.vnet.ibm.com
CC: Haren Myneni hb...@us.ibm.com
CC: Cody P Schafer d...@codyps.com
Signed-off-by: Cody P Schafer c...@linux.vnet.ibm.com
---
 arch/powerpc/perf/hv-24x7-catalog.h |  25 ++
 arch/powerpc/perf/hv-24x7-domains.h |  28 ++
 arch/powerpc/perf/hv-24x7.c | 787 +++-
 arch/powerpc/perf/hv-24x7.h |  12 +-
 4 files changed, 838 insertions(+), 14 deletions(-)
 create mode 100644 arch/powerpc/perf/hv-24x7-domains.h

diff --git a/arch/powerpc/perf/hv-24x7-catalog.h 
b/arch/powerpc/perf/hv-24x7-catalog.h
index 21b19dd..69e2e1f 100644
--- a/arch/powerpc/perf/hv-24x7-catalog.h
+++ b/arch/powerpc/perf/hv-24x7-catalog.h
@@ -30,4 +30,29 @@ struct hv_24x7_catalog_page_0 {
__u8 reserved6[2];
 } __packed;
 
+struct hv_24x7_event_data {
+   __be16 length; /* in bytes, must be a multiple of 16 */
+   __u8 reserved1[2];
+   __u8 domain; /* Chip = 1, Core = 2 */
+   __u8 reserved2[1];
+   __be16 event_group_record_offs; /* in bytes, must be 8 byte aligned */
+   __be16 event_group_record_len; /* in bytes */
+
+   /* in bytes, offset from event_group_record */
+   __be16 event_counter_offs;
+
+   /* verified_state, unverified_state, caveat_state, broken_state, ... */
+   __be32 flags;
+
+   __be16 primary_group_ix;
+   __be16 group_count;
+   __be16 event_name_len;
+   __u8 remainder[];
+   /* __u8 event_name[event_name_len - 2]; */
+   /* __be16 event_description_len; */
+   /* __u8 event_desc[event_description_len - 2]; */
+   /* __be16 detailed_desc_len; */
+   /* __u8 detailed_desc[detailed_desc_len - 2]; */
+} __packed;
+
 #endif
diff --git a/arch/powerpc/perf/hv-24x7-domains.h 
b/arch/powerpc/perf/hv-24x7-domains.h
new file mode 100644
index 000..49c1efd
--- /dev/null
+++ b/arch/powerpc/perf/hv-24x7-domains.h
@@ -0,0 +1,28 @@
+
+/*
+ * DOMAIN(name, num, index_kind, is_physical)
+ *
+ * @name:  An all caps token, suitable for use in generating an enum
+ * member and appending to an event name in sysfs.
+ *
+ * @num:   The number corresponding to the domain as given in
+ * documentation. We assume the catalog domain and the hcall
+ * domain have the same numbering (so far they do), but this
+ * may need to be changed in the future.
+ *
+ * @index_kind: A stringifiable token describing the meaning of the index
+ * within the given domain. Must fit the parsing rules of the
+ * perf sysfs api.
+ *
+ * @is_physical: True if the domain is physical, false otherwise (if virtual).
+ *
+ * Note: The terms PHYS_CHIP, PHYS_CORE, VCPU correspond to physical chip,
+ *  physical core and virtual processor in 24x7 Counters specifications.
+ */
+
+DOMAIN(PHYS_CHIP, 0x01, chip, true)
+DOMAIN(PHYS_CORE, 0x02, core, true)
+DOMAIN(VCPU_HOME_CORE, 0x03, vcpu, false)
+DOMAIN(VCPU_HOME_CHIP, 0x04, vcpu, false)
+DOMAIN(VCPU_HOME_NODE, 0x05, vcpu, false)
+DOMAIN(VCPU_REMOTE_NODE, 0x06, vcpu, false)
diff --git a/arch/powerpc/perf/hv-24x7.c b/arch/powerpc/perf/hv-24x7.c
index 9427ef7..f13e2ea 100644
--- a/arch/powerpc/perf/hv-24x7.c
+++ b/arch/powerpc/perf/hv-24x7.c
@@ -13,16 +13,80 @@
 #define pr_fmt(fmt) hv-24x7:  fmt
 
 #include linux/perf_event.h
+#include linux/rbtree.h
 #include linux/module.h
 #include linux/slab.h
+#include linux/vmalloc.h
+
 #include asm/firmware.h
 #include asm/hvcall.h
 #include asm/io.h
+#include linux/byteorder/generic.h
 
 #include hv-24x7.h
 #include hv-24x7-catalog.h
 #include hv-common.h
 
+static const char *domain_to_index_string(unsigned domain)
+{
+   switch (domain) {
+#define DOMAIN(n, v, x, c) \
+   case HV_PERF_DOMAIN_##n:\
+   return #x;
+#include hv-24x7-domains.h
+#undef DOMAIN
+   default:
+   WARN(1, unknown domain %d\n, domain);
+   return UNKNOWN_DOMAIN_INDEX_STRING;
+   }
+}
+
+static const char *event_domain_suffix(unsigned domain)
+{
+   switch (domain) {
+#define DOMAIN(n, v, x, c) \
+   case 

[PATCH v4 03/10] perf: provide sysfs_show for struct perf_pmu_events_attr

2014-09-24 Thread Sukadev Bhattiprolu
From: Cody P Schafer c...@linux.vnet.ibm.com

(struct perf_pmu_events_attr) is defined in include/linux/perf_event.h,
but the only show for it is in x86 and contains x86 specific stuff.

Make a generic one for those of us who are just using the event_str.

CC: Sukadev Bhattiprolu suka...@linux.vnet.ibm.com
CC: Haren Myneni hb...@us.ibm.com
CC: Cody P Schafer d...@codyps.com
Signed-off-by: Cody P Schafer c...@linux.vnet.ibm.com
---
 include/linux/perf_event.h | 3 +++
 kernel/events/core.c   | 8 
 2 files changed, 11 insertions(+)

diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index 893a0d0..da442a0 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -894,6 +894,9 @@ struct perf_pmu_events_attr {
const char *event_str;
 };
 
+ssize_t perf_event_sysfs_show(struct device *dev, struct device_attribute 
*attr,
+ char *page);
+
 #define PMU_EVENT_ATTR(_name, _var, _id, _show)
\
 static struct perf_pmu_events_attr _var = {\
.attr = __ATTR(_name, 0444, _show, NULL),   \
diff --git a/kernel/events/core.c b/kernel/events/core.c
index 733c616..9ef60e5 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -8210,6 +8210,14 @@ void __init perf_event_init(void)
 != 1024);
 }
 
+ssize_t perf_event_sysfs_show(struct device *dev, struct device_attribute 
*attr,
+ char *page)
+{
+   struct perf_pmu_events_attr *pmu_attr =
+   container_of(attr, struct perf_pmu_events_attr, attr);
+   return sprintf(page, %s\n, pmu_attr-event_str);
+}
+
 static int __init perf_event_sysfs_init(void)
 {
struct pmu *pmu;
-- 
1.8.3.1

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

[PATCH v4 05/10] perf: add PMU_EVENT_ATTR_STRING() helper

2014-09-24 Thread Sukadev Bhattiprolu
From: Cody P Schafer c...@linux.vnet.ibm.com

Helper for constructing static struct perf_pmu_events_attr s.

CC: Sukadev Bhattiprolu suka...@linux.vnet.ibm.com
CC: Haren Myneni hb...@us.ibm.com
CC: Cody P Schafer d...@codyps.com
Signed-off-by: Cody P Schafer c...@linux.vnet.ibm.com
---
 include/linux/perf_event.h | 7 +++
 1 file changed, 7 insertions(+)

diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index da442a0..4840c7f 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -903,6 +903,13 @@ static struct perf_pmu_events_attr _var = {
\
.id   =  _id,   \
 };
 
+#define PMU_EVENT_ATTR_STRING(_name, _var, _value) \
+static struct perf_pmu_events_attr _var = {\
+   .attr = __ATTR(_name, 0444, perf_event_sysfs_show, NULL),   \
+   .event_str = _value,\
+};
+
+
 #define PMU_FORMAT_ATTR(_name, _format)
\
 static ssize_t \
 _name##_show(struct device *dev,   \
-- 
1.8.3.1

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

[PATCH v4 06/10] powerpc/perf/{hv-gpci, hv-common}: generate requests with counters annotated

2014-09-24 Thread Sukadev Bhattiprolu
From: Cody P Schafer c...@linux.vnet.ibm.com

This adds (in req-gen/) a framework for defining gpci counter requests.
It uses macro magic similar to ftrace.

Also convert the existing hv-gpci request structures and enum values to
use the new framework (and adjust old users of the structs and enum
values to cope with changes in naming).

In exchange for this macro disaster, we get autogenerated event listing
for GPCI in sysfs, build time field offset checking, and zero
duplication of information about GPCI requests.

CC: Sukadev Bhattiprolu suka...@linux.vnet.ibm.com
CC: Haren Myneni hb...@us.ibm.com
CC: Cody P Schafer d...@codyps.com
Signed-off-by: Cody P Schafer c...@linux.vnet.ibm.com
---
 arch/powerpc/perf/hv-common.c  |  10 +-
 arch/powerpc/perf/hv-gpci-requests.h   |  79 +++
 arch/powerpc/perf/hv-gpci.c|   8 ++
 arch/powerpc/perf/hv-gpci.h|  37 +++
 arch/powerpc/perf/req-gen/_begin.h |  13 +++
 arch/powerpc/perf/req-gen/_clear.h |   5 +
 arch/powerpc/perf/req-gen/_end.h   |   4 +
 arch/powerpc/perf/req-gen/_request-begin.h |  15 +++
 arch/powerpc/perf/req-gen/_request-end.h   |   8 ++
 arch/powerpc/perf/req-gen/perf.h   | 155 +
 10 files changed, 304 insertions(+), 30 deletions(-)
 create mode 100644 arch/powerpc/perf/hv-gpci-requests.h
 create mode 100644 arch/powerpc/perf/req-gen/_begin.h
 create mode 100644 arch/powerpc/perf/req-gen/_clear.h
 create mode 100644 arch/powerpc/perf/req-gen/_end.h
 create mode 100644 arch/powerpc/perf/req-gen/_request-begin.h
 create mode 100644 arch/powerpc/perf/req-gen/_request-end.h
 create mode 100644 arch/powerpc/perf/req-gen/perf.h

diff --git a/arch/powerpc/perf/hv-common.c b/arch/powerpc/perf/hv-common.c
index 47e02b3..7dce8f10 100644
--- a/arch/powerpc/perf/hv-common.c
+++ b/arch/powerpc/perf/hv-common.c
@@ -9,13 +9,13 @@ unsigned long hv_perf_caps_get(struct hv_perf_caps *caps)
unsigned long r;
struct p {
struct hv_get_perf_counter_info_params params;
-   struct cv_system_performance_capabilities caps;
+   struct hv_gpci_system_performance_capabilities caps;
} __packed __aligned(sizeof(uint64_t));
 
struct p arg = {
.params = {
.counter_request = cpu_to_be32(
-   CIR_SYSTEM_PERFORMANCE_CAPABILITIES),
+   HV_GPCI_system_performance_capabilities),
.starting_index = cpu_to_be32(-1),
.counter_info_version_in = 0,
}
@@ -31,9 +31,9 @@ unsigned long hv_perf_caps_get(struct hv_perf_caps *caps)
 
caps-version = arg.params.counter_info_version_out;
caps-collect_privileged = !!arg.caps.perf_collect_privileged;
-   caps-ga = !!(arg.caps.capability_mask  CV_CM_GA);
-   caps-expanded = !!(arg.caps.capability_mask  CV_CM_EXPANDED);
-   caps-lab = !!(arg.caps.capability_mask  CV_CM_LAB);
+   caps-ga = !!(arg.caps.capability_mask  HV_GPCI_CM_GA);
+   caps-expanded = !!(arg.caps.capability_mask  HV_GPCI_CM_EXPANDED);
+   caps-lab = !!(arg.caps.capability_mask  HV_GPCI_CM_LAB);
 
return r;
 }
diff --git a/arch/powerpc/perf/hv-gpci-requests.h 
b/arch/powerpc/perf/hv-gpci-requests.h
new file mode 100644
index 000..0dfc4d9
--- /dev/null
+++ b/arch/powerpc/perf/hv-gpci-requests.h
@@ -0,0 +1,79 @@
+
+#include req-gen/_begin.h
+
+/*
+ * Based on the document getPerfCountInfo v1.07
+ */
+
+/* this needs to be -1 encoded in hex suitable for parsing by tools/perf. */
+#define M1 0x
+
+/*
+ * #define REQUEST_NAME counter_request_name
+ * #define REQUEST_NUM r_num
+ * #define REQUEST_IDX_KIND starting_index_kind
+ * #include I(REQUEST_BEGIN)
+ * REQUEST(
+ * __field(...)
+ * __field(...)
+ * __array(...)
+ * __count(...)
+ * )
+ * #include I(REQUEST_END)
+ *
+ * - starting_index_kind is one of:
+ *   M1: must be -1
+ *   chip_id: hardware chip id or -1 for current hw chip
+ *   phys_processor_idx:
+ *
+ * __count(offset, bytes, name):
+ * a counter that should be exposed via perf
+ * __field(offset, bytes, name)
+ * a normal field
+ * __array(offset, bytes, name)
+ * an array of bytes
+ *
+ *
+ * @bytes for __count, and __field _must_ be a numeral token
+ * in decimal, not an expression and not in hex.
+ *
+ *
+ * TODO:
+ * - expose secondary index (if any counter ever uses it, only 0xA0
+ *   appears to use it right now, and it doesn't have any counters)
+ * - embed versioning info
+ * - include counter descriptions
+ */
+#define REQUEST_NAME dispatch_timebase_by_processor
+#define REQUEST_NUM 0x10
+#define REQUEST_IDX_KIND phys_processor_idx
+#include I(REQUEST_BEGIN)
+REQUEST(__count(0, 8,  processor_time_in_timebase_cycles)
+   __field(0x8,4,  hw_processor_id)
+   __field(0xC,2,

[PATCH v4 07/10] powerpc/perf/hv-gpci: add the remaining gpci requests

2014-09-24 Thread Sukadev Bhattiprolu
From: Cody P Schafer c...@linux.vnet.ibm.com

Add the remaining gpci requests that contain counters suitable for use
by perf. Omit those that don't contain any counters (but note their
omission).

CC: Sukadev Bhattiprolu suka...@linux.vnet.ibm.com
CC: Haren Myneni hb...@us.ibm.com
CC: Cody P Schafer d...@codyps.com
Signed-off-by: Cody P Schafer c...@linux.vnet.ibm.com
---
 arch/powerpc/perf/hv-gpci-requests.h | 183 +++
 1 file changed, 183 insertions(+)

diff --git a/arch/powerpc/perf/hv-gpci-requests.h 
b/arch/powerpc/perf/hv-gpci-requests.h
index 0dfc4d9..a908b08 100644
--- a/arch/powerpc/perf/hv-gpci-requests.h
+++ b/arch/powerpc/perf/hv-gpci-requests.h
@@ -65,6 +65,33 @@ REQUEST(__count(0,   8,  
processor_time_in_timebase_cycles)
 )
 #include I(REQUEST_END)
 
+#define REQUEST_NAME 
entitled_capped_uncapped_donated_idle_timebase_by_partition
+#define REQUEST_NUM 0x20
+#define REQUEST_IDX_KIND sibling_part_id
+#include I(REQUEST_BEGIN)
+REQUEST(__field(0, 8,  partition_id)
+   __count(0x8,8,  entitled_cycles)
+   __count(0x10,   8,  consumed_capped_cycles)
+   __count(0x18,   8,  consumed_uncapped_cycles)
+   __count(0x20,   8,  cycles_donated)
+   __count(0x28,   8,  purr_idle_cycles)
+)
+#include I(REQUEST_END)
+
+/*
+ * Not avaliable for counter_info_version = 0x8, use
+ * run_instruction_cycles_by_partition(0x100) instead.
+ */
+#define REQUEST_NAME run_instructions_run_cycles_by_partition
+#define REQUEST_NUM 0x30
+#define REQUEST_IDX_KIND sibling_part_id
+#include I(REQUEST_BEGIN)
+REQUEST(__field(0, 8,  partition_id)
+   __count(0x8,8,  instructions_completed)
+   __count(0x10,   8,  cycles)
+)
+#include I(REQUEST_END)
+
 #define REQUEST_NAME system_performance_capabilities
 #define REQUEST_NUM 0x40
 #define REQUEST_IDX_KIND M1
@@ -75,5 +102,161 @@ REQUEST(__field(0, 1,  perf_collect_privileged)
 )
 #include I(REQUEST_END)
 
+#define REQUEST_NAME processor_bus_utilization_abc_links
+#define REQUEST_NUM 0x50
+#define REQUEST_IDX_KIND hw_chip_id
+#include I(REQUEST_BEGIN)
+REQUEST(__field(0, 4,  hw_chip_id)
+   __array(0x4,0xC,reserved1)
+   __count(0x10,   8,  total_link_cycles)
+   __count(0x18,   8,  idle_cycles_for_a_link)
+   __count(0x20,   8,  idle_cycles_for_b_link)
+   __count(0x28,   8,  idle_cycles_for_c_link)
+   __array(0x30,   0x20,   reserved2)
+)
+#include I(REQUEST_END)
+
+#define REQUEST_NAME processor_bus_utilization_wxyz_links
+#define REQUEST_NUM 0x60
+#define REQUEST_IDX_KIND hw_chip_id
+#include I(REQUEST_BEGIN)
+REQUEST(__field(0, 4,  hw_chip_id)
+   __array(0x4,0xC,reserved1)
+   __count(0x10,   8,  total_link_cycles)
+   __count(0x18,   8,  idle_cycles_for_w_link)
+   __count(0x20,   8,  idle_cycles_for_x_link)
+   __count(0x28,   8,  idle_cycles_for_y_link)
+   __count(0x30,   8,  idle_cycles_for_z_link)
+   __array(0x38,   0x28,   reserved2)
+)
+#include I(REQUEST_END)
+
+#define REQUEST_NAME processor_bus_utilization_gx_links
+#define REQUEST_NUM 0x70
+#define REQUEST_IDX_KIND hw_chip_id
+#include I(REQUEST_BEGIN)
+REQUEST(__field(0, 4,  hw_chip_id)
+   __array(0x4,0xC,reserved1)
+   __count(0x10,   8,  gx0_in_address_cycles)
+   __count(0x18,   8,  gx0_in_data_cycles)
+   __count(0x20,   8,  gx0_in_retries)
+   __count(0x28,   8,  gx0_in_bus_cycles)
+   __count(0x30,   8,  gx0_in_cycles_total)
+   __count(0x38,   8,  gx0_out_address_cycles)
+   __count(0x40,   8,  gx0_out_data_cycles)
+   __count(0x48,   8,  gx0_out_retries)
+   __count(0x50,   8,  gx0_out_bus_cycles)
+   __count(0x58,   8,  gx0_out_cycles_total)
+   __count(0x60,   8,  gx1_in_address_cycles)
+   __count(0x68,   8,  gx1_in_data_cycles)
+   __count(0x70,   8,  gx1_in_retries)
+   __count(0x78,   8,  gx1_in_bus_cycles)
+   __count(0x80,   8,  gx1_in_cycles_total)
+   __count(0x88,   8,  gx1_out_address_cycles)
+   __count(0x90,   8,  gx1_out_data_cycles)
+   __count(0x98,   8,  gx1_out_retries)
+   __count(0xA0,   8,  gx1_out_bus_cycles)
+   __count(0xA8,   8,  gx1_out_cycles_total)
+)
+#include I(REQUEST_END)
+
+#define REQUEST_NAME processor_bus_utilization_mc_links
+#define REQUEST_NUM 0x80
+#define REQUEST_IDX_KIND hw_chip_id
+#include I(REQUEST_BEGIN)
+REQUEST(__field(0, 4,  hw_chip_id)
+   __array(0x4,0xC,reserved1)
+   __count(0x10,   8,  mc0_frames)
+   __count(0x18,   8,  mc0_reads)
+   __count(0x20,   8,  mc0_write)
+   __count(0x28,   8,  mc0_total_cycles)
+   __count(0x30,   8,  mc1_frames)
+   __count(0x38,   8,  mc1_reads)
+   __count(0x40,   8,  mc1_writes)
+   __count(0x48,   8,  

[PATCH v4 09/10] tools/perf: Document parameterized and symbolic events

2014-09-24 Thread Sukadev Bhattiprolu
From: Cody P Schafer c...@linux.vnet.ibm.com

Changelog[v6]:
- [Sukadev Bhattiprolu]: Update documentation of perf-list and
  perf-record; Added documentation for perf-stat.

CC: Haren Myneni hb...@us.ibm.com
CC: Cody P Schafer d...@codyps.com
Signed-off-by: Cody P Schafer c...@linux.vnet.ibm.com
Signed-off-by: Sukadev Bhattiprolu suka...@linux.vnet.ibm.com
---
 tools/perf/Documentation/perf-list.txt   | 13 +
 tools/perf/Documentation/perf-record.txt | 12 
 tools/perf/Documentation/perf-stat.txt   | 20 
 3 files changed, 41 insertions(+), 4 deletions(-)

diff --git a/tools/perf/Documentation/perf-list.txt 
b/tools/perf/Documentation/perf-list.txt
index 6fce6a6..c405da24 100644
--- a/tools/perf/Documentation/perf-list.txt
+++ b/tools/perf/Documentation/perf-list.txt
@@ -89,6 +89,19 @@ raw encoding of 0x1A8 can be used:
 You should refer to the processor specific documentation for getting these
 details. Some of them are referenced in the SEE ALSO section below.
 
+PARAMETERIZED EVENTS
+
+
+Some pmu events listed by 'perf-list' will be displayed with '?' in them. For
+example:
+
+  hv_gpci/dtbp_ptitc,starting_index=?/
+
+This means that when provided as an event, a value for 'starting_index' must
+also be supplied. For example:
+
+  perf stat -C 0 -e 'hv_gpci/dtbp_ptitc,starting_index=0x2/' ...
+
 OPTIONS
 ---
 
diff --git a/tools/perf/Documentation/perf-record.txt 
b/tools/perf/Documentation/perf-record.txt
index d460049..a6a2f9c 100644
--- a/tools/perf/Documentation/perf-record.txt
+++ b/tools/perf/Documentation/perf-record.txt
@@ -33,6 +33,18 @@ OPTIONS
 - a raw PMU event (eventsel+umask) in the form of rNNN where NNN is a
  hexadecimal event descriptor.
 
+   - a symbolically formed PMU event like 'pmu/param1=0x3,param2/' where
+ 'param1', 'param2', etc are defined as formats for the PMU in
+ /sys/bus/event_sources/devices/pmu/format/*.
+
+   - a symbolically formed event like 'pmu/config=M,config1=N,config2=K/'
+
+  where M, N, K are numbers (in decimal, hex, octal format). Acceptable
+  values for each of 'config', 'config1' and 'config2' are defined by
+  corresponding entries in 
/sys/bus/event_sources/devices/pmu/format/*
+  param1 and param2 are defined as formats for the PMU in:
+ /sys/bus/event_sources/devices/pmu/format/*
+
 - a hardware breakpoint event in the form of '\mem:addr[:access]'
   where addr is the address in memory you want to break in.
   Access is the memory access type (read, write, execute) it can
diff --git a/tools/perf/Documentation/perf-stat.txt 
b/tools/perf/Documentation/perf-stat.txt
index 29ee857..04e150d 100644
--- a/tools/perf/Documentation/perf-stat.txt
+++ b/tools/perf/Documentation/perf-stat.txt
@@ -25,10 +25,22 @@ OPTIONS
 
 -e::
 --event=::
-   Select the PMU event. Selection can be a symbolic event name
-   (use 'perf list' to list all events) or a raw PMU
-   event (eventsel+umask) in the form of rNNN where NNN is a
-hexadecimal event descriptor.
+   Select the PMU event. Selection can be:
+
+   - a symbolic event name (use 'perf list' to list all events)
+
+   - a raw PMU event (eventsel+umask) in the form of rNNN where NNN is a
+ hexadecimal event descriptor.
+
+   - a symbolically formed event like 'pmu/param1=0x3,param2/' where
+ param1 and param2 are defined as formats for the PMU in
+ /sys/bus/event_sources/devices/pmu/format/*
+
+   - a symbolically formed event like 'pmu/config=M,config1=N,config2=K/'
+ where M, N, K are numbers (in decimal, hex, octal format).
+ Acceptable values for each of 'config', 'config1' and 'config2'
+ parameters are defined by corresponding entries in
+ /sys/bus/event_sources/devices/pmu/format/*
 
 -i::
 --no-inherit::
-- 
1.8.3.1

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

[PATCH v4 08/10] perf Documentation: add event parameters

2014-09-24 Thread Sukadev Bhattiprolu
From: Cody P Schafer c...@linux.vnet.ibm.com

Event parameters are a basic way for partial events to be specified in
sysfs with per-event names given to the fields that need to be filled in
when using a particular event.

It is intended for supporting cases where the single 'cpu' parameter is
insufficient. For example, POWER 8 has events for physical
sockets/cores/cpus that are accessible from within virtual machines. To
keep using the single 'cpu' parameter we'd need to perform a mapping
between Linux's cpus and the physical machine's cpus (in this case
Linux is running under a hypervisor). This isn't possible because
bindings between our cpus and physical cpus may not be fixed, and we
probably won't have a cpu on each physical cpu.

CC: Sukadev Bhattiprolu suka...@linux.vnet.ibm.com
CC: Haren Myneni hb...@us.ibm.com
CC: Cody P Schafer d...@codyps.com
Signed-off-by: Cody P Schafer c...@linux.vnet.ibm.com
---
 Documentation/ABI/testing/sysfs-bus-event_source-devices-events | 6 ++
 1 file changed, 6 insertions(+)

diff --git a/Documentation/ABI/testing/sysfs-bus-event_source-devices-events 
b/Documentation/ABI/testing/sysfs-bus-event_source-devices-events
index 20979f8..c1f9850 100644
--- a/Documentation/ABI/testing/sysfs-bus-event_source-devices-events
+++ b/Documentation/ABI/testing/sysfs-bus-event_source-devices-events
@@ -52,12 +52,18 @@ Description:Per-pmu performance monitoring events 
specific to the running syste
event=0x2abc
event=0x423,inv,cmask=0x3
domain=0x1,offset=0x8,starting_index=0x
+   domain=0x1,offset=0x8,starting_index=phys_cpu
 
Each of the assignments indicates a value to be assigned to a
particular set of bits (as defined by the format file
corresponding to the term) in the perf_event structure passed
to the perf_open syscall.
 
+   In the case of the last example, a value replacing phys_cpu
+   would need to be provided by the user selecting the particular
+   event. This is referred to as event parameterization. All
+   non-numerical values indicate an event parameter.
+
 What: /sys/bus/event_source/devices/pmu/events/event.unit
 Date: 2014/02/24
 Contact:   Linux kernel mailing list linux-ker...@vger.kernel.org
-- 
1.8.3.1

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

[PATCH v4 10/10] powerpc/perf/hv-24x7: Document sysfs event description entries

2014-09-24 Thread Sukadev Bhattiprolu
From: Cody P Schafer c...@linux.vnet.ibm.com

CC: Sukadev Bhattiprolu suka...@linux.vnet.ibm.com
CC: Haren Myneni hb...@us.ibm.com
CC: Cody P Schafer d...@codyps.com
Signed-off-by: Cody P Schafer c...@linux.vnet.ibm.com
---
 .../testing/sysfs-bus-event_source-devices-hv_24x7 | 22 ++
 1 file changed, 22 insertions(+)

diff --git a/Documentation/ABI/testing/sysfs-bus-event_source-devices-hv_24x7 
b/Documentation/ABI/testing/sysfs-bus-event_source-devices-hv_24x7
index e78ee79..5b501d7 100644
--- a/Documentation/ABI/testing/sysfs-bus-event_source-devices-hv_24x7
+++ b/Documentation/ABI/testing/sysfs-bus-event_source-devices-hv_24x7
@@ -21,3 +21,25 @@ Contact: Cody P Schafer c...@linux.vnet.ibm.com
 Description:
Exposes the version field of the 24x7 catalog. This is also
extractable from the provided binary catalog sysfs entry.
+
+What:  /sys/bus/event_source/devices/hv_24x7/event_descs/event-name
+Date:  February 2014
+Contact:   Cody P Schafer c...@linux.vnet.ibm.com
+Description:
+   Provides the description of a particular event as provided by
+   the firmware. If firmware does not provide a description, no
+   file will be created.
+
+   Note that the event-name lacks the domain suffix appended for
+   events in the events/ dir.
+
+What:  
/sys/bus/event_source/devices/hv_24x7/event_long_descs/event-name
+Date:  February 2014
+Contact:   Cody P Schafer c...@linux.vnet.ibm.com
+Description:
+   Provides the long description of a particular event as
+   provided by the firmware. If firmware does not provide a
+   description, no file will be created.
+
+   Note that the event-name lacks the domain suffix appended for
+   events in the events/ dir.
-- 
1.8.3.1

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

Re: [PATCH v2 1/3] pseries: Make CPU hotplug path endian safe

2014-09-24 Thread Nathan Fontenot
On 09/16/2014 03:15 PM, Thomas Falcon wrote:
 From: Bharata B Rao bhar...@linux.vnet.ibm.com
 
 - ibm,rtas-configure-connector should treat the RTAS data as big endian.
 - Treat ibm,ppc-interrupt-server#s as big-endian when setting
   smp_processor_id during hotplug.
 
 Signed-off-by: Bharata B Rao bhar...@linux.vnet.ibm.com
 Signed-off-by: Thomas Falcon tlfal...@linux.vnet.ibm.com

Acked-by: Nathan Fontenot nf...@linux.vnet.ibm.com

 ---
 
 Changes in v2:
 - Don't convert drc_index to BE in dlpar_configure_connector() but instead
   convert in the caller dlpar_cpu_probe() so that migration path isn't
   affected.
 - Mark members of cc_workarea struct as __be32 instead of u32 (Thomas)
 - Based on top of Thomas Falcon's two patches.
   (http://patchwork.ozlabs.org/patch/388767/)
 
 v1: http://patchwork.ozlabs.org/patch/386216/
 
  arch/powerpc/platforms/pseries/dlpar.c   | 22 +++---
  arch/powerpc/platforms/pseries/hotplug-cpu.c |  4 ++--
  arch/powerpc/platforms/pseries/pseries.h |  3 ++-
  3 files changed, 15 insertions(+), 14 deletions(-)
 
 diff --git a/arch/powerpc/platforms/pseries/dlpar.c 
 b/arch/powerpc/platforms/pseries/dlpar.c
 index a2450b8..5acbe59 100644
 --- a/arch/powerpc/platforms/pseries/dlpar.c
 +++ b/arch/powerpc/platforms/pseries/dlpar.c
 @@ -24,11 +24,11 @@
  #include asm/rtas.h
  
  struct cc_workarea {
 - u32 drc_index;
 - u32 zero;
 - u32 name_offset;
 - u32 prop_length;
 - u32 prop_offset;
 + __be32  drc_index;
 + __be32  zero;
 + __be32  name_offset;
 + __be32  prop_length;
 + __be32  prop_offset;
  };
  
  void dlpar_free_cc_property(struct property *prop)
 @@ -48,11 +48,11 @@ static struct property *dlpar_parse_cc_property(struct 
 cc_workarea *ccwa)
   if (!prop)
   return NULL;
  
 - name = (char *)ccwa + ccwa-name_offset;
 + name = (char *)ccwa + be32_to_cpu(ccwa-name_offset);
   prop-name = kstrdup(name, GFP_KERNEL);
  
 - prop-length = ccwa-prop_length;
 - value = (char *)ccwa + ccwa-prop_offset;
 + prop-length = be32_to_cpu(ccwa-prop_length);
 + value = (char *)ccwa + be32_to_cpu(ccwa-prop_offset);
   prop-value = kmemdup(value, prop-length, GFP_KERNEL);
   if (!prop-value) {
   dlpar_free_cc_property(prop);
 @@ -78,7 +78,7 @@ static struct device_node *dlpar_parse_cc_node(struct 
 cc_workarea *ccwa,
   if (!dn)
   return NULL;
  
 - name = (char *)ccwa + ccwa-name_offset;
 + name = (char *)ccwa + be32_to_cpu(ccwa-name_offset);
   dn-full_name = kasprintf(GFP_KERNEL, %s/%s, path, name);
   if (!dn-full_name) {
   kfree(dn);
 @@ -125,7 +125,7 @@ void dlpar_free_cc_nodes(struct device_node *dn)
  #define CALL_AGAIN   -2
  #define ERR_CFG_USE -9003
  
 -struct device_node *dlpar_configure_connector(u32 drc_index,
 +struct device_node *dlpar_configure_connector(__be32 drc_index,
 struct device_node *parent)
  {
   struct device_node *dn;
 @@ -411,7 +411,7 @@ static ssize_t dlpar_cpu_probe(const char *buf, size_t 
 count)
   if (!parent)
   return -ENODEV;
  
 - dn = dlpar_configure_connector(drc_index, parent);
 + dn = dlpar_configure_connector(cpu_to_be32(drc_index), parent);
   if (!dn)
   return -EINVAL;
  
 diff --git a/arch/powerpc/platforms/pseries/hotplug-cpu.c 
 b/arch/powerpc/platforms/pseries/hotplug-cpu.c
 index 20d6297..447f8c6 100644
 --- a/arch/powerpc/platforms/pseries/hotplug-cpu.c
 +++ b/arch/powerpc/platforms/pseries/hotplug-cpu.c
 @@ -247,7 +247,7 @@ static int pseries_add_processor(struct device_node *np)
   unsigned int cpu;
   cpumask_var_t candidate_mask, tmp;
   int err = -ENOSPC, len, nthreads, i;
 - const u32 *intserv;
 + const __be32 *intserv;
  
   intserv = of_get_property(np, ibm,ppc-interrupt-server#s, len);
   if (!intserv)
 @@ -293,7 +293,7 @@ static int pseries_add_processor(struct device_node *np)
   for_each_cpu(cpu, tmp) {
   BUG_ON(cpu_present(cpu));
   set_cpu_present(cpu, true);
 - set_hard_smp_processor_id(cpu, *intserv++);
 + set_hard_smp_processor_id(cpu, be32_to_cpu(*intserv++));
   }
   err = 0;
  out_unlock:
 diff --git a/arch/powerpc/platforms/pseries/pseries.h 
 b/arch/powerpc/platforms/pseries/pseries.h
 index 361add6..1796c54 100644
 --- a/arch/powerpc/platforms/pseries/pseries.h
 +++ b/arch/powerpc/platforms/pseries/pseries.h
 @@ -56,7 +56,8 @@ extern void hvc_vio_init_early(void);
  /* Dynamic logical Partitioning/Mobility */
  extern void dlpar_free_cc_nodes(struct device_node *);
  extern void dlpar_free_cc_property(struct property *);
 -extern struct device_node *dlpar_configure_connector(u32, struct device_node 
 *);
 +extern struct device_node *dlpar_configure_connector(__be32,
 + struct device_node *);
  extern 

Re: [PATCH v2 2/3] pseries: Fix endian issues in onlining cpu threads

2014-09-24 Thread Nathan Fontenot
On 09/16/2014 03:15 PM, Thomas Falcon wrote:
 The ibm,ppc-interrupt-server#s property is in big endian format.
 These values need to be converted when used by little endian
 architectures.
 
 Signed-off-by: Thomas Falcon tlfal...@linux.vnet.ibm.com

Acked-by: Nathan Fontenot nf...@linux.vnet.ibm.com

 ---
 Changes in v2:
 
  Followed suggestions from Michael Ellerman
conversion of intserv values occur once
 ---
  arch/powerpc/platforms/pseries/dlpar.c | 8 +---
  1 file changed, 5 insertions(+), 3 deletions(-)
 
 diff --git a/arch/powerpc/platforms/pseries/dlpar.c 
 b/arch/powerpc/platforms/pseries/dlpar.c
 index 5acbe59..187e4eb 100644
 --- a/arch/powerpc/platforms/pseries/dlpar.c
 +++ b/arch/powerpc/platforms/pseries/dlpar.c
 @@ -363,7 +363,8 @@ static int dlpar_online_cpu(struct device_node *dn)
   int rc = 0;
   unsigned int cpu;
   int len, nthreads, i;
 - const u32 *intserv;
 + const __be32 *intserv;
 + u32 thread;
  
   intserv = of_get_property(dn, ibm,ppc-interrupt-server#s, len);
   if (!intserv)
 @@ -373,8 +374,9 @@ static int dlpar_online_cpu(struct device_node *dn)
  
   cpu_maps_update_begin();
   for (i = 0; i  nthreads; i++) {
 + thread = be32_to_cpu(intserv[i]);
   for_each_present_cpu(cpu) {
 - if (get_hard_smp_processor_id(cpu) != intserv[i])
 + if (get_hard_smp_processor_id(cpu) != thread)
   continue;
   BUG_ON(get_cpu_current_state(cpu)
   != CPU_STATE_OFFLINE);
 @@ -388,7 +390,7 @@ static int dlpar_online_cpu(struct device_node *dn)
   }
   if (cpu == num_possible_cpus())
   printk(KERN_WARNING Could not find cpu to online 
 -with physical id 0x%x\n, intserv[i]);
 +with physical id 0x%x\n, thread);
   }
   cpu_maps_update_done();
  
 

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

Re: [PATCH v3 3/3] pseries: Fix endian issues in cpu hot-removal

2014-09-24 Thread Nathan Fontenot
On 09/16/2014 03:15 PM, Thomas Falcon wrote:
 When removing a cpu, this patch makes sure that values
 gotten from or passed to firmware are in the correct
 endian format.
 
 Signed-off-by: Thomas Falcon tlfal...@linux.vnet.ibm.com

Acked-by: Nathan Fontenot nf...@linux.vnet.ibm.com

 ---
 Changes in v3:
 
  drc_index in dlpar_cpu_release is no longer const to
  fix compilation error found by Bharata Rao
 ---
  arch/powerpc/platforms/pseries/dlpar.c   | 20 +++-
  arch/powerpc/platforms/pseries/hotplug-cpu.c | 10 ++
  2 files changed, 17 insertions(+), 13 deletions(-)
 
 diff --git a/arch/powerpc/platforms/pseries/dlpar.c 
 b/arch/powerpc/platforms/pseries/dlpar.c
 index 187e4eb..0fad5b6 100644
 --- a/arch/powerpc/platforms/pseries/dlpar.c
 +++ b/arch/powerpc/platforms/pseries/dlpar.c
 @@ -444,7 +444,8 @@ static int dlpar_offline_cpu(struct device_node *dn)
   int rc = 0;
   unsigned int cpu;
   int len, nthreads, i;
 - const u32 *intserv;
 + const __be32 *intserv;
 + u32 thread;
  
   intserv = of_get_property(dn, ibm,ppc-interrupt-server#s, len);
   if (!intserv)
 @@ -454,8 +455,9 @@ static int dlpar_offline_cpu(struct device_node *dn)
  
   cpu_maps_update_begin();
   for (i = 0; i  nthreads; i++) {
 + thread = be32_to_cpu(intserv[i]);
   for_each_present_cpu(cpu) {
 - if (get_hard_smp_processor_id(cpu) != intserv[i])
 + if (get_hard_smp_processor_id(cpu) != thread)
   continue;
  
   if (get_cpu_current_state(cpu) == CPU_STATE_OFFLINE)
 @@ -477,14 +479,14 @@ static int dlpar_offline_cpu(struct device_node *dn)
* Upgrade it's state to CPU_STATE_OFFLINE.
*/
   set_preferred_offline_state(cpu, CPU_STATE_OFFLINE);
 - BUG_ON(plpar_hcall_norets(H_PROD, intserv[i])
 + BUG_ON(plpar_hcall_norets(H_PROD, thread)
   != H_SUCCESS);
   __cpu_die(cpu);
   break;
   }
   if (cpu == num_possible_cpus())
   printk(KERN_WARNING Could not find cpu to offline 
 -with physical id 0x%x\n, intserv[i]);
 +with physical id 0x%x\n, thread);
   }
   cpu_maps_update_done();
  
 @@ -496,15 +498,15 @@ out:
  static ssize_t dlpar_cpu_release(const char *buf, size_t count)
  {
   struct device_node *dn;
 - const u32 *drc_index;
 + u32 drc_index;
   int rc;
  
   dn = of_find_node_by_path(buf);
   if (!dn)
   return -EINVAL;
  
 - drc_index = of_get_property(dn, ibm,my-drc-index, NULL);
 - if (!drc_index) {
 + rc = of_property_read_u32(dn, ibm,my-drc-index, drc_index);
 + if (rc) {
   of_node_put(dn);
   return -EINVAL;
   }
 @@ -515,7 +517,7 @@ static ssize_t dlpar_cpu_release(const char *buf, size_t 
 count)
   return -EINVAL;
   }
  
 - rc = dlpar_release_drc(*drc_index);
 + rc = dlpar_release_drc(drc_index);
   if (rc) {
   of_node_put(dn);
   return rc;
 @@ -523,7 +525,7 @@ static ssize_t dlpar_cpu_release(const char *buf, size_t 
 count)
  
   rc = dlpar_detach_node(dn);
   if (rc) {
 - dlpar_acquire_drc(*drc_index);
 + dlpar_acquire_drc(drc_index);
   return rc;
   }
  
 diff --git a/arch/powerpc/platforms/pseries/hotplug-cpu.c 
 b/arch/powerpc/platforms/pseries/hotplug-cpu.c
 index 447f8c6..5c375f9 100644
 --- a/arch/powerpc/platforms/pseries/hotplug-cpu.c
 +++ b/arch/powerpc/platforms/pseries/hotplug-cpu.c
 @@ -90,7 +90,7 @@ static void rtas_stop_self(void)
  {
   static struct rtas_args args = {
   .nargs = 0,
 - .nret = 1,
 + .nret = cpu_to_be32(1),
   .rets = args.args[0],
   };
  
 @@ -312,7 +312,8 @@ static void pseries_remove_processor(struct device_node 
 *np)
  {
   unsigned int cpu;
   int len, nthreads, i;
 - const u32 *intserv;
 + const __be32 *intserv;
 + u32 thread;
  
   intserv = of_get_property(np, ibm,ppc-interrupt-server#s, len);
   if (!intserv)
 @@ -322,8 +323,9 @@ static void pseries_remove_processor(struct device_node 
 *np)
  
   cpu_maps_update_begin();
   for (i = 0; i  nthreads; i++) {
 + thread = be32_to_cpu(intserv[i]);
   for_each_present_cpu(cpu) {
 - if (get_hard_smp_processor_id(cpu) != intserv[i])
 + if (get_hard_smp_processor_id(cpu) != thread)
   continue;
   BUG_ON(cpu_online(cpu));
   set_cpu_present(cpu, false);
 @@ -332,7 +334,7 @@ static void pseries_remove_processor(struct device_node 
 *np)
 

Re: [4/5] pseries: Implement memory hotplug add in the kernel

2014-09-24 Thread Nathan Fontenot
On 09/17/2014 02:07 AM, Michael Ellerman wrote:
 
 On Mon, 2014-09-15 at 15:32 -0500, Nathan Fontenot wrote:
 This patch adds the ability to do memory hotplug adding in the kernel.

 Currently the hotplug add/remove of memory is handled by the drmgr
 command. The drmgr command performs the add/remove by performing
 some work in user-space and making requests to the kernel to handle 
 other pieces. By moving all of the work to the kernel we can do the
 add and remove faster, and provide a common place to do memory hotplug
 for both the PowerVM and PowerKVM environments.

 Signed-off-by: Nathan Fontenot nf...@linux.vnet.ibm.com
 ---

 +for (i = 0; i  entries; i++, lmb++) {
 +u32 drc_index = be32_to_cpu(lmb-drc_index);
 +
 +if (lmbs_to_add == lmbs_added)
 +break;
 +
 +if (be32_to_cpu(lmb-flags)  DRCONF_MEM_ASSIGNED)
 +continue;
 +
 +if (hp_elog-id_type == PSERIES_HP_ELOG_ID_DRC_INDEX
 + lmb-drc_index != hp_elog-_drc_u.drc_index)
 +continue;
 +
 +rc = dlpar_acquire_drc(drc_index);
 +if (rc)
 +continue;
 +
 +rc = dlpar_add_one_lmb(lmb);
 +if (rc) {
 +dlpar_release_drc(drc_index);
 +continue;
 +}
 
 In both the above error cases you just move along. That means we potentially
 hotplugged some memory but not everything that we were asked to. That seems
 like a bad idea, we should either do everything or nothing.
 

Michael, how set are you on the all or nothing approach?

Note that I think the all or nothing approach is best but I think it will
present some problems. We do memory add (and remove) on a LMB basis, so it
is possible to hit a scenario in which we cannot revert back to the original
state. For example, a request to add 5 LMBs only succeeds in adding 4 LMBs.
There is no guarantee that we can then remove the 4 LMBs that were added. That
memory could be in use somewhere that it cannot be moved.

I would suggest we continue with the current approach in that we try to
satisfy the request but not try to roll-back the changes if the entire
request cannot be satisfied.

-Nathan   

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

Re: powerpc: Fix build failure when MEMORY_HOTPLUG=y

2014-09-24 Thread Michael Ellerman
On Tue, 2014-19-08 at 21:01:05 UTC, Pranith Kumar wrote:
 ARCH_ENABLE_MEMORY_HOTPLUG is enabled by default for powerpc. This causes 
 build
 failures when SPARSEMEM=n as memory hotplug needs definition which are defined
 only when SPARSEMEM=y. The error is as follows:

 arch/powerpc/platforms/pseries/hotplug-memory.c:27:31: error: 
 'SECTION_SIZE_BITS' undeclared (first use in this function)
 arch/powerpc/platforms/pseries/hotplug-memory.c:27:31: note: each undeclared 
 identifier is reported only once for each function it appears in

I don't see how you can even build pseries without SPARSEMEM=y ?

The three options are:

default DISCONTIGMEM_MANUAL if ARCH_DISCONTIGMEM_DEFAULT
default SPARSEMEM_MANUAL if ARCH_SPARSEMEM_DEFAULT
default FLATMEM_MANUAL

We don't have DISCONTIGMEM.

FLATMEM_MANUAL depends on !ARCH_SPARSEMEM_ENABLE.

We turn on ARCH_SPARSEMEM_ENABLE, and it's not user editable.

cheers
___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

Re: fadump: fix endianess issues in firmware assisted dump handling

2014-09-24 Thread Michael Ellerman
On Wed, 2014-03-09 at 12:29:48 UTC, Hari Bathini wrote:
 Firmware-assisted dump (fadump) kernel code is not LE compliant. The
 below patch tries to fix this issue. Tested this patch with upstream
 kernel. Did some sanity testing for the  LE fadump vmcore generated.
 Below output shows crash tool successfully opening LE fadump vmcore.
 
 Signed-off-by: Hari Bathini hbath...@linux.vnet.ibm.com
 Reviewed-by: Mahesh Salgaonkar mah...@linux.vnet.ibm.com

In general I really dislike this kind of endian conversion, ie. where we just
litter the code with endian conversions at every usage site.

But in this case it's probably OK, because we can't really do much better.

 diff --git a/arch/powerpc/kernel/fadump.c b/arch/powerpc/kernel/fadump.c
 index 742694c..7d73b2d 100644
 --- a/arch/powerpc/kernel/fadump.c
 +++ b/arch/powerpc/kernel/fadump.c
 @@ -72,7 +72,7 @@ int __init early_init_dt_scan_fw_dump(unsigned long node,
   return 1;
  
   fw_dump.fadump_supported = 1;
 - fw_dump.ibm_configure_kernel_dump = *token;
 + fw_dump.ibm_configure_kernel_dump = be32_to_cpu(*token);

I'm getting a sparse warning here:

  arch/powerpc/kernel/fadump.c:75:45: warning: cast to restricted __be32

I think token should be a __be32 *.

   pr_debug(CPU State Data\n);
 - pr_debug(Magic Number: %llx\n, reg_header-magic_number);
 - pr_debug(NumCpuOffset: %x\n, reg_header-num_cpu_offset);
 + pr_debug(Magic Number: %llx\n, be64_to_cpu(reg_header-magic_number));
 + pr_debug(NumCpuOffset: %x\n, be32_to_cpu(reg_header-num_cpu_offset));
  
 - vaddr += reg_header-num_cpu_offset;
 - num_cpus = *((u32 *)(vaddr));
 + vaddr += be32_to_cpu(reg_header-num_cpu_offset);
 + num_cpus = be32_to_cpu(*((u32 *)(vaddr)));

And here too:

  arch/powerpc/kernel/fadump.c:619:20: warning: cast to restricted __be32

I guess the cast is OK there because you are calculating the offset, but the
cast should be to __be32.


 diff --git a/arch/powerpc/platforms/pseries/lpar.c 
 b/arch/powerpc/platforms/pseries/lpar.c
 index 34e6423..587887e 100644
 --- a/arch/powerpc/platforms/pseries/lpar.c
 +++ b/arch/powerpc/platforms/pseries/lpar.c
 @@ -43,6 +43,7 @@
  #include asm/trace.h
  #include asm/firmware.h
  #include asm/plpar_wrappers.h
 +#include asm/fadump.h
  
  #include pseries.h
  
 @@ -249,8 +250,12 @@ static void pSeries_lpar_hptab_clear(void)
   }
  
  #ifdef __LITTLE_ENDIAN__
 - /* Reset exceptions to big endian */
 - if (firmware_has_feature(FW_FEATURE_SET_MODE)) {
 + /*
 +  * Reset exceptions to big endian
 +  * During fadump kernel boot, we dont need to reset exception to big 
 endian
 +  * as we have already booted into LE kernel.
 +  */
 + if (firmware_has_feature(FW_FEATURE_SET_MODE)  !is_fadump_active()) {
   long rc;
  
   rc = pseries_big_endian_exceptions();


It's really unfortunate that we need to inject this knowledge of fadump into
the setup code.

It sounds like we have to do it though, Mahesh said on irc that without it the
system won't boot. Please elaborate on *why* the system won't boot.

And please make the comment much clearer, ie. it's not that we don't need to
reset exceptions to big endian, it's that we *must not* reset them to big
endian.

cheers
___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

[PATCH 2/3] qe_common: add qe common functions to qe_common.c

2014-09-24 Thread Zhao Qiang
QE needs to call some common functions, so move them into a
public directory and add a new file, drivers/soc/qe/qe_common.c,
for them.

Signed-off-by: Zhao Qiang b45...@freescale.com
---
 drivers/soc/qe/Makefile|   2 +-
 drivers/soc/qe/qe_common.c | 185 +
 include/linux/fsl/qe.h |  52 +++--
 3 files changed, 230 insertions(+), 9 deletions(-)
 create mode 100644 drivers/soc/qe/qe_common.c

diff --git a/drivers/soc/qe/Makefile b/drivers/soc/qe/Makefile
index f1855c1..77f6fd9 100644
--- a/drivers/soc/qe/Makefile
+++ b/drivers/soc/qe/Makefile
@@ -1,7 +1,7 @@
 #
 # Makefile for the linux ppc-specific parts of QE
 #
-obj-$(CONFIG_QUICC_ENGINE)+= qe.o qe_ic.o qe_io.o
+obj-$(CONFIG_QUICC_ENGINE)+= qe.o qe_ic.o qe_io.o qe_common.o
 
 obj-$(CONFIG_UCC)  += ucc.o
 obj-$(CONFIG_UCC_SLOW) += ucc_slow.o
diff --git a/drivers/soc/qe/qe_common.c b/drivers/soc/qe/qe_common.c
new file mode 100644
index 000..ee02ae8
--- /dev/null
+++ b/drivers/soc/qe/qe_common.c
@@ -0,0 +1,185 @@
+/*
+ * Common QE code
+ *
+ * Author: Scott Wood scottw...@freescale.com
+ *
+ * Copyright 2007-2008,2010 Freescale Semiconductor, Inc.
+ *
+ * Some parts derived from commproc.c/cpm2_common.c, which is:
+ * Copyright (c) 1997 Dan Malek (dma...@jlc.net)
+ * Copyright (c) 1999-2001 Dan Malek d...@embeddedalley.com
+ * Copyright (c) 2000 MontaVista Software, Inc (sou...@mvista.com)
+ * 2006 (c) MontaVista Software, Inc.
+ * Vitaly Bordug vbor...@ru.mvista.com
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ */
+
+#include linux/init.h
+#include linux/of_device.h
+#include linux/spinlock.h
+#include linux/export.h
+#include linux/of.h
+#include linux/of_address.h
+#include linux/slab.h
+
+#include asm/io.h
+#include asm/rheap.h
+#include linux/fsl/qe.h
+
+static spinlock_t qe_muram_lock;
+static rh_block_t qe_boot_muram_rh_block[16];
+static rh_info_t qe_muram_info;
+static u8 __iomem *muram_vbase;
+static phys_addr_t muram_pbase;
+
+/* Max address size we deal with */
+#define OF_MAX_ADDR_CELLS  4
+
+int qe_muram_init(void)
+{
+   struct device_node *np;
+   struct resource r;
+   u32 zero[OF_MAX_ADDR_CELLS] = {};
+   resource_size_t max = 0;
+   int i = 0;
+   int ret = 0;
+
+   if (muram_pbase)
+   return 0;
+
+   spin_lock_init(qe_muram_lock);
+   /* initialize the info header */
+   rh_init(qe_muram_info, 1,
+   sizeof(qe_boot_muram_rh_block) /
+   sizeof(qe_boot_muram_rh_block[0]),
+   qe_boot_muram_rh_block);
+
+   np = of_find_compatible_node(NULL, NULL, fsl,qe-muram-data);
+   if (!np) {
+   /* try legacy bindings */
+   np = of_find_node_by_name(NULL, data-only);
+   if (!np) {
+   printk(KERN_ERR Cannot find CPM muram data node);
+   ret = -ENODEV;
+   goto out;
+   }
+   }
+
+   muram_pbase = of_translate_address(np, zero);
+   if (muram_pbase == (phys_addr_t)OF_BAD_ADDR) {
+   printk(KERN_ERR Cannot translate zero through CPM muram node);
+   ret = -ENODEV;
+   goto out;
+   }
+
+   while (of_address_to_resource(np, i++, r) == 0) {
+   if (r.end  max)
+   max = r.end;
+
+   rh_attach_region(qe_muram_info, r.start - muram_pbase,
+resource_size(r));
+   }
+
+   muram_vbase = ioremap(muram_pbase, max - muram_pbase + 1);
+   if (!muram_vbase) {
+   printk(KERN_ERR Cannot map CPM muram);
+   ret = -ENOMEM;
+   }
+
+out:
+   of_node_put(np);
+   return ret;
+}
+
+/**
+ * qe_muram_alloc - allocate the requested size worth of multi-user ram
+ * @size: number of bytes to allocate
+ * @align: requested alignment, in bytes
+ *
+ * This function returns an offset into the muram area.
+ * Use qe_dpram_addr() to get the virtual address of the area.
+ * Use qe_muram_free() to free the allocation.
+ */
+unsigned long qe_muram_alloc(unsigned long size, unsigned long align)
+{
+   unsigned long start;
+   unsigned long flags;
+
+   spin_lock_irqsave(qe_muram_lock, flags);
+   qe_muram_info.alignment = align;
+   start = rh_alloc(qe_muram_info, size, commproc);
+   memset(qe_muram_addr(start), 0, size);
+   spin_unlock_irqrestore(qe_muram_lock, flags);
+
+   return start;
+}
+EXPORT_SYMBOL(qe_muram_alloc);
+
+/**
+ * qe_muram_free - free a chunk of multi-user ram
+ * @offset: The beginning of the chunk as returned by qe_muram_alloc().
+ */
+int qe_muram_free(unsigned long offset)
+{
+   int ret;
+   unsigned long flags;
+
+   spin_lock_irqsave(qe_muram_lock, flags);
+   ret = rh_free(qe_muram_info, offset);
+   

[PATCH 3/3] rheap: move rheap.c from arch/powerpc/lib/ to lib/

2014-09-24 Thread Zhao Qiang
QE needs to use rheap, so move it to a public directory.

Signed-off-by: Zhao Qiang b45...@freescale.com
---
 arch/powerpc/Kconfig| 3 ---
 arch/powerpc/include/asm/fsl_85xx_cache_sram.h  | 2 +-
 arch/powerpc/lib/Makefile   | 2 --
 arch/powerpc/platforms/44x/Kconfig  | 2 +-
 arch/powerpc/platforms/85xx/Kconfig | 2 +-
 arch/powerpc/platforms/Kconfig  | 2 +-
 arch/powerpc/platforms/Kconfig.cputype  | 2 +-
 arch/powerpc/sysdev/cpm1.c  | 2 +-
 arch/powerpc/sysdev/cpm2.c  | 2 +-
 arch/powerpc/sysdev/cpm_common.c| 2 +-
 arch/powerpc/sysdev/ppc4xx_ocm.c| 2 +-
 drivers/dma/bestcomm/Kconfig| 2 +-
 drivers/soc/qe/Kconfig  | 2 +-
 drivers/soc/qe/qe.c | 2 +-
 drivers/soc/qe/qe_common.c  | 2 +-
 drivers/video/Kconfig   | 2 +-
 include/linux/fsl/bestcomm/sram.h   | 2 +-
 {arch/powerpc/include/asm = include/linux/fsl}/rheap.h | 0
 lib/Kconfig | 3 +++
 lib/Makefile| 2 ++
 {arch/powerpc/lib = lib}/rheap.c   | 2 +-
 21 files changed, 21 insertions(+), 21 deletions(-)
 rename {arch/powerpc/include/asm = include/linux/fsl}/rheap.h (100%)
 rename {arch/powerpc/lib = lib}/rheap.c (99%)

diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index d20dc2b..18b658e 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -1046,7 +1046,4 @@ config PPC_CLOCK
default n
select HAVE_CLK
 
-config PPC_LIB_RHEAP
-   bool
-
 source arch/powerpc/kvm/Kconfig
diff --git a/arch/powerpc/include/asm/fsl_85xx_cache_sram.h 
b/arch/powerpc/include/asm/fsl_85xx_cache_sram.h
index 2af2bdc..e57888a 100644
--- a/arch/powerpc/include/asm/fsl_85xx_cache_sram.h
+++ b/arch/powerpc/include/asm/fsl_85xx_cache_sram.h
@@ -26,7 +26,7 @@
 #ifndef __ASM_POWERPC_FSL_85XX_CACHE_SRAM_H__
 #define __ASM_POWERPC_FSL_85XX_CACHE_SRAM_H__
 
-#include asm/rheap.h
+#include linux/fsl/rheap.h
 #include linux/spinlock.h
 
 /*
diff --git a/arch/powerpc/lib/Makefile b/arch/powerpc/lib/Makefile
index 4504332..c7b4e2f 100644
--- a/arch/powerpc/lib/Makefile
+++ b/arch/powerpc/lib/Makefile
@@ -26,8 +26,6 @@ obj-$(CONFIG_SMP) += locks.o
 obj-$(CONFIG_ALTIVEC)  += vmx-helper.o
 endif
 
-obj-$(CONFIG_PPC_LIB_RHEAP) += rheap.o
-
 obj-y  += code-patching.o
 obj-y  += feature-fixups.o
 obj-$(CONFIG_FTR_FIXUP_SELFTEST) += feature-fixups-test.o
diff --git a/arch/powerpc/platforms/44x/Kconfig 
b/arch/powerpc/platforms/44x/Kconfig
index d6c7506..0ea1aee 100644
--- a/arch/powerpc/platforms/44x/Kconfig
+++ b/arch/powerpc/platforms/44x/Kconfig
@@ -254,7 +254,7 @@ config PPC4xx_GPIO
 config PPC4xx_OCM
bool PPC4xx On Chip Memory (OCM) support
depends on 4xx
-   select PPC_LIB_RHEAP
+   select LIB_RHEAP
help
  Enable OCM support for PowerPC 4xx platforms with on chip memory,
  OCM provides the fast place for memory access to improve performance.
diff --git a/arch/powerpc/platforms/85xx/Kconfig 
b/arch/powerpc/platforms/85xx/Kconfig
index ae9fdb51..ab4777a 100644
--- a/arch/powerpc/platforms/85xx/Kconfig
+++ b/arch/powerpc/platforms/85xx/Kconfig
@@ -19,7 +19,7 @@ if PPC32
 
 config FSL_85XX_CACHE_SRAM
bool Freescale l2cache sram support
-   select PPC_LIB_RHEAP
+   select LIB_RHEAP
help
  When selected, this option enables cache-sram support
  for memory allocation on P1/P2 QorIQ platforms.
diff --git a/arch/powerpc/platforms/Kconfig b/arch/powerpc/platforms/Kconfig
index d09ae32f..9c38a8d 100644
--- a/arch/powerpc/platforms/Kconfig
+++ b/arch/powerpc/platforms/Kconfig
@@ -282,7 +282,7 @@ config CPM2
bool Enable support for the CPM2 (Communications Processor Module)
depends on (FSL_SOC_BOOKE  PPC32) || 8260
select CPM
-   select PPC_LIB_RHEAP
+   select LIB_RHEAP
select PPC_PCI_CHOICE
select ARCH_REQUIRE_GPIOLIB
help
diff --git a/arch/powerpc/platforms/Kconfig.cputype 
b/arch/powerpc/platforms/Kconfig.cputype
index c9ef8a5..d68d19e 100644
--- a/arch/powerpc/platforms/Kconfig.cputype
+++ b/arch/powerpc/platforms/Kconfig.cputype
@@ -33,7 +33,7 @@ config PPC_8xx
bool Freescale 8xx
select FSL_SOC
select 8xx
-   select PPC_LIB_RHEAP
+   select LIB_RHEAP
 
 config 40x
bool AMCC 40x
diff --git a/arch/powerpc/sysdev/cpm1.c b/arch/powerpc/sysdev/cpm1.c
index 5e6ff38..c6f5762 100644
--- a/arch/powerpc/sysdev/cpm1.c
+++ b/arch/powerpc/sysdev/cpm1.c
@@ -38,7 +38,7 @@
 #include asm/cpm1.h
 #include asm/io.h
 #include asm/tlbflush.h

[PATCH v2 07/22] PCI/MSI: Refactor struct msi_chip to make it become more common

2014-09-24 Thread Yijing Wang
There are currently a lot of __weak arch functions in the MSI code,
and they make the MSI driver complex. Thierry Reding introduced
a new MSI chip framework to configure MSI/MSI-X irqs on ARM. Use
the new MSI chip framework to refactor all other platforms' MSI
arch code and eliminate the weak arch MSI functions. This patch adds
.setup_irqs(), .teardown_irqs() and .restore_irqs() to struct msi_chip
to make it more generic.

Signed-off-by: Yijing Wang wangyij...@huawei.com
Reviewed-by: Lucas Stach l.st...@pengutronix.de
---
 drivers/pci/msi.c   |   15 +++
 include/linux/msi.h |3 +++
 2 files changed, 18 insertions(+), 0 deletions(-)

diff --git a/drivers/pci/msi.c b/drivers/pci/msi.c
index 3acbe65..d10edee 100644
--- a/drivers/pci/msi.c
+++ b/drivers/pci/msi.c
@@ -64,6 +64,11 @@ int __weak arch_setup_msi_irqs(struct pci_dev *dev, int 
nvec, int type)
 {
struct msi_desc *entry;
int ret;
+   struct msi_chip *chip;
+
+   chip = arch_find_msi_chip(dev);
+   if (chip  chip-setup_irqs)
+   return chip-setup_irqs(dev, nvec, type);
 
/*
 * If an architecture wants to support multiple MSI, it needs to
@@ -106,6 +111,11 @@ void default_teardown_msi_irqs(struct pci_dev *dev)
 
 void __weak arch_teardown_msi_irqs(struct pci_dev *dev)
 {
+   struct msi_chip *chip = arch_find_msi_chip(dev);
+
+   if (chip  chip-teardown_irqs)
+   return chip-teardown_irqs(dev);
+
return default_teardown_msi_irqs(dev);
 }
 
@@ -129,6 +139,11 @@ static void default_restore_msi_irq(struct pci_dev *dev, 
int irq)
 
 void __weak arch_restore_msi_irqs(struct pci_dev *dev)
 {
+   struct msi_chip *chip = arch_find_msi_chip(dev);
+
+   if (chip  chip-restore_irqs)
+   return chip-restore_irqs(dev);
+
return default_restore_msi_irqs(dev);
 }
 
diff --git a/include/linux/msi.h b/include/linux/msi.h
index 6fdc5c6..4cf1f31 100644
--- a/include/linux/msi.h
+++ b/include/linux/msi.h
@@ -69,7 +69,10 @@ struct msi_chip {
struct list_head list;
 
int (*setup_irq)(struct pci_dev *dev, struct msi_desc *desc);
+   int (*setup_irqs)(struct pci_dev *dev, int nvec, int type);
void (*teardown_irq)(unsigned int irq);
+   void (*teardown_irqs)(struct pci_dev *dev);
+   void (*restore_irqs)(struct pci_dev *dev);
 };
 
 #endif /* LINUX_MSI_H */
-- 
1.7.1

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

[PATCH v2 06/22] PCI/MSI: Introduce weak arch_find_msi_chip() to find MSI chip

2014-09-24 Thread Yijing Wang
Introduce a weak arch_find_msi_chip() to find the matching msi_chip.
Currently, each PCI bus is associated with an msi_chip, because on
ARM platforms there may be more than one MSI controller in the system.
Associating the PCI bus with an msi_chip helps a PCI device find the
matching msi_chip and set up its MSI/MSI-X irqs correctly. On other
platforms, such as x86, only one MSI chip is needed, because all devices
use the same MSI address/data, irq, etc. There is no need to associate
the PCI bus with an MSI chip there; for simplicity, an arch function,
arch_find_msi_chip(), just returns the MSI chip. The default weak
arch_find_msi_chip(), used on ARM platforms, finds the MSI chip
via the PCI bus (dev->bus->msi).

Signed-off-by: Yijing Wang wangyij...@huawei.com
---
 drivers/pci/msi.c |7 ++-
 1 files changed, 6 insertions(+), 1 deletions(-)

diff --git a/drivers/pci/msi.c b/drivers/pci/msi.c
index 5f8f3af..3acbe65 100644
--- a/drivers/pci/msi.c
+++ b/drivers/pci/msi.c
@@ -29,9 +29,14 @@ static int pci_msi_enable = 1;
 
 /* Arch hooks */
 
+struct msi_chip * __weak arch_find_msi_chip(struct pci_dev *dev)
+{
+   return dev-bus-msi;
+}
+
 int __weak arch_setup_msi_irq(struct pci_dev *dev, struct msi_desc *desc)
 {
-   struct msi_chip *chip = dev-bus-msi;
+   struct msi_chip *chip = arch_find_msi_chip(dev);
int err;
 
if (!chip || !chip-setup_irq)
-- 
1.7.1

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

[PATCH v2 05/22] s390/MSI: Use __msi_mask_irq() instead of default_msi_mask_irq()

2014-09-24 Thread Yijing Wang
Now only the s390 MSI code uses default_msi_mask_irq() and
default_msix_mask_irq(). Replace them with the common
MSI mask functions __msi_mask_irq() and __msix_mask_irq(), and
remove default_msi_mask_irq() and default_msix_mask_irq().

Signed-off-by: Yijing Wang wangyij...@huawei.com
---
 arch/s390/pci/pci.c |4 ++--
 include/linux/msi.h |2 --
 2 files changed, 2 insertions(+), 4 deletions(-)

diff --git a/arch/s390/pci/pci.c b/arch/s390/pci/pci.c
index 2fa7b14..552b990 100644
--- a/arch/s390/pci/pci.c
+++ b/arch/s390/pci/pci.c
@@ -448,9 +448,9 @@ void arch_teardown_msi_irqs(struct pci_dev *pdev)
/* Release MSI interrupts */
list_for_each_entry(msi, pdev-msi_list, list) {
if (msi-msi_attrib.is_msix)
-   default_msix_mask_irq(msi, 1);
+   __msix_mask_irq(msi, 1);
else
-   default_msi_mask_irq(msi, 1, 1);
+   __msi_mask_irq(msi, 1, 1);
irq_set_msi_desc(msi-irq, NULL);
irq_free_desc(msi-irq);
msi-msg.address_lo = 0;
diff --git a/include/linux/msi.h b/include/linux/msi.h
index cc46a62..6fdc5c6 100644
--- a/include/linux/msi.h
+++ b/include/linux/msi.h
@@ -61,8 +61,6 @@ void arch_restore_msi_irqs(struct pci_dev *dev);
 
 void default_teardown_msi_irqs(struct pci_dev *dev);
 void default_restore_msi_irqs(struct pci_dev *dev);
-#define default_msi_mask_irq   __msi_mask_irq
-#define default_msix_mask_irq  __msix_mask_irq
 
 struct msi_chip {
struct module *owner;
-- 
1.7.1

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

[PATCH v2 02/22] PCI/MSI: Remove useless bus-msi assignment

2014-09-24 Thread Yijing Wang
Currently, PCI drivers initialize bus->msi in
pcibios_add_bus(), which is called during the
initialization of every PCI bus. So the bus->msi
assignment in pci_alloc_child_bus() is useless.

Signed-off-by: Yijing Wang wangyij...@huawei.com
CC: Thierry Reding thierry.red...@gmail.com
CC: Thomas Petazzoni thomas.petazz...@free-electrons.com
---
 drivers/pci/probe.c |1 -
 1 files changed, 0 insertions(+), 1 deletions(-)

diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c
index e3cf8a2..8296576 100644
--- a/drivers/pci/probe.c
+++ b/drivers/pci/probe.c
@@ -677,7 +677,6 @@ static struct pci_bus *pci_alloc_child_bus(struct pci_bus 
*parent,
 
child-parent = parent;
child-ops = parent-ops;
-   child-msi = parent-msi;
child-sysdata = parent-sysdata;
child-bus_flags = parent-bus_flags;
 
-- 
1.7.1

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

[PATCH v2 10/22] Irq_remapping/MSI: Use MSI chip framework to configure MSI/MSI-X irq

2014-09-24 Thread Yijing Wang
Use MSI chip framework instead of arch MSI functions to configure
MSI/MSI-X irq. So we can manage MSI/MSI-X irq in a unified framework.

Signed-off-by: Yijing Wang wangyij...@huawei.com
---
 drivers/iommu/irq_remapping.c |6 ++
 1 files changed, 6 insertions(+), 0 deletions(-)

diff --git a/drivers/iommu/irq_remapping.c b/drivers/iommu/irq_remapping.c
index 33c4395..7929590 100644
--- a/drivers/iommu/irq_remapping.c
+++ b/drivers/iommu/irq_remapping.c
@@ -148,6 +148,11 @@ static int irq_remapping_setup_msi_irqs(struct pci_dev 
*dev,
return do_setup_msix_irqs(dev, nvec);
 }
 
+static struct msi_chip remap_msi_chip = {
+   .setup_irqs = irq_remapping_setup_msi_irqs,
+   .teardown_irq = native_teardown_msi_irq,
+};
+
 static void eoi_ioapic_pin_remapped(int apic, int pin, int vector)
 {
/*
@@ -168,6 +173,7 @@ static void __init irq_remapping_modify_x86_ops(void)
x86_msi.setup_msi_irqs  = irq_remapping_setup_msi_irqs;
x86_msi.setup_hpet_msi  = setup_hpet_msi_remapped;
x86_msi.compose_msi_msg = compose_remapped_msi_msg;
+   x86_msi_chip = remap_msi_chip;
 }
 
 static __init int setup_nointremap(char *str)
-- 
1.7.1

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

[PATCH v2 09/22] x86/xen/MSI: Use MSI chip framework to configure MSI/MSI-X irq

2014-09-24 Thread Yijing Wang
Use MSI chip framework instead of arch MSI functions to configure
MSI/MSI-X irq. So we can manage MSI/MSI-X irq in a unified framework.

Tested-by: Konrad Rzeszutek Wilk konrad.w...@oracle.com
Signed-off-by: Yijing Wang wangyij...@huawei.com
Acked-by: David Vrabel david.vra...@citrix.com
Reviewed-by: Konrad Rzeszutek Wilk konrad.w...@oracle.com
CC: Konrad Rzeszutek Wilk konrad.w...@oracle.com
---
 arch/x86/pci/xen.c |   46 ++
 1 files changed, 30 insertions(+), 16 deletions(-)

diff --git a/arch/x86/pci/xen.c b/arch/x86/pci/xen.c
index 84c2fce..e669ee4 100644
--- a/arch/x86/pci/xen.c
+++ b/arch/x86/pci/xen.c
@@ -376,6 +376,11 @@ static void xen_initdom_restore_msi_irqs(struct pci_dev 
*dev)
 }
 #endif
 
+static void xen_teardown_msi_irq(unsigned int irq)
+{
+   xen_destroy_irq(irq);
+}
+
 static void xen_teardown_msi_irqs(struct pci_dev *dev)
 {
struct msi_desc *msidesc;
@@ -385,19 +390,26 @@ static void xen_teardown_msi_irqs(struct pci_dev *dev)
xen_pci_frontend_disable_msix(dev);
else
xen_pci_frontend_disable_msi(dev);
-
-   /* Free the IRQ's and the msidesc using the generic code. */
-   default_teardown_msi_irqs(dev);
-}
-
-static void xen_teardown_msi_irq(unsigned int irq)
-{
-   xen_destroy_irq(irq);
+   
+   list_for_each_entry(msidesc, dev-msi_list, list) {
+   int i, nvec;
+   if (msidesc-irq == 0)
+   continue;
+   if (msidesc-nvec_used)
+   nvec = msidesc-nvec_used;
+   else
+   nvec = 1  msidesc-msi_attrib.multiple;
+   for (i = 0; i  nvec; i++)
+   xen_teardown_msi_irq(msidesc-irq + i);
+   }
 }
 
 void xen_nop_msi_mask(struct irq_data *data)
 {
 }
+
+struct msi_chip xen_msi_chip;
+
 #endif
 
 int __init pci_xen_init(void)
@@ -418,9 +430,9 @@ int __init pci_xen_init(void)
 #endif
 
 #ifdef CONFIG_PCI_MSI
-   x86_msi.setup_msi_irqs = xen_setup_msi_irqs;
-   x86_msi.teardown_msi_irq = xen_teardown_msi_irq;
-   x86_msi.teardown_msi_irqs = xen_teardown_msi_irqs;
+   xen_msi_chip.setup_irqs = xen_setup_msi_irqs;
+   xen_msi_chip.teardown_irqs = xen_teardown_msi_irqs;
+   x86_msi_chip = xen_msi_chip;
msi_chip.irq_mask = xen_nop_msi_mask;
msi_chip.irq_unmask = xen_nop_msi_mask;
 #endif
@@ -441,8 +453,9 @@ int __init pci_xen_hvm_init(void)
 #endif
 
 #ifdef CONFIG_PCI_MSI
-   x86_msi.setup_msi_irqs = xen_hvm_setup_msi_irqs;
-   x86_msi.teardown_msi_irq = xen_teardown_msi_irq;
+   xen_msi_chip.setup_irqs = xen_hvm_setup_msi_irqs;
+   xen_msi_chip.teardown_irq = xen_teardown_msi_irq;
+   x86_msi_chip = xen_msi_chip;
 #endif
return 0;
 }
@@ -499,9 +512,10 @@ int __init pci_xen_initial_domain(void)
int irq;
 
 #ifdef CONFIG_PCI_MSI
-   x86_msi.setup_msi_irqs = xen_initdom_setup_msi_irqs;
-   x86_msi.teardown_msi_irq = xen_teardown_msi_irq;
-   x86_msi.restore_msi_irqs = xen_initdom_restore_msi_irqs;
+   xen_msi_chip.setup_irqs = xen_initdom_setup_msi_irqs;
+   xen_msi_chip.teardown_irq = xen_teardown_msi_irq;
+   xen_msi_chip.restore_irqs = xen_initdom_restore_msi_irqs;
+   x86_msi_chip = xen_msi_chip;
msi_chip.irq_mask = xen_nop_msi_mask;
msi_chip.irq_unmask = xen_nop_msi_mask;
 #endif
-- 
1.7.1

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

[PATCH v2 01/22] PCI/MSI: Clean up struct msi_chip argument

2014-09-24 Thread Yijing Wang
The msi_chip functions setup_irq/teardown_irq rarely use their msi_chip
argument. The msi_chip pointer can be looked up from the device pointer
or the irq number, so remove the msi_chip argument.

Signed-off-by: Yijing Wang wangyij...@huawei.com
CC: Thierry Reding thierry.red...@gmail.com
CC: Thomas Petazzoni thomas.petazz...@free-electrons.com
---
 drivers/irqchip/irq-armada-370-xp.c |8 +++-
 drivers/pci/host/pci-tegra.c|8 +---
 drivers/pci/host/pcie-designware.c  |4 ++--
 drivers/pci/host/pcie-rcar.c|8 +---
 drivers/pci/msi.c   |4 ++--
 include/linux/msi.h |5 ++---
 6 files changed, 19 insertions(+), 18 deletions(-)

diff --git a/drivers/irqchip/irq-armada-370-xp.c 
b/drivers/irqchip/irq-armada-370-xp.c
index df60eab..3909d06 100644
--- a/drivers/irqchip/irq-armada-370-xp.c
+++ b/drivers/irqchip/irq-armada-370-xp.c
@@ -129,9 +129,8 @@ static void armada_370_xp_free_msi(int hwirq)
mutex_unlock(msi_used_lock);
 }
 
-static int armada_370_xp_setup_msi_irq(struct msi_chip *chip,
-  struct pci_dev *pdev,
-  struct msi_desc *desc)
+static int armada_370_xp_setup_msi_irq(struct pci_dev *pdev, 
+   struct msi_desc *desc)
 {
struct msi_msg msg;
int virq, hwirq;
@@ -160,8 +159,7 @@ static int armada_370_xp_setup_msi_irq(struct msi_chip 
*chip,
return 0;
 }
 
-static void armada_370_xp_teardown_msi_irq(struct msi_chip *chip,
-  unsigned int irq)
+static void armada_370_xp_teardown_msi_irq(unsigned int irq)
 {
struct irq_data *d = irq_get_irq_data(irq);
unsigned long hwirq = d-hwirq;
diff --git a/drivers/pci/host/pci-tegra.c b/drivers/pci/host/pci-tegra.c
index 0fb0fdb..edd4040 100644
--- a/drivers/pci/host/pci-tegra.c
+++ b/drivers/pci/host/pci-tegra.c
@@ -1157,9 +1157,10 @@ static irqreturn_t tegra_pcie_msi_irq(int irq, void 
*data)
return processed  0 ? IRQ_HANDLED : IRQ_NONE;
 }
 
-static int tegra_msi_setup_irq(struct msi_chip *chip, struct pci_dev *pdev,
+static int tegra_msi_setup_irq(struct pci_dev *pdev,
   struct msi_desc *desc)
 {
+   struct msi_chip *chip = pdev-bus-msi;
struct tegra_msi *msi = to_tegra_msi(chip);
struct msi_msg msg;
unsigned int irq;
@@ -1185,10 +1186,11 @@ static int tegra_msi_setup_irq(struct msi_chip *chip, 
struct pci_dev *pdev,
return 0;
 }
 
-static void tegra_msi_teardown_irq(struct msi_chip *chip, unsigned int irq)
+static void tegra_msi_teardown_irq(unsigned int irq)
 {
-   struct tegra_msi *msi = to_tegra_msi(chip);
struct irq_data *d = irq_get_irq_data(irq);
+   struct msi_chip *chip = irq_get_chip_data(irq);
+   struct tegra_msi *msi = to_tegra_msi(chip);
 
tegra_msi_free(msi, d-hwirq);
 }
diff --git a/drivers/pci/host/pcie-designware.c 
b/drivers/pci/host/pcie-designware.c
index fa2fa45..517f1e1 100644
--- a/drivers/pci/host/pcie-designware.c
+++ b/drivers/pci/host/pcie-designware.c
@@ -342,7 +342,7 @@ static void clear_irq(unsigned int irq)
msi-msi_attrib.multiple = 0;
 }
 
-static int dw_msi_setup_irq(struct msi_chip *chip, struct pci_dev *pdev,
+static int dw_msi_setup_irq(struct pci_dev *pdev,
struct msi_desc *desc)
 {
int irq, pos, msgvec;
@@ -383,7 +383,7 @@ static int dw_msi_setup_irq(struct msi_chip *chip, struct 
pci_dev *pdev,
return 0;
 }
 
-static void dw_msi_teardown_irq(struct msi_chip *chip, unsigned int irq)
+static void dw_msi_teardown_irq(unsigned int irq)
 {
clear_irq(irq);
 }
diff --git a/drivers/pci/host/pcie-rcar.c b/drivers/pci/host/pcie-rcar.c
index 4884ee5..647bc9f 100644
--- a/drivers/pci/host/pcie-rcar.c
+++ b/drivers/pci/host/pcie-rcar.c
@@ -615,9 +615,10 @@ static irqreturn_t rcar_pcie_msi_irq(int irq, void *data)
return IRQ_HANDLED;
 }
 
-static int rcar_msi_setup_irq(struct msi_chip *chip, struct pci_dev *pdev,
+static int rcar_msi_setup_irq(struct pci_dev *pdev,
  struct msi_desc *desc)
 {
+   struct msi_chip *chip = pdev-bus-msi;
struct rcar_msi *msi = to_rcar_msi(chip);
struct rcar_pcie *pcie = container_of(chip, struct rcar_pcie, msi.chip);
struct msi_msg msg;
@@ -645,10 +646,11 @@ static int rcar_msi_setup_irq(struct msi_chip *chip, 
struct pci_dev *pdev,
return 0;
 }
 
-static void rcar_msi_teardown_irq(struct msi_chip *chip, unsigned int irq)
+static void rcar_msi_teardown_irq(unsigned int irq)
 {
-   struct rcar_msi *msi = to_rcar_msi(chip);
struct irq_data *d = irq_get_irq_data(irq);
+   struct msi_chip *chip = irq_get_chip_data(irq);
+   struct rcar_msi *msi = to_rcar_msi(chip);
 
rcar_msi_free(msi, d-hwirq);
 }
diff --git a/drivers/pci/msi.c b/drivers/pci/msi.c
index aae2fc8..51d7e62 100644
--- a/drivers/pci/msi.c
+++ b/drivers/pci/msi.c
@@ -37,7 +37,7 

[PATCH v2 00/22] Use MSI chip framework to configure MSI/MSI-X in all platforms

2014-09-24 Thread Yijing Wang
This series is based on Bjorn's pci/msi branch
git://git.kernel.org/pub/scm/linux/kernel/git/helgaas/pci.git pci/msi

Currently, there are a lot of weak arch functions in the MSI code.
Thierry Reding introduced the MSI chip framework to configure MSI/MSI-X on ARM.
This series uses the MSI chip framework to refactor the MSI code across all
platforms and eliminate the weak arch functions, so that all MSI irqs are
managed in a unified framework. Because this series changes a lot of arch MSI
code, testing on the affected platforms is warmly welcomed!

v1->v2:
Add a patch to keep the s390 MSI code building between the patches
x86/xen/MSI: E.. and s390/MSI: Use MSI... Fix several typos found by Lucas.

RFC->v1:
Updated [patch 4/21] x86/xen/MSI: Eliminate..., export msi_chip instead
of using #ifdef to fix the MSI bug in xen running on x86.
Rename arch_get_match_msi_chip() to arch_find_msi_chip().
Drop the use of struct device as the msi_chip argument; we will do that
later in another patchset.

Yijing Wang (22):
  PCI/MSI: Clean up struct msi_chip argument
  PCI/MSI: Remove useless bus-msi assignment
  MSI: Remove the redundant irq_set_chip_data()
  x86/xen/MSI: Eliminate arch_msix_mask_irq() and arch_msi_mask_irq()
  s390/MSI: Use __msi_mask_irq() instead of default_msi_mask_irq()
  PCI/MSI: Introduce weak arch_find_msi_chip() to find MSI chip
  PCI/MSI: Refactor struct msi_chip to make it become more common
  x86/MSI: Use MSI chip framework to configure MSI/MSI-X irq
  x86/xen/MSI: Use MSI chip framework to configure MSI/MSI-X irq
  Irq_remapping/MSI: Use MSI chip framework to configure MSI/MSI-X irq
  x86/MSI: Remove unused MSI weak arch functions
  MIPS/Octeon/MSI: Use MSI chip framework to configure MSI/MSI-X irq
  MIPS/Xlp: Remove the dead function destroy_irq() to fix build error
  MIPS/Xlp/MSI: Use MSI chip framework to configure MSI/MSI-X irq
  MIPS/Xlr/MSI: Use MSI chip framework to configure MSI/MSI-X irq
  Powerpc/MSI: Use MSI chip framework to configure MSI/MSI-X irq
  s390/MSI: Use MSI chip framework to configure MSI/MSI-X irq
  arm/iop13xx/MSI: Use MSI chip framework to configure MSI/MSI-X irq
  IA64/MSI: Use MSI chip framework to configure MSI/MSI-X irq
  Sparc/MSI: Use MSI chip framework to configure MSI/MSI-X irq
  tile/MSI: Use MSI chip framework to configure MSI/MSI-X irq
  PCI/MSI: Clean up unused MSI arch functions

 arch/arm/mach-iop13xx/include/mach/pci.h |2 +
 arch/arm/mach-iop13xx/iq81340mc.c|1 +
 arch/arm/mach-iop13xx/iq81340sc.c|1 +
 arch/arm/mach-iop13xx/msi.c  |9 ++-
 arch/arm/mach-iop13xx/pci.c  |6 ++
 arch/ia64/kernel/msi_ia64.c  |   18 -
 arch/mips/pci/msi-octeon.c   |   35 ++
 arch/mips/pci/msi-xlp.c  |   18 --
 arch/mips/pci/pci-xlr.c  |   15 -
 arch/powerpc/kernel/msi.c|   14 +++-
 arch/s390/pci/pci.c  |   18 -
 arch/sparc/kernel/pci.c  |   14 +++-
 arch/tile/kernel/pci_gx.c|   14 +++-
 arch/x86/include/asm/apic.h  |4 +
 arch/x86/include/asm/pci.h   |4 +-
 arch/x86/include/asm/x86_init.h  |7 --
 arch/x86/kernel/apic/io_apic.c   |   16 -
 arch/x86/kernel/x86_init.c   |   34 -
 arch/x86/pci/xen.c   |   60 +---
 drivers/iommu/irq_remapping.c|9 ++-
 drivers/irqchip/irq-armada-370-xp.c  |8 +--
 drivers/pci/host/pci-tegra.c |8 ++-
 drivers/pci/host/pcie-designware.c   |4 +-
 drivers/pci/host/pcie-rcar.c |8 ++-
 drivers/pci/msi.c|  114 ++
 drivers/pci/probe.c  |1 -
 include/linux/msi.h  |   26 ++-
 27 files changed, 266 insertions(+), 202 deletions(-)

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

[PATCH v2 13/22] MIPS/Xlp: Remove the dead function destroy_irq() to fix build error

2014-09-24 Thread Yijing Wang
Commit 465665f78a7 (mips: Kill pointless destroy_irq()) removed
destroy_irq(). Remove the leftover call in xlp_setup_msix()
to fix the build error.

arch/mips/pci/msi-xlp.c: In function 'xlp_setup_msix':
arch/mips/pci/msi-xlp.c:447:3: error: implicit declaration of function 
'destroy_irq'..
cc1: some warnings being treated as errors
make[1]: *** [arch/mips/pci/msi-xlp.o] Error 1
make: *** [arch/mips/pci/] Error 2

Signed-off-by: Yijing Wang wangyij...@huawei.com
Cc: Thomas Gleixner t...@linutronix.de
---
 arch/mips/pci/msi-xlp.c |4 +---
 1 files changed, 1 insertions(+), 3 deletions(-)

diff --git a/arch/mips/pci/msi-xlp.c b/arch/mips/pci/msi-xlp.c
index fa374fe..e469dc7 100644
--- a/arch/mips/pci/msi-xlp.c
+++ b/arch/mips/pci/msi-xlp.c
@@ -443,10 +443,8 @@ static int xlp_setup_msix(uint64_t lnkbase, int node, int 
link,
msg.data = 0xc00 | msixvec;
 
ret = irq_set_msi_desc(xirq, desc);
-   if (ret  0) {
-   destroy_irq(xirq);
+   if (ret  0) 
return ret;
-   }
 
write_msi_msg(xirq, msg);
return 0;
-- 
1.7.1

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

[PATCH v2 11/22] x86/MSI: Remove unused MSI weak arch functions

2014-09-24 Thread Yijing Wang
Now we can clean up MSI weak arch functions in x86.

Signed-off-by: Yijing Wang wangyij...@huawei.com
---
 arch/x86/include/asm/pci.h  |3 ---
 arch/x86/include/asm/x86_init.h |4 
 arch/x86/kernel/apic/io_apic.c  |2 +-
 arch/x86/kernel/x86_init.c  |   24 
 drivers/iommu/irq_remapping.c   |1 -
 5 files changed, 1 insertions(+), 33 deletions(-)

diff --git a/arch/x86/include/asm/pci.h b/arch/x86/include/asm/pci.h
index 878a06d..34f9676 100644
--- a/arch/x86/include/asm/pci.h
+++ b/arch/x86/include/asm/pci.h
@@ -96,14 +96,11 @@ extern void pci_iommu_alloc(void);
 #ifdef CONFIG_PCI_MSI
 /* implemented in arch/x86/kernel/apic/io_apic. */
 struct msi_desc;
-int native_setup_msi_irqs(struct pci_dev *dev, int nvec, int type);
 void native_teardown_msi_irq(unsigned int irq);
-void native_restore_msi_irqs(struct pci_dev *dev);
 int setup_msi_irq(struct pci_dev *dev, struct msi_desc *msidesc,
  unsigned int irq_base, unsigned int irq_offset);
 extern struct msi_chip *x86_msi_chip;
 #else
-#define native_setup_msi_irqs  NULL
 #define native_teardown_msi_irqNULL
 #endif
 
diff --git a/arch/x86/include/asm/x86_init.h b/arch/x86/include/asm/x86_init.h
index f58a9c7..2514f67 100644
--- a/arch/x86/include/asm/x86_init.h
+++ b/arch/x86/include/asm/x86_init.h
@@ -174,13 +174,9 @@ struct pci_dev;
 struct msi_msg;
 
 struct x86_msi_ops {
-   int (*setup_msi_irqs)(struct pci_dev *dev, int nvec, int type);
void (*compose_msi_msg)(struct pci_dev *dev, unsigned int irq,
unsigned int dest, struct msi_msg *msg,
   u8 hpet_id);
-   void (*teardown_msi_irq)(unsigned int irq);
-   void (*teardown_msi_irqs)(struct pci_dev *dev);
-   void (*restore_msi_irqs)(struct pci_dev *dev);
int  (*setup_hpet_msi)(unsigned int irq, unsigned int id);
 };
 
diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c
index 882b95e..f998192 100644
--- a/arch/x86/kernel/apic/io_apic.c
+++ b/arch/x86/kernel/apic/io_apic.c
@@ -3200,7 +3200,7 @@ int setup_msi_irq(struct pci_dev *dev, struct msi_desc 
*msidesc,
return 0;
 }
 
-int native_setup_msi_irqs(struct pci_dev *dev, int nvec, int type)
+static int native_setup_msi_irqs(struct pci_dev *dev, int nvec, int type)
 {
struct msi_desc *msidesc;
unsigned int irq;
diff --git a/arch/x86/kernel/x86_init.c b/arch/x86/kernel/x86_init.c
index 234b072..cc32568 100644
--- a/arch/x86/kernel/x86_init.c
+++ b/arch/x86/kernel/x86_init.c
@@ -110,34 +110,10 @@ EXPORT_SYMBOL_GPL(x86_platform);
 
 #if defined(CONFIG_PCI_MSI)
 struct x86_msi_ops x86_msi = {
-   .setup_msi_irqs = native_setup_msi_irqs,
.compose_msi_msg= native_compose_msi_msg,
-   .teardown_msi_irq   = native_teardown_msi_irq,
-   .teardown_msi_irqs  = default_teardown_msi_irqs,
-   .restore_msi_irqs   = default_restore_msi_irqs,
.setup_hpet_msi = default_setup_hpet_msi,
 };
 
-/* MSI arch specific hooks */
-int arch_setup_msi_irqs(struct pci_dev *dev, int nvec, int type)
-{
-   return x86_msi.setup_msi_irqs(dev, nvec, type);
-}
-
-void arch_teardown_msi_irqs(struct pci_dev *dev)
-{
-   x86_msi.teardown_msi_irqs(dev);
-}
-
-void arch_teardown_msi_irq(unsigned int irq)
-{
-   x86_msi.teardown_msi_irq(irq);
-}
-
-void arch_restore_msi_irqs(struct pci_dev *dev)
-{
-   x86_msi.restore_msi_irqs(dev);
-}
 #endif
 
 struct x86_io_apic_ops x86_io_apic_ops = {
diff --git a/drivers/iommu/irq_remapping.c b/drivers/iommu/irq_remapping.c
index 7929590..99b1c0f 100644
--- a/drivers/iommu/irq_remapping.c
+++ b/drivers/iommu/irq_remapping.c
@@ -170,7 +170,6 @@ static void __init irq_remapping_modify_x86_ops(void)
x86_io_apic_ops.set_affinity= set_remapped_irq_affinity;
x86_io_apic_ops.setup_entry = setup_ioapic_remapped_entry;
x86_io_apic_ops.eoi_ioapic_pin  = eoi_ioapic_pin_remapped;
-   x86_msi.setup_msi_irqs  = irq_remapping_setup_msi_irqs;
x86_msi.setup_hpet_msi  = setup_hpet_msi_remapped;
x86_msi.compose_msi_msg = compose_remapped_msi_msg;
x86_msi_chip = remap_msi_chip;
-- 
1.7.1

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

[PATCH v2 14/22] MIPS/Xlp/MSI: Use MSI chip framework to configure MSI/MSI-X irq

2014-09-24 Thread Yijing Wang
Use MSI chip framework instead of arch MSI functions to configure
MSI/MSI-X irq. So we can manage MSI/MSI-X irq in a unified framework.

Signed-off-by: Yijing Wang wangyij...@huawei.com
---
 arch/mips/pci/msi-xlp.c |   14 --
 1 files changed, 12 insertions(+), 2 deletions(-)

diff --git a/arch/mips/pci/msi-xlp.c b/arch/mips/pci/msi-xlp.c
index e469dc7..6b791ef 100644
--- a/arch/mips/pci/msi-xlp.c
+++ b/arch/mips/pci/msi-xlp.c
@@ -245,7 +245,7 @@ static struct irq_chip xlp_msix_chip = {
.irq_unmask = unmask_msi_irq,
 };
 
-void arch_teardown_msi_irq(unsigned int irq)
+void xlp_teardown_msi_irq(unsigned int irq)
 {
 }
 
@@ -450,7 +450,7 @@ static int xlp_setup_msix(uint64_t lnkbase, int node, int 
link,
return 0;
 }
 
-int arch_setup_msi_irq(struct pci_dev *dev, struct msi_desc *desc)
+static int xlp_setup_msi_irq(struct pci_dev *dev, struct msi_desc *desc)
 {
struct pci_dev *lnkdev;
uint64_t lnkbase;
@@ -472,6 +472,16 @@ int arch_setup_msi_irq(struct pci_dev *dev, struct 
msi_desc *desc)
return xlp_setup_msi(lnkbase, node, link, desc);
 }
 
+static struct msi_chip xlp_chip = {
+   .setup_irq = xlp_setup_msi_irq,
+   .teardown_irq = xlp_teardown_msi_irq,
+};
+
+struct msi_chip *arch_find_msi_chip(struct pci_dev *dev)
+{
+   return xlp_chip;
+}
+
 void __init xlp_init_node_msi_irqs(int node, int link)
 {
struct nlm_soc_info *nodep;
-- 
1.7.1

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

[PATCH v2 04/22] x86/xen/MSI: Eliminate arch_msix_mask_irq() and arch_msi_mask_irq()

2014-09-24 Thread Yijing Wang
Commit 0e4ccb150 added two __weak arch functions, arch_msix_mask_irq()
and arch_msi_mask_irq(), to fix a bug found when running xen on x86.
Introducing these two functions made the MSI code complex. Masking and
unmasking are irq actions that belong to the interrupt controller, and
weak arch functions should not be used to override the mask/unmask
interfaces. This patch reverts commit 0e4ccb150, exports struct irq_chip
msi_chip, and modifies msi_chip->irq_mask/irq_unmask() in the xen init
functions to fix the bug more simply. This is also preparation for using
struct msi_chip instead of weak arch MSI functions on all platforms.
Keep default_msi_mask_irq() and default_msix_mask_irq() in
linux/msi.h so that the s390 MSI code still compiles; they will be removed
in a later patch.

Tested-by: Konrad Rzeszutek Wilk konrad.w...@oracle.com
Signed-off-by: Yijing Wang wangyij...@huawei.com
Acked-by: David Vrabel david.vra...@citrix.com
Reviewed-by: Konrad Rzeszutek Wilk konrad.w...@oracle.com
CC: Konrad Rzeszutek Wilk konrad.w...@oracle.com
---
 arch/x86/include/asm/apic.h |4 
 arch/x86/include/asm/x86_init.h |3 ---
 arch/x86/kernel/apic/io_apic.c  |2 +-
 arch/x86/kernel/x86_init.c  |   10 --
 arch/x86/pci/xen.c  |   16 ++--
 drivers/pci/msi.c   |   22 ++
 include/linux/msi.h |6 --
 7 files changed, 21 insertions(+), 42 deletions(-)

diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h
index 465b309..47a5f94 100644
--- a/arch/x86/include/asm/apic.h
+++ b/arch/x86/include/asm/apic.h
@@ -43,6 +43,10 @@ static inline void generic_apic_probe(void)
 }
 #endif
 
+#ifdef CONFIG_PCI_MSI
+extern struct irq_chip msi_chip;
+#endif
+
 #ifdef CONFIG_X86_LOCAL_APIC
 
 extern unsigned int apic_verbosity;
diff --git a/arch/x86/include/asm/x86_init.h b/arch/x86/include/asm/x86_init.h
index e45e4da..f58a9c7 100644
--- a/arch/x86/include/asm/x86_init.h
+++ b/arch/x86/include/asm/x86_init.h
@@ -172,7 +172,6 @@ struct x86_platform_ops {
 
 struct pci_dev;
 struct msi_msg;
-struct msi_desc;
 
 struct x86_msi_ops {
int (*setup_msi_irqs)(struct pci_dev *dev, int nvec, int type);
@@ -183,8 +182,6 @@ struct x86_msi_ops {
void (*teardown_msi_irqs)(struct pci_dev *dev);
void (*restore_msi_irqs)(struct pci_dev *dev);
int  (*setup_hpet_msi)(unsigned int irq, unsigned int id);
-   u32 (*msi_mask_irq)(struct msi_desc *desc, u32 mask, u32 flag);
-   u32 (*msix_mask_irq)(struct msi_desc *desc, u32 flag);
 };
 
 struct IO_APIC_route_entry;
diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c
index e877cfb..2a2ec28 100644
--- a/arch/x86/kernel/apic/io_apic.c
+++ b/arch/x86/kernel/apic/io_apic.c
@@ -3161,7 +3161,7 @@ msi_set_affinity(struct irq_data *data, const struct 
cpumask *mask, bool force)
  * IRQ Chip for MSI PCI/PCI-X/PCI-Express Devices,
  * which implement the MSI or MSI-X Capability Structure.
  */
-static struct irq_chip msi_chip = {
+struct irq_chip msi_chip = {
.name   = PCI-MSI,
.irq_unmask = unmask_msi_irq,
.irq_mask   = mask_msi_irq,
diff --git a/arch/x86/kernel/x86_init.c b/arch/x86/kernel/x86_init.c
index e48b674..234b072 100644
--- a/arch/x86/kernel/x86_init.c
+++ b/arch/x86/kernel/x86_init.c
@@ -116,8 +116,6 @@ struct x86_msi_ops x86_msi = {
.teardown_msi_irqs  = default_teardown_msi_irqs,
.restore_msi_irqs   = default_restore_msi_irqs,
.setup_hpet_msi = default_setup_hpet_msi,
-   .msi_mask_irq   = default_msi_mask_irq,
-   .msix_mask_irq  = default_msix_mask_irq,
 };
 
 /* MSI arch specific hooks */
@@ -140,14 +138,6 @@ void arch_restore_msi_irqs(struct pci_dev *dev)
 {
x86_msi.restore_msi_irqs(dev);
 }
-u32 arch_msi_mask_irq(struct msi_desc *desc, u32 mask, u32 flag)
-{
-   return x86_msi.msi_mask_irq(desc, mask, flag);
-}
-u32 arch_msix_mask_irq(struct msi_desc *desc, u32 flag)
-{
-   return x86_msi.msix_mask_irq(desc, flag);
-}
 #endif
 
 struct x86_io_apic_ops x86_io_apic_ops = {
diff --git a/arch/x86/pci/xen.c b/arch/x86/pci/xen.c
index ad03739..84c2fce 100644
--- a/arch/x86/pci/xen.c
+++ b/arch/x86/pci/xen.c
@@ -394,13 +394,9 @@ static void xen_teardown_msi_irq(unsigned int irq)
 {
xen_destroy_irq(irq);
 }
-static u32 xen_nop_msi_mask_irq(struct msi_desc *desc, u32 mask, u32 flag)
-{
-   return 0;
-}
-static u32 xen_nop_msix_mask_irq(struct msi_desc *desc, u32 flag)
+
+void xen_nop_msi_mask(struct irq_data *data)
 {
-   return 0;
 }
 #endif
 
@@ -425,8 +421,8 @@ int __init pci_xen_init(void)
x86_msi.setup_msi_irqs = xen_setup_msi_irqs;
x86_msi.teardown_msi_irq = xen_teardown_msi_irq;
x86_msi.teardown_msi_irqs = xen_teardown_msi_irqs;
-   x86_msi.msi_mask_irq = xen_nop_msi_mask_irq;
-   x86_msi.msix_mask_irq = xen_nop_msix_mask_irq;
+   msi_chip.irq_mask = xen_nop_msi_mask;
+ 

[PATCH v2 03/22] MSI: Remove the redundant irq_set_chip_data()

2014-09-24 Thread Yijing Wang
Currently, the pcie-designware, pcie-rcar and pci-tegra drivers
use irq chip_data to save the msi_chip pointer. They
already call irq_set_chip_data() in their own MSI irq map
functions, so the irq_set_chip_data() call in arch_setup_msi_irq()
is redundant.

Signed-off-by: Yijing Wang wangyij...@huawei.com
---
 drivers/pci/msi.c |5 ++---
 1 files changed, 2 insertions(+), 3 deletions(-)

diff --git a/drivers/pci/msi.c b/drivers/pci/msi.c
index 51d7e62..50f67a3 100644
--- a/drivers/pci/msi.c
+++ b/drivers/pci/msi.c
@@ -41,14 +41,13 @@ int __weak arch_setup_msi_irq(struct pci_dev *dev, struct 
msi_desc *desc)
if (err  0)
return err;
 
-   irq_set_chip_data(desc-irq, chip);
-
return 0;
 }
 
 void __weak arch_teardown_msi_irq(unsigned int irq)
 {
-   struct msi_chip *chip = irq_get_chip_data(irq);
+   struct msi_desc *entry = irq_get_msi_desc(irq);
+   struct msi_chip *chip = entry-dev-bus-msi;
 
if (!chip || !chip-teardown_irq)
return;
-- 
1.7.1

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

[PATCH v2 16/22] Powerpc/MSI: Use MSI chip framework to configure MSI/MSI-X irq

2014-09-24 Thread Yijing Wang
Use MSI chip framework instead of arch MSI functions to configure
MSI/MSI-X irq. So we can manage MSI/MSI-X irq in a unified framework.

Signed-off-by: Yijing Wang wangyij...@huawei.com
Acked-by: Michael Ellerman m...@ellerman.id.au
---
 arch/powerpc/kernel/msi.c |   14 --
 1 files changed, 12 insertions(+), 2 deletions(-)

diff --git a/arch/powerpc/kernel/msi.c b/arch/powerpc/kernel/msi.c
index 71bd161..01781a4 100644
--- a/arch/powerpc/kernel/msi.c
+++ b/arch/powerpc/kernel/msi.c
@@ -13,7 +13,7 @@
 
 #include asm/machdep.h
 
-int arch_setup_msi_irqs(struct pci_dev *dev, int nvec, int type)
+static int ppc_setup_msi_irqs(struct pci_dev *dev, int nvec, int type)
 {
if (!ppc_md.setup_msi_irqs || !ppc_md.teardown_msi_irqs) {
pr_debug(msi: Platform doesn't provide MSI callbacks.\n);
@@ -27,7 +27,17 @@ int arch_setup_msi_irqs(struct pci_dev *dev, int nvec, int 
type)
return ppc_md.setup_msi_irqs(dev, nvec, type);
 }
 
-void arch_teardown_msi_irqs(struct pci_dev *dev)
+static void ppc_teardown_msi_irqs(struct pci_dev *dev)
 {
ppc_md.teardown_msi_irqs(dev);
 }
+
+static struct msi_chip ppc_msi_chip = {
+   .setup_irqs = ppc_setup_msi_irqs,
+   .teardown_irqs = ppc_teardown_msi_irqs,
+};
+
+struct msi_chip *arch_find_msi_chip(struct pci_dev *dev)
+{
+   return ppc_msi_chip;
+}
-- 
1.7.1

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

[PATCH v2 12/22] MIPS/Octeon/MSI: Use MSI chip framework to configure MSI/MSI-X irq

2014-09-24 Thread Yijing Wang
Use MSI chip framework instead of arch MSI functions to configure
MSI/MSI-X irq. So we can manage MSI/MSI-X irq in a unified framework.

Signed-off-by: Yijing Wang wangyij...@huawei.com
---
 arch/mips/pci/msi-octeon.c |   35 ++-
 1 files changed, 22 insertions(+), 13 deletions(-)

diff --git a/arch/mips/pci/msi-octeon.c b/arch/mips/pci/msi-octeon.c
index 63bbe07..14f2d16 100644
--- a/arch/mips/pci/msi-octeon.c
+++ b/arch/mips/pci/msi-octeon.c
@@ -57,7 +57,7 @@ static int msi_irq_size;
  *
  * Returns 0 on success.
  */
-int arch_setup_msi_irq(struct pci_dev *dev, struct msi_desc *desc)
+static int octeon_setup_msi_irq(struct pci_dev *dev, struct msi_desc *desc)
 {
struct msi_msg msg;
u16 control;
@@ -132,12 +132,12 @@ msi_irq_allocated:
/* Make sure the search for available interrupts didn't fail */
if (irq = 64) {
if (request_private_bits) {
-   pr_err(arch_setup_msi_irq: Unable to find %d free 
interrupts, trying just one,
+   pr_err(octeon_setup_msi_irq: Unable to find %d free 
interrupts, trying just one,
   1  request_private_bits);
request_private_bits = 0;
goto try_only_one;
} else
-   panic(arch_setup_msi_irq: Unable to find a free MSI 
interrupt);
+   panic(octeon_setup_msi_irq: Unable to find a free MSI 
interrupt);
}
 
/* MSI interrupts start at logical IRQ OCTEON_IRQ_MSI_BIT0 */
@@ -168,7 +168,7 @@ msi_irq_allocated:
msg.address_hi = (0 + CVMX_SLI_PCIE_MSI_RCV)  32;
break;
default:
-   panic(arch_setup_msi_irq: Invalid octeon_dma_bar_type);
+   panic(octeon_setup_msi_irq: Invalid octeon_dma_bar_type);
}
msg.data = irq - OCTEON_IRQ_MSI_BIT0;
 
@@ -182,7 +182,7 @@ msi_irq_allocated:
return 0;
 }
 
-int arch_setup_msi_irqs(struct pci_dev *dev, int nvec, int type)
+static int octeon_setup_msi_irqs(struct pci_dev *dev, int nvec, int type)
 {
struct msi_desc *entry;
int ret;
@@ -201,7 +201,7 @@ int arch_setup_msi_irqs(struct pci_dev *dev, int nvec, int 
type)
return 1;
 
list_for_each_entry(entry, dev-msi_list, list) {
-   ret = arch_setup_msi_irq(dev, entry);
+   ret = octeon_setup_msi_irq(dev, entry);
if (ret  0)
return ret;
if (ret  0)
@@ -210,14 +210,13 @@ int arch_setup_msi_irqs(struct pci_dev *dev, int nvec, 
int type)
 
return 0;
 }
-
 /**
  * Called when a device no longer needs its MSI interrupts. All
  * MSI interrupts for the device are freed.
  *
  * @irq:The devices first irq number. There may be multple in sequence.
  */
-void arch_teardown_msi_irq(unsigned int irq)
+static void octeon_teardown_msi_irq(unsigned int irq)
 {
int number_irqs;
u64 bitmask;
@@ -226,8 +225,8 @@ void arch_teardown_msi_irq(unsigned int irq)
 
if ((irq  OCTEON_IRQ_MSI_BIT0)
|| (irq  msi_irq_size + OCTEON_IRQ_MSI_BIT0))
-   panic(arch_teardown_msi_irq: Attempted to teardown illegal 
- MSI interrupt (%d), irq);
+   panic(octeon_teardown_msi_irq: Attempted to teardown illegal 
+   MSI interrupt (%d), irq);
 
irq -= OCTEON_IRQ_MSI_BIT0;
index = irq / 64;
@@ -240,7 +239,7 @@ void arch_teardown_msi_irq(unsigned int irq)
 */
number_irqs = 0;
while ((irq0 + number_irqs  64) 
-  (msi_multiple_irq_bitmask[index]
+   (msi_multiple_irq_bitmask[index]
 (1ull  (irq0 + number_irqs
number_irqs++;
number_irqs++;
@@ -249,8 +248,8 @@ void arch_teardown_msi_irq(unsigned int irq)
/* Shift the mask to the correct bit location */
bitmask = irq0;
if ((msi_free_irq_bitmask[index]  bitmask) != bitmask)
-   panic(arch_teardown_msi_irq: Attempted to teardown MSI 
- interrupt (%d) not in use, irq);
+   panic(octeon_teardown_msi_irq: Attempted to teardown MSI 
+   interrupt (%d) not in use, irq);
 
/* Checks are done, update the in use bitmask */
spin_lock(msi_free_irq_bitmask_lock);
@@ -259,6 +258,16 @@ void arch_teardown_msi_irq(unsigned int irq)
spin_unlock(msi_free_irq_bitmask_lock);
 }
 
+static struct msi_chip octeon_msi_chip = {
+   .setup_irqs = octeon_setup_msi_irqs,
+   .teardown_irq = octeon_teardown_msi_irq,
+};
+
+struct msi_chip *arch_find_msi_chip(struct pci_dev *dev)
+{
+   return octeon_msi_chip;
+}
+
 static DEFINE_RAW_SPINLOCK(octeon_irq_msi_lock);
 
 static u64 msi_rcv_reg[4];
-- 
1.7.1

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org

[PATCH v2 18/22] arm/iop13xx/MSI: Use MSI chip framework to configure MSI/MSI-X irq

2014-09-24 Thread Yijing Wang
Use MSI chip framework instead of arch MSI functions to configure
MSI/MSI-X irq. So we can manage MSI/MSI-X irq in a unified framework.

Signed-off-by: Yijing Wang wangyij...@huawei.com
---
 arch/arm/mach-iop13xx/include/mach/pci.h |2 ++
 arch/arm/mach-iop13xx/iq81340mc.c|1 +
 arch/arm/mach-iop13xx/iq81340sc.c|1 +
 arch/arm/mach-iop13xx/msi.c  |9 +++--
 arch/arm/mach-iop13xx/pci.c  |6 ++
 5 files changed, 17 insertions(+), 2 deletions(-)

diff --git a/arch/arm/mach-iop13xx/include/mach/pci.h 
b/arch/arm/mach-iop13xx/include/mach/pci.h
index 59f42b5..7a073cb 100644
--- a/arch/arm/mach-iop13xx/include/mach/pci.h
+++ b/arch/arm/mach-iop13xx/include/mach/pci.h
@@ -10,6 +10,8 @@ struct pci_bus *iop13xx_scan_bus(int nr, struct pci_sys_data 
*);
 void iop13xx_atu_select(struct hw_pci *plat_pci);
 void iop13xx_pci_init(void);
 void iop13xx_map_pci_memory(void);
+void iop13xx_add_bus(struct pci_bus *bus);
+extern struct msi_chip iop13xx_msi_chip;
 
 #define IOP_PCI_STATUS_ERROR (PCI_STATUS_PARITY |   \
   PCI_STATUS_SIG_TARGET_ABORT | \
diff --git a/arch/arm/mach-iop13xx/iq81340mc.c 
b/arch/arm/mach-iop13xx/iq81340mc.c
index 9cd07d3..19d47cb 100644
--- a/arch/arm/mach-iop13xx/iq81340mc.c
+++ b/arch/arm/mach-iop13xx/iq81340mc.c
@@ -59,6 +59,7 @@ static struct hw_pci iq81340mc_pci __initdata = {
.map_irq= iq81340mc_pcix_map_irq,
.scan   = iop13xx_scan_bus,
.preinit= iop13xx_pci_init,
+   .add_bus= iop13xx_add_bus;
 };
 
 static int __init iq81340mc_pci_init(void)
diff --git a/arch/arm/mach-iop13xx/iq81340sc.c 
b/arch/arm/mach-iop13xx/iq81340sc.c
index b3ec11c..4d56993 100644
--- a/arch/arm/mach-iop13xx/iq81340sc.c
+++ b/arch/arm/mach-iop13xx/iq81340sc.c
@@ -61,6 +61,7 @@ static struct hw_pci iq81340sc_pci __initdata = {
.scan   = iop13xx_scan_bus,
.map_irq= iq81340sc_atux_map_irq,
.preinit= iop13xx_pci_init
+   .add_bus= iop13xx_add_bus;
 };
 
 static int __init iq81340sc_pci_init(void)
diff --git a/arch/arm/mach-iop13xx/msi.c b/arch/arm/mach-iop13xx/msi.c
index e7730cf..1a8cb2f 100644
--- a/arch/arm/mach-iop13xx/msi.c
+++ b/arch/arm/mach-iop13xx/msi.c
@@ -132,7 +132,7 @@ static struct irq_chip iop13xx_msi_chip = {
.irq_unmask = unmask_msi_irq,
 };
 
-int arch_setup_msi_irq(struct pci_dev *pdev, struct msi_desc *desc)
+static int iop13xx_setup_msi_irq(struct pci_dev *dev, struct msi_desc *desc)
 {
int id, irq = irq_alloc_desc_from(IRQ_IOP13XX_MSI_0, -1);
struct msi_msg msg;
@@ -159,7 +159,12 @@ int arch_setup_msi_irq(struct pci_dev *pdev, struct 
msi_desc *desc)
return 0;
 }
 
-void arch_teardown_msi_irq(unsigned int irq)
+static void iop13xx_teardown_msi_irq(unsigned int irq)
 {
irq_free_desc(irq);
 }
+
+struct msi_chip iop13xx_chip = {
+   .setup_irq = iop13xx_setup_msi_irq,
+   .teardown_irq = iop13xx_teardown_msi_irq,
+};
diff --git a/arch/arm/mach-iop13xx/pci.c b/arch/arm/mach-iop13xx/pci.c
index 9082b84..f498800 100644
--- a/arch/arm/mach-iop13xx/pci.c
+++ b/arch/arm/mach-iop13xx/pci.c
@@ -962,6 +962,12 @@ void __init iop13xx_atu_select(struct hw_pci *plat_pci)
}
 }
 
+void iop13xx_add_bus(struct pci_bus *bus)
+{
+   if (IS_ENABLED(CONFIG_PCI_MSI)) 
+   bus-msi = iop13xx_msi_chip;
+}
+
 void __init iop13xx_pci_init(void)
 {
/* clear pre-existing south bridge errors */
-- 
1.7.1

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

[PATCH v2 15/22] MIPS/Xlr/MSI: Use MSI chip framework to configure MSI/MSI-X irq

2014-09-24 Thread Yijing Wang
Use MSI chip framework instead of arch MSI functions to configure
MSI/MSI-X irq. So we can manage MSI/MSI-X irq in a unified framework.

Signed-off-by: Yijing Wang wangyij...@huawei.com
---
 arch/mips/pci/pci-xlr.c |   15 +--
 1 files changed, 13 insertions(+), 2 deletions(-)

diff --git a/arch/mips/pci/pci-xlr.c b/arch/mips/pci/pci-xlr.c
index 0dde803..7bd91cc 100644
--- a/arch/mips/pci/pci-xlr.c
+++ b/arch/mips/pci/pci-xlr.c
@@ -214,11 +214,11 @@ static int get_irq_vector(const struct pci_dev *dev)
 }
 
 #ifdef CONFIG_PCI_MSI
-void arch_teardown_msi_irq(unsigned int irq)
+void xlr_teardown_msi_irq(unsigned int irq)
 {
 }
 
-int arch_setup_msi_irq(struct pci_dev *dev, struct msi_desc *desc)
+int xlr_setup_msi_irq(struct pci_dev *dev, struct msi_desc *desc)
 {
struct msi_msg msg;
struct pci_dev *lnk;
@@ -263,6 +263,17 @@ int arch_setup_msi_irq(struct pci_dev *dev, struct 
msi_desc *desc)
write_msi_msg(irq, msg);
return 0;
 }
+
+static struct msi_chip xlr_msi_chip = {
+   .setup_irq = xlr_setup_msi_irq,
+   .teardown_irq = xlr_teardown_msi_irq,
+};
+
+struct msi_chip *arch_find_msi_chip(struct pci_dev *dev)
+{
+   return xlr_msi_chip;
+}
+
 #endif
 
 /* Extra ACK needed for XLR on chip PCI controller */
-- 
1.7.1

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

[PATCH v2 17/22] s390/MSI: Use MSI chip framework to configure MSI/MSI-X irq

2014-09-24 Thread Yijing Wang
Use MSI chip framework instead of arch MSI functions to configure
MSI/MSI-X irq. So we can manage MSI/MSI-X irq in a unified framework.

Signed-off-by: Yijing Wang wangyij...@huawei.com
Acked-by: Sebastian Ott seb...@linux.vnet.ibm.com
---
 arch/s390/pci/pci.c |   14 --
 1 files changed, 12 insertions(+), 2 deletions(-)

diff --git a/arch/s390/pci/pci.c b/arch/s390/pci/pci.c
index 552b990..da5316e 100644
--- a/arch/s390/pci/pci.c
+++ b/arch/s390/pci/pci.c
@@ -358,7 +358,7 @@ static void zpci_irq_handler(struct airq_struct *airq)
}
 }
 
-int arch_setup_msi_irqs(struct pci_dev *pdev, int nvec, int type)
+int zpci_setup_msi_irqs(struct pci_dev *pdev, int nvec, int type)
 {
struct zpci_dev *zdev = get_zdev(pdev);
unsigned int hwirq, msi_vecs;
@@ -434,7 +434,7 @@ out:
return rc;
 }
 
-void arch_teardown_msi_irqs(struct pci_dev *pdev)
+static void zpci_teardown_msi_irqs(struct pci_dev *pdev)
 {
struct zpci_dev *zdev = get_zdev(pdev);
struct msi_desc *msi;
@@ -464,6 +464,16 @@ void arch_teardown_msi_irqs(struct pci_dev *pdev)
airq_iv_free_bit(zpci_aisb_iv, zdev-aisb);
 }
 
+static struct msi_chip zpci_msi_chip = {
+   .setup_irqs = zpci_setup_msi_irqs,
+   .teardown_irqs = zpci_teardown_msi_irqs,
+};
+
+struct msi_chip *arch_find_msi_chip(struct pci_dev *dev)
+{
+   return zpci_msi_chip;
+}
+
 static void zpci_map_resources(struct zpci_dev *zdev)
 {
struct pci_dev *pdev = zdev-pdev;
-- 
1.7.1

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

[PATCH v2 19/22] IA64/MSI: Use MSI chip framework to configure MSI/MSI-X irq

2014-09-24 Thread Yijing Wang
Use MSI chip framework instead of arch MSI functions to configure
MSI/MSI-X irq. So we can manage MSI/MSI-X irq in a unified framework.

Signed-off-by: Yijing Wang wangyij...@huawei.com
---
 arch/ia64/kernel/msi_ia64.c |   18 ++
 1 files changed, 14 insertions(+), 4 deletions(-)

diff --git a/arch/ia64/kernel/msi_ia64.c b/arch/ia64/kernel/msi_ia64.c
index 4efe748..55ac859 100644
--- a/arch/ia64/kernel/msi_ia64.c
+++ b/arch/ia64/kernel/msi_ia64.c
@@ -112,15 +112,15 @@ static struct irq_chip ia64_msi_chip = {
 };
 
 
-int arch_setup_msi_irq(struct pci_dev *pdev, struct msi_desc *desc)
+static int arch_ia64_setup_msi_irq(struct pci_dev *dev, struct msi_desc *desc)
 {
if (platform_setup_msi_irq)
-   return platform_setup_msi_irq(pdev, desc);
+   return platform_setup_msi_irq(dev, desc);
 
-   return ia64_setup_msi_irq(pdev, desc);
+   return ia64_setup_msi_irq(dev, desc);
 }
 
-void arch_teardown_msi_irq(unsigned int irq)
+static void arch_ia64_teardown_msi_irq(unsigned int irq)
 {
if (platform_teardown_msi_irq)
return platform_teardown_msi_irq(irq);
@@ -128,6 +128,16 @@ void arch_teardown_msi_irq(unsigned int irq)
return ia64_teardown_msi_irq(irq);
 }
 
+static struct msi_chip chip = {
+   .setup_irq = arch_ia64_setup_msi_irq,
+   .teardown_irq = arch_ia64_teardown_msi_irq,
+};
+
+struct msi_chip *arch_find_msi_chip(struct pci_dev *dev)
+{
+   return chip;
+}
+
 #ifdef CONFIG_INTEL_IOMMU
 #ifdef CONFIG_SMP
 static int dmar_msi_set_affinity(struct irq_data *data,
-- 
1.7.1

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

[PATCH v2 20/22] Sparc/MSI: Use MSI chip framework to configure MSI/MSI-X irq

2014-09-24 Thread Yijing Wang
Use MSI chip framework instead of arch MSI functions to configure
MSI/MSI-X irq. So we can manage MSI/MSI-X irq in a unified framework.

Signed-off-by: Yijing Wang wangyij...@huawei.com
---
 arch/sparc/kernel/pci.c |   14 --
 1 files changed, 12 insertions(+), 2 deletions(-)

diff --git a/arch/sparc/kernel/pci.c b/arch/sparc/kernel/pci.c
index b36365f..2a89ee2 100644
--- a/arch/sparc/kernel/pci.c
+++ b/arch/sparc/kernel/pci.c
@@ -905,7 +905,7 @@ int pci_domain_nr(struct pci_bus *pbus)
 EXPORT_SYMBOL(pci_domain_nr);
 
 #ifdef CONFIG_PCI_MSI
-int arch_setup_msi_irq(struct pci_dev *pdev, struct msi_desc *desc)
+int sparc_setup_msi_irq(struct pci_dev *pdev, struct msi_desc *desc)
 {
struct pci_pbm_info *pbm = pdev-dev.archdata.host_controller;
unsigned int irq;
@@ -916,7 +916,7 @@ int arch_setup_msi_irq(struct pci_dev *pdev, struct 
msi_desc *desc)
return pbm-setup_msi_irq(irq, pdev, desc);
 }
 
-void arch_teardown_msi_irq(unsigned int irq)
+void sparc_teardown_msi_irq(unsigned int irq)
 {
struct msi_desc *entry = irq_get_msi_desc(irq);
struct pci_dev *pdev = entry-dev;
@@ -925,6 +925,16 @@ void arch_teardown_msi_irq(unsigned int irq)
if (pbm-teardown_msi_irq)
pbm-teardown_msi_irq(irq, pdev);
 }
+
+static struct msi_chip sparc_msi_chip = {
+   .setup_irq = sparc_setup_msi_irq,
+   .teardown_irq = sparc_teardown_msi_irq,
+};
+
+struct msi_chip *arch_find_msi_chip(struct pci_dev *dev)
+{
+   return sparc_msi_chip;
+}
 #endif /* !(CONFIG_PCI_MSI) */
 
 static void ali_sound_dma_hack(struct pci_dev *pdev, int set_bit)
-- 
1.7.1

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

[PATCH v2 21/22] tile/MSI: Use MSI chip framework to configure MSI/MSI-X irq

2014-09-24 Thread Yijing Wang
Use MSI chip framework instead of arch MSI functions to configure
MSI/MSI-X irq. So we can manage MSI/MSI-X irq in a unified framework.

Signed-off-by: Yijing Wang wangyij...@huawei.com
---
 arch/tile/kernel/pci_gx.c |   14 --
 1 files changed, 12 insertions(+), 2 deletions(-)

diff --git a/arch/tile/kernel/pci_gx.c b/arch/tile/kernel/pci_gx.c
index e39f9c5..4912b75 100644
--- a/arch/tile/kernel/pci_gx.c
+++ b/arch/tile/kernel/pci_gx.c
@@ -1485,7 +1485,7 @@ static struct irq_chip tilegx_msi_chip = {
/* TBD: support set_affinity. */
 };
 
-int arch_setup_msi_irq(struct pci_dev *pdev, struct msi_desc *desc)
+static int tile_setup_msi_irq(struct pci_dev *pdev, struct msi_desc *desc)
 {
struct pci_controller *controller;
gxio_trio_context_t *trio_context;
@@ -1604,7 +1604,17 @@ is_64_failure:
return ret;
 }
 
-void arch_teardown_msi_irq(unsigned int irq)
+void tile_teardown_msi_irq(unsigned int irq)
 {
irq_free_hwirq(irq);
 }
+
+static struct msi_chip tile_msi_chip = {
+   .setup_irq = tile_setup_msi_irq,
+   .teardown_irq = tile_teardown_msi_irq,
+};
+
+struct msi_chip *arch_find_msi_chip(struct pci_dev *dev)
+{
+   return tile_msi_chip;
+}
-- 
1.7.1

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

[PATCH v2 22/22] PCI/MSI: Clean up unused MSI arch functions

2014-09-24 Thread Yijing Wang
Now we use struct msi_chip in all platforms to configure
MSI/MSI-X. We can clean up the unused arch functions.

Signed-off-by: Yijing Wang wangyij...@huawei.com
Reviewed-by: Lucas Stach l.st...@pengutronix.de
---
 drivers/iommu/irq_remapping.c |2 +-
 drivers/pci/msi.c |  100 +++-
 include/linux/msi.h   |   14 --
 3 files changed, 39 insertions(+), 77 deletions(-)

diff --git a/drivers/iommu/irq_remapping.c b/drivers/iommu/irq_remapping.c
index 99b1c0f..6e645f0 100644
--- a/drivers/iommu/irq_remapping.c
+++ b/drivers/iommu/irq_remapping.c
@@ -92,7 +92,7 @@ error:
 
/*
 * Restore altered MSI descriptor fields and prevent just destroyed
-* IRQs from tearing down again in default_teardown_msi_irqs()
+* IRQs from tearing down again in teardown_msi_irqs()
 */
msidesc-irq = 0;
msidesc-nvec_used = 0;
diff --git a/drivers/pci/msi.c b/drivers/pci/msi.c
index d10edee..9fe427f 100644
--- a/drivers/pci/msi.c
+++ b/drivers/pci/msi.c
@@ -34,51 +34,31 @@ struct msi_chip * __weak arch_find_msi_chip(struct pci_dev 
*dev)
return dev-bus-msi;
 }
 
-int __weak arch_setup_msi_irq(struct pci_dev *dev, struct msi_desc *desc)
-{
-   struct msi_chip *chip = arch_find_msi_chip(dev);
-   int err;
-
-   if (!chip || !chip-setup_irq)
-   return -EINVAL;
-
-   err = chip-setup_irq(dev, desc);
-   if (err  0)
-   return err;
-
-   return 0;
-}
-
-void __weak arch_teardown_msi_irq(unsigned int irq)
-{
-   struct msi_desc *entry = irq_get_msi_desc(irq);
-   struct msi_chip *chip = entry-dev-bus-msi;
-
-   if (!chip || !chip-teardown_irq)
-   return;
-
-   chip-teardown_irq(irq);
-}
-
-int __weak arch_setup_msi_irqs(struct pci_dev *dev, int nvec, int type)
+int setup_msi_irqs(struct pci_dev *dev, int nvec, int type)
 {
struct msi_desc *entry;
int ret;
struct msi_chip *chip;
 
chip = arch_find_msi_chip(dev);
-   if (chip  chip-setup_irqs)
+   if (!chip)
+   return -EINVAL;
+
+   if (chip-setup_irqs)
return chip-setup_irqs(dev, nvec, type);
 
/*
 * If an architecture wants to support multiple MSI, it needs to
-* override arch_setup_msi_irqs()
+* implement chip-setup_irqs().
 */
if (type == PCI_CAP_ID_MSI  nvec  1)
return 1;
 
+   if (!chip-setup_irq)
+   return -EINVAL;
+
list_for_each_entry(entry, dev-msi_list, list) {
-   ret = arch_setup_msi_irq(dev, entry);
+   ret = chip-setup_irq(dev, entry);
if (ret  0)
return ret;
if (ret  0)
@@ -88,13 +68,20 @@ int __weak arch_setup_msi_irqs(struct pci_dev *dev, int 
nvec, int type)
return 0;
 }
 
-/*
- * We have a default implementation available as a separate non-weak
- * function, as it is used by the Xen x86 PCI code
- */
-void default_teardown_msi_irqs(struct pci_dev *dev)
+static void teardown_msi_irqs(struct pci_dev *dev)
 {
struct msi_desc *entry;
+   struct msi_chip *chip;
+
+   chip = arch_find_msi_chip(dev);
+   if (!chip)
+   return;
+
+   if (chip-teardown_irqs)
+   return chip-teardown_irqs(dev);
+
+   if (!chip-teardown_irq)
+   return;
 
list_for_each_entry(entry, dev-msi_list, list) {
int i, nvec;
@@ -105,20 +92,10 @@ void default_teardown_msi_irqs(struct pci_dev *dev)
else
nvec = 1  entry-msi_attrib.multiple;
for (i = 0; i  nvec; i++)
-   arch_teardown_msi_irq(entry-irq + i);
+   chip-teardown_irq(entry-irq + i);
}
 }
 
-void __weak arch_teardown_msi_irqs(struct pci_dev *dev)
-{
-   struct msi_chip *chip = arch_find_msi_chip(dev);
-
-   if (chip  chip-teardown_irqs)
-   return chip-teardown_irqs(dev);
-
-   return default_teardown_msi_irqs(dev);
-}
-
 static void default_restore_msi_irq(struct pci_dev *dev, int irq)
 {
struct msi_desc *entry;
@@ -137,10 +114,18 @@ static void default_restore_msi_irq(struct pci_dev *dev, 
int irq)
write_msi_msg(irq, entry-msg);
 }
 
-void __weak arch_restore_msi_irqs(struct pci_dev *dev)
+static void default_restore_msi_irqs(struct pci_dev *dev)
 {
-   struct msi_chip *chip = arch_find_msi_chip(dev);
+   struct msi_desc *entry = NULL;
+
+   list_for_each_entry(entry, dev-msi_list, list) {
+   default_restore_msi_irq(dev, entry-irq);
+   }
+}
 
+static void restore_msi_irqs(struct pci_dev *dev)
+{
+   struct msi_chip *chip = arch_find_msi_chip(dev);
if (chip  chip-restore_irqs)
return chip-restore_irqs(dev);
 
@@ -249,15 +234,6 @@ void unmask_msi_irq(struct irq_data *data)
msi_set_mask_bit(data, 0);
 }
 

[PATCH v2 08/22] x86/MSI: Use MSI chip framework to configure MSI/MSI-X irq

2014-09-24 Thread Yijing Wang
Use MSI chip framework instead of arch MSI functions to configure
MSI/MSI-X irq. So we can manage MSI/MSI-X irq in a unified framework.

Signed-off-by: Yijing Wang wangyij...@huawei.com
---
 arch/x86/include/asm/pci.h |1 +
 arch/x86/kernel/apic/io_apic.c |   12 
 2 files changed, 13 insertions(+), 0 deletions(-)

diff --git a/arch/x86/include/asm/pci.h b/arch/x86/include/asm/pci.h
index 0892ea0..878a06d 100644
--- a/arch/x86/include/asm/pci.h
+++ b/arch/x86/include/asm/pci.h
@@ -101,6 +101,7 @@ void native_teardown_msi_irq(unsigned int irq);
 void native_restore_msi_irqs(struct pci_dev *dev);
 int setup_msi_irq(struct pci_dev *dev, struct msi_desc *msidesc,
  unsigned int irq_base, unsigned int irq_offset);
+extern struct msi_chip *x86_msi_chip;
 #else
 #define native_setup_msi_irqs  NULL
 #define native_teardown_msi_irqNULL
diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c
index 2a2ec28..882b95e 100644
--- a/arch/x86/kernel/apic/io_apic.c
+++ b/arch/x86/kernel/apic/io_apic.c
@@ -3337,6 +3337,18 @@ int default_setup_hpet_msi(unsigned int irq, unsigned 
int id)
 }
 #endif
 
+struct msi_chip apic_msi_chip = {
+   .setup_irqs = native_setup_msi_irqs,
+   .teardown_irq = native_teardown_msi_irq,
+};
+
+struct msi_chip *arch_find_msi_chip(struct pci_dev *dev)
+{
+   return x86_msi_chip;
+}
+
+struct msi_chip *x86_msi_chip = apic_msi_chip;
+
 #endif /* CONFIG_PCI_MSI */
 /*
  * Hypertransport interrupt support
-- 
1.7.1

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

Re: [2/5] powerpc/eeh: Add eeh_pe_state sysfs entry

2014-09-24 Thread Michael Ellerman
On Sun, 2014-17-08 at 03:02:26 UTC, Gavin Shan wrote:
 The patch adds sysfs entry eeh_pe_state. Reading on it returns
 the PE's state while writing to it clears the frozen state. It's
 used to check or clear the PE frozen state from userland for
 debugging purpose.
 
 diff --git a/arch/powerpc/kernel/eeh_sysfs.c b/arch/powerpc/kernel/eeh_sysfs.c
 index e2595ba..e69bcbb 100644
 --- a/arch/powerpc/kernel/eeh_sysfs.c
 +++ b/arch/powerpc/kernel/eeh_sysfs.c
 @@ -54,6 +54,63 @@ EEH_SHOW_ATTR(eeh_mode,mode,
 0x%x);
  EEH_SHOW_ATTR(eeh_config_addr, config_addr, 0x%x);
  EEH_SHOW_ATTR(eeh_pe_config_addr,  pe_config_addr,  0x%x);
  
 +static ssize_t eeh_pe_state_show(struct device *dev,
 +  struct device_attribute *attr, char *buf)
 +{
 + struct pci_dev *pdev = to_pci_dev(dev);
 + struct eeh_dev *edev = pci_dev_to_eeh_dev(pdev);
 + int state;
 +
 + if (!edev || !edev-pe)
 + return 0;
 +
 + state = eeh_ops-get_state(edev-pe, NULL);
 + return sprintf(buf, PHB#%d-PE#%d: 0x%08x 0x%08x\n,
 +edev-pe-phb-global_number,
 +edev-pe-addr, state, edev-pe-state);

Shouldn't this only display the state, i.e. not the number and addr etc.?

And why are there two states, state and edev->pe->state?

 +static ssize_t eeh_pe_state_store(struct device *dev,
 +   struct device_attribute *attr,
 +   const char *buf, size_t count)
 +{
 + struct pci_dev *pdev = to_pci_dev(dev);
 + struct eeh_dev *edev = pci_dev_to_eeh_dev(pdev);
 + int ret;
 +
 + if (!edev || !edev-pe)
 + return 0;

Shouldn't that be an error?

 + /* Nothing to do if it's not frozen */
 + if (!(edev-pe-state  EEH_PE_ISOLATED))
 + return 0;
 +
 + /* Enable MMIO */
 + ret = eeh_pci_enable(edev-pe, EEH_OPT_THAW_MMIO);
 + if (ret) {
 + pr_warn(%s: Failure %d enabling MMIO for PHB#%d-PE#%d\n,
 + __func__, ret, edev-pe-phb-global_number,
 + edev-pe-addr);
 + return 0;

Error ?

 + }
 +
 + /* Enable DMA */
 + ret = eeh_pci_enable(edev-pe, EEH_OPT_THAW_DMA);
 + if (ret) {
 + pr_warn(%s: Failure %d enabling DMA for PHB#%d-PE#%d\n,
 + __func__, ret, edev-pe-phb-global_number,
 + edev-pe-addr);
 + return 0;

Error?

And should it roll back, i.e. undo the MMIO thaw?


cheers
___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

[PATCH 1/3] powerpc/eeh: Dump PCI config space for all child devices

2014-09-24 Thread Gavin Shan
PEs can be nested. The current implementation doesn't dump the
PCI config space for subordinate devices of child PEs. However,
the frozen PE could have been caused by those subordinate devices
of its child PEs.

The patch dumps PCI config space for all subordinate devices of the
problematic PE.

Signed-off-by: Gavin Shan gws...@linux.vnet.ibm.com
---
 arch/powerpc/kernel/eeh.c | 35 ---
 1 file changed, 20 insertions(+), 15 deletions(-)

diff --git a/arch/powerpc/kernel/eeh.c b/arch/powerpc/kernel/eeh.c
index 6c88d781..03d75cb 100644
--- a/arch/powerpc/kernel/eeh.c
+++ b/arch/powerpc/kernel/eeh.c
@@ -117,7 +117,7 @@ static DEFINE_MUTEX(eeh_dev_mutex);
  * not dynamically alloced, so that it ends up in RMO where RTAS
  * can access it.
  */
-#define EEH_PCI_REGS_LOG_LEN 4096
+#define EEH_PCI_REGS_LOG_LEN 8192
 static unsigned char pci_regs_buf[EEH_PCI_REGS_LOG_LEN];
 
 /*
@@ -148,16 +148,12 @@ static int __init eeh_setup(char *str)
 }
 __setup(eeh=, eeh_setup);
 
-/**
- * eeh_gather_pci_data - Copy assorted PCI config space registers to buff
- * @edev: device to report data for
- * @buf: point to buffer in which to log
- * @len: amount of room in buffer
- *
- * This routine captures assorted PCI configuration space data,
- * and puts them into a buffer for RTAS error logging.
+/*
+ * This routine captures assorted PCI configuration space data
+ * for the indicated PCI device, and puts them into a buffer
+ * for RTAS error logging.
  */
-static size_t eeh_gather_pci_data(struct eeh_dev *edev, char *buf, size_t len)
+static size_t eeh_dump_dev_log(struct eeh_dev *edev, char *buf, size_t len)
 {
struct device_node *dn = eeh_dev_to_of_node(edev);
u32 cfg;
@@ -255,6 +251,19 @@ static size_t eeh_gather_pci_data(struct eeh_dev *edev, 
char *buf, size_t len)
return n;
 }
 
+static void *eeh_dump_pe_log(void *data, void *flag)
+{
+   struct eeh_pe *pe = data;
+   struct eeh_dev *edev, *tmp;
+   size_t *plen = flag;
+
+   eeh_pe_for_each_dev(pe, edev, tmp)
+   *plen += eeh_dump_dev_log(edev, pci_regs_buf + *plen,
+ EEH_PCI_REGS_LOG_LEN - *plen);
+
+   return NULL;
+}
+
 /**
  * eeh_slot_error_detail - Generate combined log including driver log and 
error log
  * @pe: EEH PE
@@ -268,7 +277,6 @@ static size_t eeh_gather_pci_data(struct eeh_dev *edev, 
char *buf, size_t len)
 void eeh_slot_error_detail(struct eeh_pe *pe, int severity)
 {
size_t loglen = 0;
-   struct eeh_dev *edev, *tmp;
 
/*
 * When the PHB is fenced or dead, it's pointless to collect
@@ -286,10 +294,7 @@ void eeh_slot_error_detail(struct eeh_pe *pe, int severity)
eeh_pe_restore_bars(pe);
 
pci_regs_buf[0] = 0;
-   eeh_pe_for_each_dev(pe, edev, tmp) {
-   loglen += eeh_gather_pci_data(edev, pci_regs_buf + 
loglen,
- EEH_PCI_REGS_LOG_LEN - 
loglen);
-   }
+   eeh_pe_traverse(pe, eeh_dump_pe_log, loglen);
}
 
eeh_ops-get_log(pe, severity, pci_regs_buf, loglen);
-- 
1.8.3.2

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

[PATCH 2/3] powerpc/powernv: Fetch frozen PE on top level

2014-09-24 Thread Gavin Shan
It should have been part of commit 1ad7a72c5 (powerpc/eeh: Report
frozen parent PE prior to child PE). There are 2 ways to report
EEH errors: proactive polling triggered by PCI config or IO
accesses, or interrupt-driven events. We failed to report and handle
the frozen parent PE prior to the frozen child PE in the latter case
on the PowerNV platform.

Signed-off-by: Gavin Shan gws...@linux.vnet.ibm.com
---
 arch/powerpc/platforms/powernv/eeh-ioda.c | 48 ++-
 1 file changed, 34 insertions(+), 14 deletions(-)

diff --git a/arch/powerpc/platforms/powernv/eeh-ioda.c 
b/arch/powerpc/platforms/powernv/eeh-ioda.c
index f248586..57de63c 100644
--- a/arch/powerpc/platforms/powernv/eeh-ioda.c
+++ b/arch/powerpc/platforms/powernv/eeh-ioda.c
@@ -880,14 +880,12 @@ static int ioda_eeh_get_pe(struct pci_controller *hose,
 * the master PE because slave PE is invisible
 * to EEH core.
 */
-   if (phb-get_pe_state) {
-   pnv_pe = phb-ioda.pe_array[pe_no];
-   if (pnv_pe-flags  PNV_IODA_PE_SLAVE) {
-   pnv_pe = pnv_pe-master;
-   WARN_ON(!pnv_pe ||
-   !(pnv_pe-flags  PNV_IODA_PE_MASTER));
-   pe_no = pnv_pe-pe_number;
-   }
+   pnv_pe = phb-ioda.pe_array[pe_no];
+   if (pnv_pe-flags  PNV_IODA_PE_SLAVE) {
+   pnv_pe = pnv_pe-master;
+   WARN_ON(!pnv_pe ||
+   !(pnv_pe-flags  PNV_IODA_PE_MASTER));
+   pe_no = pnv_pe-pe_number;
}
 
/* Find the PE according to PE# */
@@ -898,15 +896,37 @@ static int ioda_eeh_get_pe(struct pci_controller *hose,
if (!dev_pe)
return -EEXIST;
 
-   /*
-* At this point, we're sure the compound PE should
-* be put into frozen state.
-*/
+   /* Freeze the (compound) PE */
*pe = dev_pe;
-   if (phb-freeze_pe 
-   !(dev_pe-state  EEH_PE_ISOLATED))
+   if (!(dev_pe-state  EEH_PE_ISOLATED))
phb-freeze_pe(phb, pe_no);
 
+   /*
+* At this point, we're sure the (compound) PE should
+* have been frozen. However, we still need poke until
+* hitting the frozen PE on top level.
+*/
+   dev_pe = dev_pe-parent;
+   while (dev_pe  !(dev_pe-type  EEH_PE_PHB)) {
+   int ret;
+   int active_flags = (EEH_STATE_MMIO_ACTIVE |
+   EEH_STATE_DMA_ACTIVE);
+
+   ret = eeh_ops-get_state(dev_pe, NULL);
+   if (ret = 0 || (ret  active_flags) == active_flags) {
+   dev_pe = dev_pe-parent;
+   continue;
+   }
+
+   /* Frozen parent PE */
+   *pe = dev_pe;
+   if (!(dev_pe-state  EEH_PE_ISOLATED))
+   phb-freeze_pe(phb, dev_pe-addr);
+
+   /* Next one */
+   dev_pe = dev_pe-parent;
+   }
+
return 0;
 }
 
-- 
1.8.3.2

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

[RFC PATCH] powerpc/powernv: Fix dma_ops for bypass window

2014-09-24 Thread Gavin Shan
When using the bypass window on IODA2, the incorrect DMA operations
(dma_iommu_ops) are used by devices. The device driver calls
dma_get_required_mask() to determine whether to use the 32-bit or the
bypass DMA window. Unfortunately, the returned DMA mask always forces
the driver to use the 32-bit DMA window. The problem was reported on
the device as follows:

0004:03:00.0 0107: 1000:0087 (rev 05)
0004:03:00.0 Serial Attached SCSI controller: LSI Logic / Symbios \
 Logic SAS2308 PCI-Express Fusion-MPT SAS-2 (rev 05)

The patch fixes the above issue by keeping things consistent: when
enabling the bypass window, we switch to dma_direct_ops. Conversely,
we switch to pci_dma_ops when disabling the bypass window.

Reported-by: Murali N. Iyer mni...@us.ibm.com
Signed-off-by: Gavin Shan gws...@linux.vnet.ibm.com
---
 arch/powerpc/platforms/powernv/pci-ioda.c | 76 +++
 1 file changed, 46 insertions(+), 30 deletions(-)

diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c 
b/arch/powerpc/platforms/powernv/pci-ioda.c
index 36b1a7a..60e44d9 100644
--- a/arch/powerpc/platforms/powernv/pci-ioda.c
+++ b/arch/powerpc/platforms/powernv/pci-ioda.c
@@ -883,13 +883,29 @@ static int pnv_pci_ioda_dma_set_mask(struct pnv_phb *phb,
set_dma_offset(pdev-dev, pe-tce_bypass_base);
} else {
dev_info(pdev-dev, Using 32-bit DMA via iommu\n);
-   set_dma_ops(pdev-dev, dma_iommu_ops);
+   set_dma_ops(pdev-dev, get_pci_dma_ops());
set_iommu_table_base(pdev-dev, pe-tce32_table);
}
*pdev-dev.dma_mask = dma_mask;
return 0;
 }
 
+static void pnv_ioda_setup_dev_dma(struct pnv_ioda_pe *pe,
+  struct pci_dev *pdev,
+  bool add_to_iommu_group)
+{
+   if (pe-tce_bypass_enabled) {
+   set_dma_ops(pdev-dev, dma_direct_ops);
+   set_dma_offset(pdev-dev, pe-tce_bypass_base);
+   } else {
+   set_dma_ops(pdev-dev, get_pci_dma_ops());
+   set_iommu_table_base(pdev-dev, pe-tce32_table);
+   }
+
+   if (add_to_iommu_group)
+   iommu_add_device(pdev-dev);
+}
+
 static void pnv_ioda_setup_bus_dma(struct pnv_ioda_pe *pe,
   struct pci_bus *bus,
   bool add_to_iommu_group)
@@ -897,11 +913,7 @@ static void pnv_ioda_setup_bus_dma(struct pnv_ioda_pe *pe,
struct pci_dev *dev;
 
list_for_each_entry(dev, bus-devices, bus_list) {
-   if (add_to_iommu_group)
-   set_iommu_table_base_and_group(dev-dev,
-  pe-tce32_table);
-   else
-   set_iommu_table_base(dev-dev, pe-tce32_table);
+   pnv_ioda_setup_dev_dma(pe, dev, add_to_iommu_group);
 
if (dev-subordinate)
pnv_ioda_setup_bus_dma(pe, dev-subordinate,
@@ -909,6 +921,15 @@ static void pnv_ioda_setup_bus_dma(struct pnv_ioda_pe *pe,
}
 }
 
+static void pnv_ioda_setup_pe_dma(struct pnv_ioda_pe *pe,
+ bool add_to_iommu_group)
+{
+   if (pe-pdev)
+   pnv_ioda_setup_dev_dma(pe, pe-pdev, add_to_iommu_group);
+   else
+   pnv_ioda_setup_bus_dma(pe, pe-pbus, add_to_iommu_group);
+}
+
 static void pnv_pci_ioda1_tce_invalidate(struct pnv_ioda_pe *pe,
 struct iommu_table *tbl,
 __be64 *startp, __be64 *endp, bool rm)
@@ -1080,11 +1101,7 @@ static void pnv_pci_ioda_setup_dma_pe(struct pnv_phb 
*phb,
iommu_init_table(tbl, phb-hose-node);
iommu_register_group(tbl, phb-hose-global_number, pe-pe_number);
 
-   if (pe-pdev)
-   set_iommu_table_base_and_group(pe-pdev-dev, tbl);
-   else
-   pnv_ioda_setup_bus_dma(pe, pe-pbus, true);
-
+   pnv_ioda_setup_pe_dma(pe, true);
return;
  fail:
/* XXX Failure: Try to fallback to 64-bit only ? */
@@ -1101,7 +1118,13 @@ static void pnv_pci_ioda2_set_bypass(struct iommu_table 
*tbl, bool enable)
uint16_t window_id = (pe-pe_number  1 ) + 1;
int64_t rc;
 
+   /* Check if we really need do something */
+   if (pe-tce_bypass_enabled == enable)
+   return;
+
pe_info(pe, %sabling 64-bit DMA bypass\n, enable ? En : Dis);
+   pe-tce_bypass_enabled = enable;
+
if (enable) {
phys_addr_t top = memblock_end_of_DRAM();
 
@@ -1117,22 +1140,15 @@ static void pnv_pci_ioda2_set_bypass(struct iommu_table 
*tbl, bool enable)
 window_id,
 pe-tce_bypass_base,
 0);
-
-   /*
-* EEH needs the mapping between IOMMU table and group
-* of those VFIO/KVM 

Re: [1/4] powerpc/powernv: Sync header with firmware

2014-09-24 Thread Michael Ellerman
On Tue, 2014-26-08 at 07:56:16 UTC, Gavin Shan wrote:
 From: Mike Qiu qiud...@linux.vnet.ibm.com
 
 The patch synchronizes firmware header file (opal.h) for PCI error
 injection.
 
 diff --git a/arch/powerpc/include/asm/opal.h b/arch/powerpc/include/asm/opal.h
 index 4593a93..9113653 100644
 --- a/arch/powerpc/include/asm/opal.h
 +++ b/arch/powerpc/include/asm/opal.h
 @@ -200,6 +201,33 @@ enum OpalPciErrorSeverity {
   OPAL_EEH_SEV_INF= 5
  };
  
 +enum OpalErrinjctType {
 + OpalErrinjctTypeIoaBusError = 0,
 + OpalErrinjctTypeIoaBusError64   = 1,
 +
 + /* IoaBusError  IoaBusError64 */
 + OpalEjtIoaLoadMemAddr   = 0,
 + OpalEjtIoaLoadMemData   = 1,
 + OpalEjtIoaLoadIoAddr= 2,
 + OpalEjtIoaLoadIoData= 3,
 + OpalEjtIoaLoadConfigAddr= 4,
 + OpalEjtIoaLoadConfigData= 5,
 + OpalEjtIoaStoreMemAddr  = 6,
 + OpalEjtIoaStoreMemData  = 7,
 + OpalEjtIoaStoreIoAddr   = 8,
 + OpalEjtIoaStoreIoData   = 9,
 + OpalEjtIoaStoreConfigAddr   = 10,
 + OpalEjtIoaStoreConfigData   = 11,
 + OpalEjtIoaDmaReadMemAddr= 12,
 + OpalEjtIoaDmaReadMemData= 13,
 + OpalEjtIoaDmaReadMemMaster  = 14,
 + OpalEjtIoaDmaReadMemTarget  = 15,
 + OpalEjtIoaDmaWriteMemAddr   = 16,
 + OpalEjtIoaDmaWriteMemData   = 17,
 + OpalEjtIoaDmaWriteMemMaster = 18,
 + OpalEjtIoaDmaWriteMemTarget = 19,
 +};

I realise these come from the skiboot source, but they're just too ugly.

Please use kernel style naming, like most of the rest of the file, eg:

OPAL_ERR_INJECT_IOA_BUS_ERR

Also this enum seems to contain two separate types, the first two values are
the type, and the rest are functions.

The only usage I see is:

/* Sanity check on error type */
if (type  OpalErrinjctTypeIoaBusError   ||
type  OpalErrinjctTypeIoaBusError64 ||
function  OpalEjtIoaLoadMemAddr ||
function  OpalEjtIoaDmaWriteMemTarget) {
pr_warn(%s: Invalid error type %d-%d\n,
__func__, type, function);
return -ERANGE;
}

So we could also just do:

# define OPAL_ERR_INJECT_TYPE_MIN   0
# define OPAL_ERR_INJECT_TYPE_MAX   1

# define OPAL_ERR_INJECT_FUNC_MIN   0
# define OPAL_ERR_INJECT_FUNC_MAX   19

if (type < OPAL_ERR_INJECT_TYPE_MIN ||
type > OPAL_ERR_INJECT_TYPE_MAX ||
function < OPAL_ERR_INJECT_FUNC_MIN ||
function > OPAL_ERR_INJECT_FUNC_MAX)
{
pr_warn(%s: Invalid error type %d-%d\n, __func__, type, 
function);
return -ERANGE;
}


cheers
___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

Re: [2/5] powerpc/eeh: Add eeh_pe_state sysfs entry

2014-09-24 Thread Gavin Shan
On Thu, Sep 25, 2014 at 02:09:58PM +1000, Michael Ellerman wrote:
On Sun, 2014-17-08 at 03:02:26 UTC, Gavin Shan wrote:
 The patch adds sysfs entry eeh_pe_state. Reading on it returns
 the PE's state while writing to it clears the frozen state. It's
 used to check or clear the PE frozen state from userland for
 debugging purpose.
 
 diff --git a/arch/powerpc/kernel/eeh_sysfs.c 
 b/arch/powerpc/kernel/eeh_sysfs.c
 index e2595ba..e69bcbb 100644
 --- a/arch/powerpc/kernel/eeh_sysfs.c
 +++ b/arch/powerpc/kernel/eeh_sysfs.c
 @@ -54,6 +54,63 @@ EEH_SHOW_ATTR(eeh_mode,mode,
 0x%x);
  EEH_SHOW_ATTR(eeh_config_addr, config_addr, 0x%x);
  EEH_SHOW_ATTR(eeh_pe_config_addr,  pe_config_addr,  0x%x);
  
 +static ssize_t eeh_pe_state_show(struct device *dev,
 + struct device_attribute *attr, char *buf)
 +{
 +struct pci_dev *pdev = to_pci_dev(dev);
 +struct eeh_dev *edev = pci_dev_to_eeh_dev(pdev);
 +int state;
 +
 +if (!edev || !edev-pe)
 +return 0;
 +
 +state = eeh_ops-get_state(edev-pe, NULL);
 +return sprintf(buf, PHB#%d-PE#%d: 0x%08x 0x%08x\n,
 +   edev-pe-phb-global_number,
 +   edev-pe-addr, state, edev-pe-state);

Shouldn't this only display the state, ie not the number and addr etc.


Yes, I'll remove PHB#%d-PE#%d in next revision. Another sysfs entry
gives the PE number: /sys/bus/pci/devices/:xx:xx.x/eeh_pe_config_addr

And why are there two states, state and edev-pe-state ?


state is from hardware, edev-pe-state is software maintained state.

 +static ssize_t eeh_pe_state_store(struct device *dev,
 +  struct device_attribute *attr,
 +  const char *buf, size_t count)
 +{
 +struct pci_dev *pdev = to_pci_dev(dev);
 +struct eeh_dev *edev = pci_dev_to_eeh_dev(pdev);
 +int ret;
 +
 +if (!edev || !edev-pe)
 +return 0;

Shouldn't that be an error?

 +/* Nothing to do if it's not frozen */
 +if (!(edev-pe-state  EEH_PE_ISOLATED))
 +return 0;
 +
 +/* Enable MMIO */
 +ret = eeh_pci_enable(edev-pe, EEH_OPT_THAW_MMIO);
 +if (ret) {
 +pr_warn(%s: Failure %d enabling MMIO for PHB#%d-PE#%d\n,
 +__func__, ret, edev-pe-phb-global_number,
 +edev-pe-addr);
 +return 0;

Error ?

 +}
 +
 +/* Enable DMA */
 +ret = eeh_pci_enable(edev-pe, EEH_OPT_THAW_DMA);
 +if (ret) {
 +pr_warn(%s: Failure %d enabling DMA for PHB#%d-PE#%d\n,
 +__func__, ret, edev-pe-phb-global_number,
 +edev-pe-addr);
 +return 0;

Error?


Yes, I'll fix all Error cases.

And should it roll back, ie. unthaw MMIO?


It's not necessary as it's only for debugging purpose. The main
purpose is to keep dumping the PE hardware/software state when
recovering one specific PE.

Thanks,
Gavin

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

Re: [1/4] powerpc/powernv: Sync header with firmware

2014-09-24 Thread Gavin Shan
On Thu, Sep 25, 2014 at 02:27:47PM +1000, Michael Ellerman wrote:
On Tue, 2014-26-08 at 07:56:16 UTC, Gavin Shan wrote:
 From: Mike Qiu qiud...@linux.vnet.ibm.com
 
 The patch synchronizes firmware header file (opal.h) for PCI error
 injection.
 
 diff --git a/arch/powerpc/include/asm/opal.h 
 b/arch/powerpc/include/asm/opal.h
 index 4593a93..9113653 100644
 --- a/arch/powerpc/include/asm/opal.h
 +++ b/arch/powerpc/include/asm/opal.h
 @@ -200,6 +201,33 @@ enum OpalPciErrorSeverity {
  OPAL_EEH_SEV_INF= 5
  };
  
 +enum OpalErrinjctType {
 +OpalErrinjctTypeIoaBusError = 0,
 +OpalErrinjctTypeIoaBusError64   = 1,
 +
 +/* IoaBusError  IoaBusError64 */
 +OpalEjtIoaLoadMemAddr   = 0,
 +OpalEjtIoaLoadMemData   = 1,
 +OpalEjtIoaLoadIoAddr= 2,
 +OpalEjtIoaLoadIoData= 3,
 +OpalEjtIoaLoadConfigAddr= 4,
 +OpalEjtIoaLoadConfigData= 5,
 +OpalEjtIoaStoreMemAddr  = 6,
 +OpalEjtIoaStoreMemData  = 7,
 +OpalEjtIoaStoreIoAddr   = 8,
 +OpalEjtIoaStoreIoData   = 9,
 +OpalEjtIoaStoreConfigAddr   = 10,
 +OpalEjtIoaStoreConfigData   = 11,
 +OpalEjtIoaDmaReadMemAddr= 12,
 +OpalEjtIoaDmaReadMemData= 13,
 +OpalEjtIoaDmaReadMemMaster  = 14,
 +OpalEjtIoaDmaReadMemTarget  = 15,
 +OpalEjtIoaDmaWriteMemAddr   = 16,
 +OpalEjtIoaDmaWriteMemData   = 17,
 +OpalEjtIoaDmaWriteMemMaster = 18,
 +OpalEjtIoaDmaWriteMemTarget = 19,
 +};

I realise these come from the skiboot source, but they're just too ugly.

Please use kernel style naming, like most of the rest of the file, eg:

   OPAL_ERR_INJECT_IOA_BUS_ERR

Also this enum seems to contain two separate types, the first two values are
the type, and the rest are functions.


Yes, two separate types: One is major error type, another one is
specific error type in that domain.

The only usage I see is:

   /* Sanity check on error type */
   if (type  OpalErrinjctTypeIoaBusError   ||
   type  OpalErrinjctTypeIoaBusError64 ||
   function  OpalEjtIoaLoadMemAddr ||
   function  OpalEjtIoaDmaWriteMemTarget) {
   pr_warn(%s: Invalid error type %d-%d\n,
   __func__, type, function);
   return -ERANGE;
   }

So we could also just do:

# define OPAL_ERR_INJECT_TYPE_MIN  0
# define OPAL_ERR_INJECT_TYPE_MAX  1

# define OPAL_ERR_INJECT_FUNC_MIN  0
# define OPAL_ERR_INJECT_FUNC_MAX  19

   if (type  OPAL_ERR_INJECT_TYPE_MIN ||
   type  OPAL_ERR_INJECT_TYPE_MAX ||
   function  OPAL_ERR_INJECT_FUNC_MIN ||
   function  OPAL_ERR_INJECT_FUNC_MIN)
   {
   pr_warn(%s: Invalid error type %d-%d\n, __func__, type, 
 function);
   return -ERANGE;
   }


Ok. I'll take this and put it into next revision.

Thanks,
Gavin

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

[PATCH] powerpc: Print instruction when logging unhandled exceptions

2014-09-24 Thread Anton Blanchard
It is often useful to see the instruction that caused an unhandled
exception.

Signed-off-by: Anton Blanchard an...@samba.org
---
 arch/powerpc/kernel/traps.c | 17 +
 1 file changed, 13 insertions(+), 4 deletions(-)

diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c
index 0dc43f9..27e30c8 100644
--- a/arch/powerpc/kernel/traps.c
+++ b/arch/powerpc/kernel/traps.c
@@ -36,10 +36,10 @@
 #include linux/debugfs.h
 #include linux/ratelimit.h
 #include linux/context_tracking.h
+#include linux/uaccess.h
 
 #include asm/emulated_ops.h
 #include asm/pgtable.h
-#include asm/uaccess.h
 #include asm/io.h
 #include asm/machdep.h
 #include asm/rtas.h
@@ -242,9 +242,9 @@ void _exception(int signr, struct pt_regs *regs, int code, 
unsigned long addr)
 {
siginfo_t info;
const char fmt32[] = KERN_INFO %s[%d]: unhandled signal %d  \
-   at %08lx nip %08lx lr %08lx code %x\n;
+   at %08lx nip %08lx lr %08lx code %x insn %08x\n;
const char fmt64[] = KERN_INFO %s[%d]: unhandled signal %d  \
-   at %016lx nip %016lx lr %016lx code %x\n;
+   at %016lx nip %016lx lr %016lx code %x insn %08x\n;
 
if (!user_mode(regs)) {
die(Exception in kernel mode, regs, signr);
@@ -252,9 +252,18 @@ void _exception(int signr, struct pt_regs *regs, int code, 
unsigned long addr)
}
 
if (show_unhandled_signals  unhandled_signal(current, signr)) {
+   u32 __user *nia = (u32 __user *)regs-nip;
+   u32 insn = 0;
+
+   pagefault_disable();
+   if (!access_ok(VERIFY_READ, nia, sizeof(*nia)) ||
+   __get_user_inatomic(insn, nia))
+   insn = 0xUL;
+   pagefault_enable();
+
printk_ratelimited(regs-msr  MSR_64BIT ? fmt64 : fmt32,
   current-comm, current-pid, signr,
-  addr, regs-nip, regs-link, code);
+  addr, regs-nip, regs-link, code, insn);
}
 
if (arch_irqs_disabled()  !arch_irq_disabled_regs(regs))
-- 
1.9.1

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

Re: [PATCH v2 03/13] powerpc/spapr: vfio: Implement spapr_tce_iommu_ops

2014-09-24 Thread Alexey Kardashevskiy
On 09/24/2014 06:42 AM, Alex Williamson wrote:
 On Tue, 2014-09-23 at 13:00 +1000, Alexey Kardashevskiy wrote:
 Modern IBM POWERPC systems support multiple IOMMU tables per PE
 so we need a more reliable way (compared to container_of()) to get
 a PE pointer from the iommu_table struct pointer used in IOMMU functions.

 At the moment IOMMU group data points to an iommu_table struct. This
 introduces a spapr_tce_iommu_group struct which keeps an iommu_owner
 and a spapr_tce_iommu_ops struct. For IODA, iommu_owner is a pointer to
 the pnv_ioda_pe struct, for others it is still a pointer to
 the iommu_table struct. The ops structs correspond to the type which
 iommu_owner points to.

 This defines a get_table() callback which returns an iommu_table
 by its number.

 As the IOMMU group data pointer points to variable type instead of
 iommu_table, VFIO SPAPR TCE driver is updated to use the new type.
 This changes the tce_container struct to store iommu_group instead of
 iommu_table.

 So, it was:
 - iommu_table points to iommu_group via iommu_table::it_group;
 - iommu_group points to iommu_table via iommu_group_get_iommudata();

 now it is:
 - iommu_table points to iommu_group via iommu_table::it_group;
 - iommu_group points to spapr_tce_iommu_group via
 iommu_group_get_iommudata();
 - spapr_tce_iommu_group points to either (depending on .get_table()):
  - iommu_table;
  - pnv_ioda_pe;

 This uses pnv_ioda1_iommu_get_table for both IODA1&2 but IODA2 will
 have its own pnv_ioda2_iommu_get_table soon and pnv_ioda1_iommu_get_table
 will only be used for IODA1.

 Signed-off-by: Alexey Kardashevskiy a...@ozlabs.ru
 ---
  arch/powerpc/include/asm/iommu.h|   6 ++
  arch/powerpc/include/asm/tce.h  |  13 +++
  arch/powerpc/kernel/iommu.c |  35 ++-
  arch/powerpc/platforms/powernv/pci-ioda.c   |  31 +-
  arch/powerpc/platforms/powernv/pci-p5ioc2.c |   1 +
  arch/powerpc/platforms/powernv/pci.c|   2 +-
  arch/powerpc/platforms/pseries/iommu.c  |  10 +-
  drivers/vfio/vfio_iommu_spapr_tce.c | 148 
 ++--
  8 files changed, 208 insertions(+), 38 deletions(-)

 diff --git a/arch/powerpc/include/asm/iommu.h 
 b/arch/powerpc/include/asm/iommu.h
 index 42632c7..84ee339 100644
 --- a/arch/powerpc/include/asm/iommu.h
 +++ b/arch/powerpc/include/asm/iommu.h
 @@ -108,13 +108,19 @@ extern void iommu_free_table(struct iommu_table *tbl, 
 const char *node_name);
   */
  extern struct iommu_table *iommu_init_table(struct iommu_table * tbl,
  int nid);
 +
 +struct spapr_tce_iommu_ops;
  #ifdef CONFIG_IOMMU_API
  extern void iommu_register_group(struct iommu_table *tbl,
 + void *iommu_owner,
 + struct spapr_tce_iommu_ops *ops,
   int pci_domain_number, unsigned long pe_num);
  extern int iommu_add_device(struct device *dev);
  extern void iommu_del_device(struct device *dev);
  #else
  static inline void iommu_register_group(struct iommu_table *tbl,
 +void *iommu_owner,
 +struct spapr_tce_iommu_ops *ops,
  int pci_domain_number,
  unsigned long pe_num)
  {
 diff --git a/arch/powerpc/include/asm/tce.h b/arch/powerpc/include/asm/tce.h
 index 743f36b..9f159eb 100644
 --- a/arch/powerpc/include/asm/tce.h
 +++ b/arch/powerpc/include/asm/tce.h
 @@ -50,5 +50,18 @@
  #define TCE_PCI_READ0x1 /* read from PCI 
 allowed */
  #define TCE_VB_WRITE0x1 /* write from VB 
 allowed */
  
 +struct spapr_tce_iommu_group;
 +
 +struct spapr_tce_iommu_ops {
 +struct iommu_table *(*get_table)(
 +struct spapr_tce_iommu_group *data,
 +int num);
 +};
 +
 +struct spapr_tce_iommu_group {
 +void *iommu_owner;
 +struct spapr_tce_iommu_ops *ops;
 +};
 +
  #endif /* __KERNEL__ */
  #endif /* _ASM_POWERPC_TCE_H */
 diff --git a/arch/powerpc/kernel/iommu.c b/arch/powerpc/kernel/iommu.c
 index b378f78..1c5dae7 100644
 --- a/arch/powerpc/kernel/iommu.c
 +++ b/arch/powerpc/kernel/iommu.c
 @@ -878,24 +878,53 @@ void iommu_free_coherent(struct iommu_table *tbl, 
 size_t size,
   */
  static void group_release(void *iommu_data)
  {
 -struct iommu_table *tbl = iommu_data;
 -tbl-it_group = NULL;
 +kfree(iommu_data);
  }
  
 +static struct iommu_table *spapr_tce_default_get_table(
 +struct spapr_tce_iommu_group *data, int num)
 +{
 +struct iommu_table *tbl = data-iommu_owner;
 +
 +switch (num) {
 +case 0:
 +if (tbl-it_size)
 +return tbl;
 +/* fallthru */
 +default:
 +return NULL;
 +}
 +}
 +
 +static struct spapr_tce_iommu_ops spapr_tce_default_ops = {
 +.get_table = spapr_tce_default_get_table
 +};
 +
  void