[PATCH 0/2] powerpc: allow kvm to use kernel debug framework

2013-07-04 Thread Bharat Bhushan
From: Bharat Bhushan bharat.bhus...@freescale.com

This patchset moves the debug registers in a structure, which allows
kvm to use same structure for debug emulation.

Note: Earlier, a patchset
https://lists.ozlabs.org/pipermail/linuxppc-dev/2013-June/108132.html
was sent as a series of six patches. That patchset is now divided into two
parts:
1) powerpc specific changes (these 2 patches contain those changes)
2) KVM specific changes (will be sent as separate patches on the agraf
   repository)

Bharat Bhushan (2):
  powerpc: remove unnecessary line continuations
  powerpc: move debug registers in a structure

 arch/powerpc/include/asm/processor.h |   38 +
 arch/powerpc/include/asm/reg_booke.h |8 +-
 arch/powerpc/kernel/asm-offsets.c|2 +-
 arch/powerpc/kernel/process.c|   42 +-
 arch/powerpc/kernel/ptrace.c |  154 +-
 arch/powerpc/kernel/ptrace32.c   |2 +-
 arch/powerpc/kernel/signal_32.c  |6 +-
 arch/powerpc/kernel/traps.c  |   35 
 8 files changed, 147 insertions(+), 140 deletions(-)


___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev


[PATCH 1/2] powerpc: remove unnecessary line continuations

2013-07-04 Thread Bharat Bhushan
Signed-off-by: Bharat Bhushan bharat.bhus...@freescale.com
---
 arch/powerpc/kernel/process.c |2 +-
 1 files changed, 1 insertions(+), 1 deletions(-)

diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c
index c517dbe..19b8733 100644
--- a/arch/powerpc/kernel/process.c
+++ b/arch/powerpc/kernel/process.c
@@ -325,7 +325,7 @@ static void set_debug_reg_defaults(struct thread_struct 
*thread)
/*
 * Force User/Supervisor bits to b11 (user-only MSR[PR]=1)
 */
-   thread-dbcr1 = DBCR1_IAC1US | DBCR1_IAC2US |   \
+   thread-dbcr1 = DBCR1_IAC1US | DBCR1_IAC2US |
DBCR1_IAC3US | DBCR1_IAC4US;
/*
 * Force Data Address Compare User/Supervisor bits to be User-only
-- 
1.7.0.4


___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev


[PATCH 2/2] powerpc: move debug registers in a structure

2013-07-04 Thread Bharat Bhushan
This way we can use the same struct data type with KVM, and it
also helps in reusing other debug-related functions.

Signed-off-by: Bharat Bhushan bharat.bhus...@freescale.com
---
 arch/powerpc/include/asm/processor.h |   38 +
 arch/powerpc/include/asm/reg_booke.h |8 +-
 arch/powerpc/kernel/asm-offsets.c|2 +-
 arch/powerpc/kernel/process.c|   42 +-
 arch/powerpc/kernel/ptrace.c |  154 +-
 arch/powerpc/kernel/ptrace32.c   |2 +-
 arch/powerpc/kernel/signal_32.c  |6 +-
 arch/powerpc/kernel/traps.c  |   35 
 8 files changed, 147 insertions(+), 140 deletions(-)

diff --git a/arch/powerpc/include/asm/processor.h 
b/arch/powerpc/include/asm/processor.h
index 47a35b0..9e9aa26 100644
--- a/arch/powerpc/include/asm/processor.h
+++ b/arch/powerpc/include/asm/processor.h
@@ -147,22 +147,7 @@ typedef struct {
 #define TS_FPR(i) fpr[i][TS_FPROFFSET]
 #define TS_TRANS_FPR(i) transact_fpr[i][TS_FPROFFSET]
 
-struct thread_struct {
-   unsigned long   ksp;/* Kernel stack pointer */
-   unsigned long   ksp_limit;  /* if ksp = ksp_limit stack overflow */
-
-#ifdef CONFIG_PPC64
-   unsigned long   ksp_vsid;
-#endif
-   struct pt_regs  *regs;  /* Pointer to saved register state */
-   mm_segment_tfs; /* for get_fs() validation */
-#ifdef CONFIG_BOOKE
-   /* BookE base exception scratch space; align on cacheline */
-   unsigned long   normsave[8] cacheline_aligned;
-#endif
-#ifdef CONFIG_PPC32
-   void*pgdir; /* root of page-table tree */
-#endif
+struct debug_reg {
 #ifdef CONFIG_PPC_ADV_DEBUG_REGS
/*
 * The following help to manage the use of Debug Control Registers
@@ -199,6 +184,27 @@ struct thread_struct {
unsigned long   dvc2;
 #endif
 #endif
+};
+
+struct thread_struct {
+   unsigned long   ksp;/* Kernel stack pointer */
+   unsigned long   ksp_limit;  /* if ksp = ksp_limit stack overflow */
+
+#ifdef CONFIG_PPC64
+   unsigned long   ksp_vsid;
+#endif
+   struct pt_regs  *regs;  /* Pointer to saved register state */
+   mm_segment_tfs; /* for get_fs() validation */
+#ifdef CONFIG_BOOKE
+   /* BookE base exception scratch space; align on cacheline */
+   unsigned long   normsave[8] cacheline_aligned;
+#endif
+#ifdef CONFIG_PPC32
+   void*pgdir; /* root of page-table tree */
+#endif
+   /* Debug Registers */
+   struct debug_reg debug;
+
/* FP and VSX 0-31 register set */
double  fpr[32][TS_FPRWIDTH] __attribute__((aligned(16)));
struct {
diff --git a/arch/powerpc/include/asm/reg_booke.h 
b/arch/powerpc/include/asm/reg_booke.h
index b417de3..455dc89 100644
--- a/arch/powerpc/include/asm/reg_booke.h
+++ b/arch/powerpc/include/asm/reg_booke.h
@@ -381,7 +381,7 @@
 #define DBCR0_IA34T0x4000  /* Instr Addr 3-4 range Toggle */
 #define DBCR0_FT   0x0001  /* Freeze Timers on debug event */
 
-#define dbcr_iac_range(task)   ((task)-thread.dbcr0)
+#define dbcr_iac_range(task)   ((task)-thread.debug.dbcr0)
 #define DBCR_IAC12IDBCR0_IA12  /* Range Inclusive */
 #define DBCR_IAC12X(DBCR0_IA12 | DBCR0_IA12X)  /* Range Exclusive */
 #define DBCR_IAC12MODE (DBCR0_IA12 | DBCR0_IA12X)  /* IAC 1-2 Mode Bits */
@@ -395,7 +395,7 @@
 #define DBCR1_DAC1W0x2000  /* DAC1 Write Debug Event */
 #define DBCR1_DAC2W0x1000  /* DAC2 Write Debug Event */
 
-#define dbcr_dac(task) ((task)-thread.dbcr1)
+#define dbcr_dac(task) ((task)-thread.debug.dbcr1)
 #define DBCR_DAC1R DBCR1_DAC1R
 #define DBCR_DAC1W DBCR1_DAC1W
 #define DBCR_DAC2R DBCR1_DAC2R
@@ -441,7 +441,7 @@
 #define DBCR0_CRET 0x0020  /* Critical Return Debug Event */
 #define DBCR0_FT   0x0001  /* Freeze Timers on debug event */
 
-#define dbcr_dac(task) ((task)-thread.dbcr0)
+#define dbcr_dac(task) ((task)-thread.debug.dbcr0)
 #define DBCR_DAC1R DBCR0_DAC1R
 #define DBCR_DAC1W DBCR0_DAC1W
 #define DBCR_DAC2R DBCR0_DAC2R
@@ -475,7 +475,7 @@
 #define DBCR1_IAC34MX  0x00C0  /* Instr Addr 3-4 range eXclusive */
 #define DBCR1_IAC34AT  0x0001  /* Instr Addr 3-4 range Toggle */
 
-#define dbcr_iac_range(task)   ((task)-thread.dbcr1)
+#define dbcr_iac_range(task)   ((task)-thread.debug.dbcr1)
 #define DBCR_IAC12IDBCR1_IAC12M/* Range Inclusive */
 #define DBCR_IAC12XDBCR1_IAC12MX   /* Range Exclusive */
 #define DBCR_IAC12MODE DBCR1_IAC12MX   /* IAC 1-2 Mode Bits */
diff --git a/arch/powerpc/kernel/asm-offsets.c 
b/arch/powerpc/kernel/asm-offsets.c
index c7e8afc..d56727c 100644
--- a/arch/powerpc/kernel/asm-offsets.c
+++ b/arch/powerpc/kernel/asm-offsets.c
@@ -113,7 +113,7 @@ int main(void)
 #endif /* CONFIG_SPE */
 #endif /* CONFIG_PPC64 */
 #if defined(CONFIG_4xx) || defined(CONFIG_BOOKE)
-  

RE: [PATCH 1/2] powerpc/booke64: Add LRAT error exception handler

2013-07-04 Thread Caraman Mihai Claudiu-B02008
 -Original Message-
 From: Wood Scott-B07421
 Sent: Wednesday, July 03, 2013 11:18 PM
 To: Caraman Mihai Claudiu-B02008
 Cc: linuxppc-dev@lists.ozlabs.org; kvm-...@vger.kernel.org;
 k...@vger.kernel.org; Caraman Mihai Claudiu-B02008
 Subject: Re: [PATCH 1/2] powerpc/booke64: Add LRAT error exception
 handler
 
 On 07/03/2013 11:56:05 AM, Mihai Caraman wrote:
  @@ -1410,6 +1423,7 @@ _GLOBAL(setup_doorbell_ivors)
   _GLOBAL(setup_ehv_ivors)
  SET_IVOR(40, 0x300) /* Embedded Hypervisor System Call */
  SET_IVOR(41, 0x320) /* Embedded Hypervisor Privilege */
  +   SET_IVOR(42, 0x340) /* LRAT Error */
 
 What happens if we write to IVOR42 on e5500?  If the answer is no-op,
 is that behavior guaranteed on any CPU with E.HV but not LRAT?

Oops. I would rather do it in __setup_cpu_e6500, in the same way we deal
with AltiVec.

-Mike

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev


RE: [PATCH 2/2] KVM: PPC: Book3E: Add LRAT error exception handler

2013-07-04 Thread Caraman Mihai Claudiu-B02008
 -Original Message-
 From: Wood Scott-B07421
 Sent: Wednesday, July 03, 2013 11:17 PM
 To: Caraman Mihai Claudiu-B02008
 Cc: linuxppc-dev@lists.ozlabs.org; kvm-...@vger.kernel.org;
 k...@vger.kernel.org; Caraman Mihai Claudiu-B02008
 Subject: Re: [PATCH 2/2] KVM: PPC: Book3E: Add LRAT error exception
 handler
 
 On 07/03/2013 11:56:06 AM, Mihai Caraman wrote:
  With LRAT (Logical to Real Address Translation) error exception
  handler in kernel
  KVM needs to add the counterpart otherwise will break the build.
 
  Signed-off-by: Mihai Caraman mihai.cara...@freescale.com
  ---
   arch/powerpc/kvm/bookehv_interrupts.S |2 ++
   1 files changed, 2 insertions(+), 0 deletions(-)
 
 Please combine these two patches to avoid breaking bisectability.
 
 -Scott

This is a solid reason. Ben, is it OK for you to apply the combined patch?
If so, I will respin it.

-Mike


___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev


RE: [PATCH 3/6] KVM: PPC: Book3E: Increase FPU laziness

2013-07-04 Thread Caraman Mihai Claudiu-B02008


 -Original Message-
 From: kvm-ppc-ow...@vger.kernel.org [mailto:kvm-ppc-
 ow...@vger.kernel.org] On Behalf Of Alexander Graf
 Sent: Wednesday, July 03, 2013 9:40 PM
 To: Wood Scott-B07421
 Cc: Caraman Mihai Claudiu-B02008; kvm-...@vger.kernel.org;
 k...@vger.kernel.org; linuxppc-dev@lists.ozlabs.org
 Subject: Re: [PATCH 3/6] KVM: PPC: Book3E: Increase FPU laziness
 
 
 On 03.07.2013, at 20:37, Scott Wood wrote:
 
  On 07/03/2013 07:42:36 AM, Mihai Caraman wrote:
  Increase FPU laziness by calling kvmppc_load_guest_fp() just before
  returning to guest instead of each sched in. Without this improvement
  an interrupt may also claim floating point corrupting guest state.
  Signed-off-by: Mihai Caraman mihai.cara...@freescale.com
  ---
  arch/powerpc/kvm/booke.c  |1 +
  arch/powerpc/kvm/e500mc.c |2 --
  2 files changed, 1 insertions(+), 2 deletions(-)
  diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c
  index 113961f..3cae2e3 100644
  --- a/arch/powerpc/kvm/booke.c
  +++ b/arch/powerpc/kvm/booke.c
  @@ -1204,6 +1204,7 @@ int kvmppc_handle_exit(struct kvm_run *run,
 struct kvm_vcpu *vcpu,
 r = (s  2) | RESUME_HOST | (r  RESUME_FLAG_NV);
 } else {
 kvmppc_lazy_ee_enable();
  +  kvmppc_load_guest_fp(vcpu);
 }
 }
  diff --git a/arch/powerpc/kvm/e500mc.c b/arch/powerpc/kvm/e500mc.c
  index 19c8379..09da1ac 100644
  --- a/arch/powerpc/kvm/e500mc.c
  +++ b/arch/powerpc/kvm/e500mc.c
  @@ -143,8 +143,6 @@ void kvmppc_core_vcpu_load(struct kvm_vcpu *vcpu,
 int cpu)
 kvmppc_e500_tlbil_all(vcpu_e500);
 __get_cpu_var(last_vcpu_on_cpu) = vcpu;
 }
  -
  -  kvmppc_load_guest_fp(vcpu);
  }
  void kvmppc_core_vcpu_put(struct kvm_vcpu *vcpu)
 
  Can we now remove vcpu-fpu_active, and the comment that says Kernel
 usage of FP (via
  enable_kernel_fp()) in this thread must not occur while vcpu-
 fpu_active is set.?
 
 I think so, yes.

Yes, as I already did this for AltiVec.

-Mike



___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev


Re: [PATCH 2/2] KVM: PPC: Book3E: Add LRAT error exception handler

2013-07-04 Thread Benjamin Herrenschmidt
On Thu, 2013-07-04 at 06:47 +, Caraman Mihai Claudiu-B02008 wrote:
 This is a solid reason. Ben it's ok for you to apply the combined
 patch? If so I will respin it.

Sure, but nowadays, all that stuff goes via Scott and Alex.

Cheers,
Ben.


___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev


[PATCH 2/4 v6] KVM: PPC: exit to user space on ehpriv 1 instruction

2013-07-04 Thread Bharat Bhushan
The "ehpriv 1" instruction is used by user space for setting software
breakpoints. This patch adds support to exit to user space
with run->debug holding the relevant information.

As this is the first point where we use run->debug, this patch also
defines the run->debug structure.

Signed-off-by: Bharat Bhushan bharat.bhus...@freescale.com
---
v5-v6
 - using ehpriv 1 instead of ehpriv for software breakpoint

 arch/powerpc/include/asm/disassemble.h |4 
 arch/powerpc/include/asm/kvm_booke.h   |7 ++-
 arch/powerpc/include/uapi/asm/kvm.h|   21 +
 arch/powerpc/kvm/booke.c   |2 +-
 arch/powerpc/kvm/e500_emulate.c|   26 ++
 5 files changed, 54 insertions(+), 6 deletions(-)

diff --git a/arch/powerpc/include/asm/disassemble.h 
b/arch/powerpc/include/asm/disassemble.h
index 9b198d1..856f8de 100644
--- a/arch/powerpc/include/asm/disassemble.h
+++ b/arch/powerpc/include/asm/disassemble.h
@@ -77,4 +77,8 @@ static inline unsigned int get_d(u32 inst)
return inst  0x;
 }
 
+static inline unsigned int get_oc(u32 inst)
+{
+   return (inst  11)  0x7fff;
+}
 #endif /* __ASM_PPC_DISASSEMBLE_H__ */
diff --git a/arch/powerpc/include/asm/kvm_booke.h 
b/arch/powerpc/include/asm/kvm_booke.h
index d3c1eb3..dd8f615 100644
--- a/arch/powerpc/include/asm/kvm_booke.h
+++ b/arch/powerpc/include/asm/kvm_booke.h
@@ -26,7 +26,12 @@
 /* LPIDs we support with this build -- runtime limit may be lower */
 #define KVMPPC_NR_LPIDS64
 
-#define KVMPPC_INST_EHPRIV 0x7c00021c
+#define KVMPPC_INST_EHPRIV 0x7c00021c
+#define EHPRIV_OC_SHIFT11
+/* ehpriv 1 : ehpriv with OC = 1 is used for debug emulation */
+#define EHPRIV_OC_DEBUG1
+#define KVMPPC_INST_EHPRIV_DEBUG   (KVMPPC_INST_EHPRIV | \
+(EHPRIV_OC_DEBUG  EHPRIV_OC_SHIFT))
 
 static inline void kvmppc_set_gpr(struct kvm_vcpu *vcpu, int num, ulong val)
 {
diff --git a/arch/powerpc/include/uapi/asm/kvm.h 
b/arch/powerpc/include/uapi/asm/kvm.h
index 0fb1a6e..ded0607 100644
--- a/arch/powerpc/include/uapi/asm/kvm.h
+++ b/arch/powerpc/include/uapi/asm/kvm.h
@@ -269,7 +269,24 @@ struct kvm_fpu {
__u64 fpr[32];
 };
 
+/*
+ * Defines for h/w breakpoint, watchpoint (read, write or both) and
+ * software breakpoint.
+ * These are used as type in KVM_SET_GUEST_DEBUG ioctl and status
+ * for KVM_DEBUG_EXIT.
+ */
+#define KVMPPC_DEBUG_NONE  0x0
+#define KVMPPC_DEBUG_BREAKPOINT(1UL  1)
+#define KVMPPC_DEBUG_WATCH_WRITE   (1UL  2)
+#define KVMPPC_DEBUG_WATCH_READ(1UL  3)
 struct kvm_debug_exit_arch {
+   __u64 address;
+   /*
+* exiting to userspace because of h/w breakpoint, watchpoint
+* (read, write or both) and software breakpoint.
+*/
+   __u32 status;
+   __u32 reserved;
 };
 
 /* for KVM_SET_GUEST_DEBUG */
@@ -281,10 +298,6 @@ struct kvm_guest_debug_arch {
 * Type denotes h/w breakpoint, read watchpoint, write
 * watchpoint or watchpoint (both read and write).
 */
-#define KVMPPC_DEBUG_NONE  0x0
-#define KVMPPC_DEBUG_BREAKPOINT(1UL  1)
-#define KVMPPC_DEBUG_WATCH_WRITE   (1UL  2)
-#define KVMPPC_DEBUG_WATCH_READ(1UL  3)
__u32 type;
__u32 reserved;
} bp[16];
diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c
index 62d4ece..4c9f6ad 100644
--- a/arch/powerpc/kvm/booke.c
+++ b/arch/powerpc/kvm/booke.c
@@ -1460,7 +1460,7 @@ int kvm_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu, 
struct kvm_one_reg *reg)
val = get_reg_val(reg-id, vcpu-arch.tsr);
break;
case KVM_REG_PPC_DEBUG_INST:
-   val = get_reg_val(reg-id, KVMPPC_INST_EHPRIV);
+   val = get_reg_val(reg-id, KVMPPC_INST_EHPRIV_DEBUG);
break;
default:
r = kvmppc_get_one_reg(vcpu, reg-id, val);
diff --git a/arch/powerpc/kvm/e500_emulate.c b/arch/powerpc/kvm/e500_emulate.c
index b10a012..6163a03 100644
--- a/arch/powerpc/kvm/e500_emulate.c
+++ b/arch/powerpc/kvm/e500_emulate.c
@@ -26,6 +26,7 @@
 #define XOP_TLBRE   946
 #define XOP_TLBWE   978
 #define XOP_TLBILX  18
+#define XOP_EHPRIV  270
 
 #ifdef CONFIG_KVM_E500MC
 static int dbell2prio(ulong param)
@@ -82,6 +83,26 @@ static int kvmppc_e500_emul_msgsnd(struct kvm_vcpu *vcpu, 
int rb)
 }
 #endif
 
+static int kvmppc_e500_emul_ehpriv(struct kvm_run *run, struct kvm_vcpu *vcpu,
+  unsigned int inst, int *advance)
+{
+   int emulated = EMULATE_DONE;
+
+   switch (get_oc(inst)) {
+   case EHPRIV_OC_DEBUG:
+   run-exit_reason = KVM_EXIT_DEBUG;
+   run-debug.arch.address = vcpu-arch.pc;
+   run-debug.arch.status = 0;
+   kvmppc_account_exit(vcpu, DEBUG_EXITS);
+

[PATCH 1/4 v6] powerpc: export debug registers save function for KVM

2013-07-04 Thread Bharat Bhushan
KVM needs this function when switching from vcpu to user-space
thread. My subsequent patch will use this function.

Signed-off-by: Bharat Bhushan bharat.bhus...@freescale.com
---
v5-v6
 - switch_booke_debug_regs() not guarded by the compiler switch

 arch/powerpc/include/asm/switch_to.h |1 +
 arch/powerpc/kernel/process.c|3 ++-
 2 files changed, 3 insertions(+), 1 deletions(-)

diff --git a/arch/powerpc/include/asm/switch_to.h 
b/arch/powerpc/include/asm/switch_to.h
index 200d763..db68f1d 100644
--- a/arch/powerpc/include/asm/switch_to.h
+++ b/arch/powerpc/include/asm/switch_to.h
@@ -29,6 +29,7 @@ extern void giveup_vsx(struct task_struct *);
 extern void enable_kernel_spe(void);
 extern void giveup_spe(struct task_struct *);
 extern void load_up_spe(struct task_struct *);
+extern void switch_booke_debug_regs(struct thread_struct *new_thread);
 
 #ifndef CONFIG_SMP
 extern void discard_lazy_cpu_state(void);
diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c
index 01ff496..da586aa 100644
--- a/arch/powerpc/kernel/process.c
+++ b/arch/powerpc/kernel/process.c
@@ -362,12 +362,13 @@ static void prime_debug_regs(struct thread_struct *thread)
  * debug registers, set the debug registers from the values
  * stored in the new thread.
  */
-static void switch_booke_debug_regs(struct thread_struct *new_thread)
+void switch_booke_debug_regs(struct thread_struct *new_thread)
 {
if ((current-thread.debug.dbcr0  DBCR0_IDM)
|| (new_thread-debug.dbcr0  DBCR0_IDM))
prime_debug_regs(new_thread);
 }
+EXPORT_SYMBOL_GPL(switch_booke_debug_regs);
 #else  /* !CONFIG_PPC_ADV_DEBUG_REGS */
 #ifndef CONFIG_HAVE_HW_BREAKPOINT
 static void set_debug_reg_defaults(struct thread_struct *thread)
-- 
1.7.0.4


___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev


[PATCH 3/4 v6] KVM: PPC: Using struct debug_reg

2013-07-04 Thread Bharat Bhushan
For KVM also use the struct debug_reg defined in asm/processor.h

Signed-off-by: Bharat Bhushan bharat.bhus...@freescale.com
---
v5-v6
 - no changes

 arch/powerpc/include/asm/kvm_host.h |   13 +
 arch/powerpc/kvm/booke.c|   34 --
 2 files changed, 25 insertions(+), 22 deletions(-)

diff --git a/arch/powerpc/include/asm/kvm_host.h 
b/arch/powerpc/include/asm/kvm_host.h
index af326cd..838a577 100644
--- a/arch/powerpc/include/asm/kvm_host.h
+++ b/arch/powerpc/include/asm/kvm_host.h
@@ -381,17 +381,6 @@ struct kvmppc_slb {
 #define KVMPPC_EPR_USER1 /* exit to userspace to fill EPR */
 #define KVMPPC_EPR_KERNEL  2 /* in-kernel irqchip */
 
-struct kvmppc_booke_debug_reg {
-   u32 dbcr0;
-   u32 dbcr1;
-   u32 dbcr2;
-#ifdef CONFIG_KVM_E500MC
-   u32 dbcr4;
-#endif
-   u64 iac[KVMPPC_BOOKE_MAX_IAC];
-   u64 dac[KVMPPC_BOOKE_MAX_DAC];
-};
-
 #define KVMPPC_IRQ_DEFAULT 0
 #define KVMPPC_IRQ_MPIC1
 #define KVMPPC_IRQ_XICS2
@@ -535,7 +524,7 @@ struct kvm_vcpu_arch {
u32 eptcfg;
u32 epr;
u32 crit_save;
-   struct kvmppc_booke_debug_reg dbg_reg;
+   struct debug_reg dbg_reg;
 #endif
gpa_t paddr_accessed;
gva_t vaddr_accessed;
diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c
index 4c9f6ad..87aa727 100644
--- a/arch/powerpc/kvm/booke.c
+++ b/arch/powerpc/kvm/booke.c
@@ -1424,7 +1424,6 @@ int kvm_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu, 
struct kvm_one_reg *reg)
int r = 0;
union kvmppc_one_reg val;
int size;
-   long int i;
 
size = one_reg_size(reg-id);
if (size  sizeof(val))
@@ -1432,16 +1431,24 @@ int kvm_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu, 
struct kvm_one_reg *reg)
 
switch (reg-id) {
case KVM_REG_PPC_IAC1:
+   val = get_reg_val(reg-id, vcpu-arch.dbg_reg.iac1);
+   break;
case KVM_REG_PPC_IAC2:
+   val = get_reg_val(reg-id, vcpu-arch.dbg_reg.iac2);
+   break;
+#if CONFIG_PPC_ADV_DEBUG_IACS  2
case KVM_REG_PPC_IAC3:
+   val = get_reg_val(reg-id, vcpu-arch.dbg_reg.iac3);
+   break;
case KVM_REG_PPC_IAC4:
-   i = reg-id - KVM_REG_PPC_IAC1;
-   val = get_reg_val(reg-id, vcpu-arch.dbg_reg.iac[i]);
+   val = get_reg_val(reg-id, vcpu-arch.dbg_reg.iac4);
break;
+#endif
case KVM_REG_PPC_DAC1:
+   val = get_reg_val(reg-id, vcpu-arch.dbg_reg.dac1);
+   break;
case KVM_REG_PPC_DAC2:
-   i = reg-id - KVM_REG_PPC_DAC1;
-   val = get_reg_val(reg-id, vcpu-arch.dbg_reg.dac[i]);
+   val = get_reg_val(reg-id, vcpu-arch.dbg_reg.dac2);
break;
case KVM_REG_PPC_EPR: {
u32 epr = get_guest_epr(vcpu);
@@ -1481,7 +1488,6 @@ int kvm_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu, 
struct kvm_one_reg *reg)
int r = 0;
union kvmppc_one_reg val;
int size;
-   long int i;
 
size = one_reg_size(reg-id);
if (size  sizeof(val))
@@ -1492,16 +1498,24 @@ int kvm_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu, 
struct kvm_one_reg *reg)
 
switch (reg-id) {
case KVM_REG_PPC_IAC1:
+   vcpu-arch.dbg_reg.iac1 = set_reg_val(reg-id, val);
+   break;
case KVM_REG_PPC_IAC2:
+   vcpu-arch.dbg_reg.iac2 = set_reg_val(reg-id, val);
+   break;
+#if CONFIG_PPC_ADV_DEBUG_IACS  2
case KVM_REG_PPC_IAC3:
+   vcpu-arch.dbg_reg.iac3 = set_reg_val(reg-id, val);
+   break;
case KVM_REG_PPC_IAC4:
-   i = reg-id - KVM_REG_PPC_IAC1;
-   vcpu-arch.dbg_reg.iac[i] = set_reg_val(reg-id, val);
+   vcpu-arch.dbg_reg.iac4 = set_reg_val(reg-id, val);
break;
+#endif
case KVM_REG_PPC_DAC1:
+   vcpu-arch.dbg_reg.dac1 = set_reg_val(reg-id, val);
+   break;
case KVM_REG_PPC_DAC2:
-   i = reg-id - KVM_REG_PPC_DAC1;
-   vcpu-arch.dbg_reg.dac[i] = set_reg_val(reg-id, val);
+   vcpu-arch.dbg_reg.dac2 = set_reg_val(reg-id, val);
break;
case KVM_REG_PPC_EPR: {
u32 new_epr = set_reg_val(reg-id, val);
-- 
1.7.0.4


___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev


[PATCH 4/4 v6] KVM: PPC: Add userspace debug stub support

2013-07-04 Thread Bharat Bhushan
This patch adds the debug stub support on booke/bookehv.
Now QEMU debug stub can use hw breakpoint, watchpoint and
software breakpoint to debug guest.

This is how we save/restore debug register context when switching
between guest, userspace and kernel user-process:

When QEMU is running
 - thread->debug_reg == QEMU debug register context.
 - Kernel will handle switching the debug register on context switch.
 - no vcpu_load() called

QEMU makes ioctls (except RUN)
 - This will call vcpu_load()
 - should not change context.
 - Some ioctls can change vcpu debug register, context saved in
   vcpu->debug_regs

QEMU makes RUN ioctl
 - Save thread->debug_reg on STACK
 - Store thread->debug_reg == vcpu->debug_reg
 - load thread->debug_reg
 - RUN VCPU (so thread points to vcpu context)

Context switch happens when VCPU is running
 - vcpu_load() is called but should not load any context
 - kernel loads the vcpu context as thread->debug_regs points to vcpu context.

On heavyweight_exit
 - Load the context saved on stack in thread->debug_reg

Currently we do not support debug resource emulation for the guest.
On a debug exception, we always exit to user space irrespective of
whether user space is expecting the debug exception or not. If this is
an unexpected exception (breakpoint/watchpoint event not set by
userspace), then we leave the action to user space. This
is similar to what it was before; the only difference is that now we
have proper exit state available to user space.

Signed-off-by: Bharat Bhushan bharat.bhus...@freescale.com
---
v5-v6
 - no changes

 arch/powerpc/include/asm/kvm_host.h |3 +
 arch/powerpc/include/uapi/asm/kvm.h |1 +
 arch/powerpc/kvm/booke.c|  239 ---
 arch/powerpc/kvm/booke.h|5 +
 4 files changed, 230 insertions(+), 18 deletions(-)

diff --git a/arch/powerpc/include/asm/kvm_host.h 
b/arch/powerpc/include/asm/kvm_host.h
index 838a577..aeb490d 100644
--- a/arch/powerpc/include/asm/kvm_host.h
+++ b/arch/powerpc/include/asm/kvm_host.h
@@ -524,7 +524,10 @@ struct kvm_vcpu_arch {
u32 eptcfg;
u32 epr;
u32 crit_save;
+   /* guest debug registers*/
struct debug_reg dbg_reg;
+   /* hardware visible debug registers when in guest state */
+   struct debug_reg shadow_dbg_reg;
 #endif
gpa_t paddr_accessed;
gva_t vaddr_accessed;
diff --git a/arch/powerpc/include/uapi/asm/kvm.h 
b/arch/powerpc/include/uapi/asm/kvm.h
index ded0607..f5077c2 100644
--- a/arch/powerpc/include/uapi/asm/kvm.h
+++ b/arch/powerpc/include/uapi/asm/kvm.h
@@ -27,6 +27,7 @@
 #define __KVM_HAVE_PPC_SMT
 #define __KVM_HAVE_IRQCHIP
 #define __KVM_HAVE_IRQ_LINE
+#define __KVM_HAVE_GUEST_DEBUG
 
 struct kvm_regs {
__u64 pc;
diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c
index 87aa727..7b54802 100644
--- a/arch/powerpc/kvm/booke.c
+++ b/arch/powerpc/kvm/booke.c
@@ -133,6 +133,29 @@ static void kvmppc_vcpu_sync_fpu(struct kvm_vcpu *vcpu)
 #endif
 }
 
+static void kvmppc_vcpu_sync_debug(struct kvm_vcpu *vcpu)
+{
+   /* Synchronize guest's desire to get debug interrupts into shadow MSR */
+#ifndef CONFIG_KVM_BOOKE_HV
+   vcpu-arch.shadow_msr = ~MSR_DE;
+   vcpu-arch.shadow_msr |= vcpu-arch.shared-msr  MSR_DE;
+#endif
+
+   /* Force enable debug interrupts when user space wants to debug */
+   if (vcpu-guest_debug) {
+#ifdef CONFIG_KVM_BOOKE_HV
+   /*
+* Since there is no shadow MSR, sync MSR_DE into the guest
+* visible MSR.
+*/
+   vcpu-arch.shared-msr |= MSR_DE;
+#else
+   vcpu-arch.shadow_msr |= MSR_DE;
+   vcpu-arch.shared-msr = ~MSR_DE;
+#endif
+   }
+}
+
 /*
  * Helper function for full MSR writes.  No need to call this if only
  * EE/CE/ME/DE/RI are changing.
@@ -150,6 +173,7 @@ void kvmppc_set_msr(struct kvm_vcpu *vcpu, u32 new_msr)
kvmppc_mmu_msr_notify(vcpu, old_msr);
kvmppc_vcpu_sync_spe(vcpu);
kvmppc_vcpu_sync_fpu(vcpu);
+   kvmppc_vcpu_sync_debug(vcpu);
 }
 
 static void kvmppc_booke_queue_irqprio(struct kvm_vcpu *vcpu,
@@ -655,6 +679,7 @@ int kvmppc_core_check_requests(struct kvm_vcpu *vcpu)
 int kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
 {
int ret, s;
+   struct thread_struct thread;
 #ifdef CONFIG_PPC_FPU
unsigned int fpscr;
int fpexc_mode;
@@ -698,12 +723,21 @@ int kvmppc_vcpu_run(struct kvm_run *kvm_run, struct 
kvm_vcpu *vcpu)
 
kvmppc_load_guest_fp(vcpu);
 #endif
+   /* Switch to guest debug context */
+   thread.debug = vcpu-arch.shadow_dbg_reg;
+   switch_booke_debug_regs(thread);
+   thread.debug = current-thread.debug;
+   current-thread.debug = vcpu-arch.shadow_dbg_reg;
 
ret = __kvmppc_vcpu_run(kvm_run, vcpu);
 
/* No need for kvm_guest_exit. It's done in handle_exit.
   We also get here with interrupts enabled. */
 
+   /* Switch back to user space debug 

[PATCH 0/4 v6] KVM :PPC: Userspace Debug support

2013-07-04 Thread Bharat Bhushan
From: Bharat Bhushan bharat.bhus...@freescale.com

Note: These patches depends on https://lkml.org/lkml/2013/7/4/49.

This patchset adds the userspace debug support for booke/bookehv.
this is tested on powerpc e500v2/e500mc devices.

We are now assuming that debug resource will not be used by kernel for
its own debugging. It will be used for only kernel user process debugging.
So the kernel debug load interface during context_to is used to load
debug context for that selected process.

v5-v6
 - Earlier it was a patchset of six patches. Now this is divided into
   two parts: 1) powerpc specific changes 2) kvm specific changes.
   This patchset now contains only KVM specific changes.
 - using ehpriv 1 (earlier using ehpriv) for software breakpoint
 -

v4-v5
 - Some comments reworded and other cleanup (like change of function name etc)
 - Added a function for setting MSRP rather than inline

v3-v4
 - 4 out of 7 patches of initial patchset were applied.
   This patchset is on and above those 4 patches
 - KVM local struct kvmppc_booke_debug_reg is replaced by
   powerpc global struct debug_reg
 - use switch_booke_debug_regs() for debug register context switch.
 - Save DBSR before kernel pre-emption is enabled.
 - Some more cleanup

v2-v3
 - We are now assuming that debug resource will not be used by
   kernel for its own debugging.
   It will be used for only kernel user process debugging.
   So the kernel debug load interface during context_to is
   used to load debug context for that selected process.

v1-v2
 - Debug registers are save/restore in vcpu_put/vcpu_get.
   Earlier the debug registers are saved/restored in guest entry/exit

Bharat Bhushan (4):
  powerpc: export debug registers save function for KVM
  KVM: PPC: exit to user space on ehpriv 1 instruction
  KVM: PPC: Using struct debug_reg
  KVM: PPC: Add userspace debug stub support

 arch/powerpc/include/asm/disassemble.h |4 +
 arch/powerpc/include/asm/kvm_booke.h   |7 +-
 arch/powerpc/include/asm/kvm_host.h|   16 +--
 arch/powerpc/include/asm/switch_to.h   |1 +
 arch/powerpc/include/uapi/asm/kvm.h|   22 ++-
 arch/powerpc/kernel/process.c  |3 +-
 arch/powerpc/kvm/booke.c   |  275 
 arch/powerpc/kvm/booke.h   |5 +
 arch/powerpc/kvm/e500_emulate.c|   26 +++
 9 files changed, 312 insertions(+), 47 deletions(-)


___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev


RE: [PATCH] Emulate sync instruction variants

2013-07-04 Thread David Laight
 Reserved fields of the sync instruction have been used for other
 instructions (e.g. lwsync).  On processors that do not support variants
 of the sync instruction, emulate it by executing a sync to subsume the
 effect of the intended instruction.
...
 + /* Emulate sync instruction variants */
 + if ((instword  PPC_INST_SYNC_MASK) == PPC_INST_SYNC) {
 + PPC_WARN_EMULATED(sync, regs);
 + asm volatile (sync);
 + return 0;
 + }

Do you need to execute 'sync' here?
It is worth checking whether the trap entry/exit doesn't
do an implicit one for you.

David



___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev


Re: [PATCH] Emulate sync instruction variants

2013-07-04 Thread Benjamin Herrenschmidt
On Thu, 2013-07-04 at 09:31 +0100, David Laight wrote:
 Do you need to execute 'sync' here?
 It is worth checking whether the trap entry/exit doesn't
 do an implicit one for you.

Not really. It does an implicit isync (more than one even) but
not a sync.

Cheers,
Ben.


___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev


help with custom board based on mpc5200b

2013-07-04 Thread Lorenzo Forzini
Hello, we're using a custom board based on mpc5200b

We are using kernel 2.6.33, and when we use Ethernet we get 80% packet loss
with ping.
Where could I find a solution?

U-boot information:
CPU:   MPC5200B v2.2, Core v1.4 at 132 MHz
   Bus 132 MHz, IPB 132 MHz, PCI 66 MHz
Board: Ran Controller Board

I2C:   85 kHz, ready

Thanks

Lorenzo
___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev


Re: [PATCH v2 2/2] perf tools: Make Power7 events available for perf

2013-07-04 Thread Michael Ellerman
On Tue, Jun 25, 2013 at 12:46:42PM -0400, Vince Weaver wrote:
 On Tue, 25 Jun 2013, Runzhen Wang wrote:
 
  This patch makes all the POWER7 events available in sysfs.
  
  ...
 
  $ size arch/powerpc/perf/power7-pmu.o
 textdata bss dec hex filename
 30732720   0579316a1 arch/powerpc/perf/power7-pmu.o
  
  and after the patch is applied, it is:
  
  $ size arch/powerpc/perf/power7-pmu.o
 textdata bss dec hex filename
15950   31112   0   47062b7d6 arch/powerpc/perf/power7-pmu.o
 
 So if I'm reading this right, there's 45k of overhead for just one cpu 
 type?

I think there's another ~56K at runtime too, at least on my system where
each sysfs_dirent is 112 bytes.
 
 What happens if we do this on x86?
 
 If we have similar for p6/p4/core2/nehalem/ivb/snb/amd10h/amd15h/amd16h/knb
 that's 450k of event definitions in the kernel.  And may I remind everyone 
 that you can't compile perf_event support as a module, nor can you 
 unconfigure it on x86 (it's always built in, no option to disable).

To be honest on Power7 systems we're not really bothered about ~100K,
that's less than two pages. But I agree with your point that it's
getting a bit silly.

Various folks have tried over the years to get alternative approaches
adopted (as I'm sure you know), and this has just ended up as the path
of least resistance.

 I'd like to repeat my unpopular position that we just link perf against 
 libpfm4 and keep event tables in userspace where they belong.

I don't think it even needs libpfm4, just some csv files in tools/perf
would do the trick.

Instead we have Google using gooda, which provides event decoding on top
of perf (via libpfm4). Andi Kleen at Intel has a tool that provides
event decoding on top of perf. Presumably Facebook do too? And at IBM
most folks still use oprofile, because it provides event decoding. 

cheers
___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev


[PATCH v2 0/8] powerpc: enable the relocatable support for fsl booke 32bit kernel

2013-07-04 Thread Kevin Hao
v2:
These patches are based on the Ben's next branch. In this version we choose
to do a second relocation if the PAGE_OFFSET is not mapped to the memstart_addr
and we also choose to set the tlb1 entries for the kernel space in address
space 1. With this implementation:
  * We can load the kernel at any place between
 memstart_addr ~ memstart_addr + 768M
  * We can reserve any memory between memstart_addr ~ memstart_addr + 768M
for a kdump kernel.

I have done a kdump boot on a p2020rdb kernel with the memory reserved by
'crashkernel=32M@320M'.


v1:
Currently the fsl booke 32bit kernel is using the DYNAMIC_MEMSTART relocation
method. But the RELOCATABLE method is more flexible and has less alignment
restriction. So enable this feature on this platform and use it by
default for the kdump kernel.

These patches have passed the kdump boot test on a p2020rdb board.

Kevin Hao (8):
  powerpc/fsl_booke: protect the access to MAS7 with MMU_FTR_BIG_PHYS
  powerpc/fsl_booke: introduce get_phys_addr function
  powerpc: enable the relocatable support for the fsl booke 32bit kernel
  powerpc/fsl_booke: set the tlb entry for the kernel address in AS1
  memblock: introduce the memblock_reinit function
  powerpc: introduce early_get_first_memblock_info
  powerpc/fsl_booke: make sure PAGE_OFFSET map to memstart_addr for
relocatable kernel
  powerpc/fsl_booke: enable the relocatable for the kdump kernel

 arch/powerpc/Kconfig  |   5 +-
 arch/powerpc/kernel/entry_32.S|   8 +-
 arch/powerpc/kernel/fsl_booke_entry_mapping.S |  14 +-
 arch/powerpc/kernel/head_fsl_booke.S  | 233 --
 arch/powerpc/kernel/prom.c|  24 +++
 arch/powerpc/mm/fsl_booke_mmu.c   |  56 +++
 arch/powerpc/mm/hugetlbpage-book3e.c  |   3 +-
 arch/powerpc/mm/mmu_decl.h|   2 +
 include/linux/memblock.h  |   1 +
 include/linux/of_fdt.h|   1 +
 mm/memblock.c |  33 ++--
 11 files changed, 340 insertions(+), 40 deletions(-)

-- 
1.8.1.4

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev


[PATCH v2 1/8] powerpc/fsl_booke: protect the access to MAS7 with MMU_FTR_BIG_PHYS

2013-07-04 Thread Kevin Hao
The e500v1 doesn't implement MAS7, so we should avoid accessing
this register on that implementation. Some code uses
CONFIG_PHYS_64BIT to protect these accesses, but this is not accurate.
In theory we can enable CONFIG_PHYS_64BIT for an e500v1 board, and
CONFIG_PHYS_64BIT is also enabled by default in mpc85xx_defconfig,
which definitely supports e500v1 boards. MMU_FTR_BIG_PHYS
should be the right choice.

Signed-off-by: Kevin Hao haoke...@gmail.com
---
A new patch in v2.

 arch/powerpc/kernel/entry_32.S| 8 +---
 arch/powerpc/kernel/fsl_booke_entry_mapping.S | 6 --
 arch/powerpc/kernel/head_fsl_booke.S  | 4 
 arch/powerpc/mm/hugetlbpage-book3e.c  | 3 ++-
 4 files changed, 15 insertions(+), 6 deletions(-)

diff --git a/arch/powerpc/kernel/entry_32.S b/arch/powerpc/kernel/entry_32.S
index 22b45a4..2ce22c2 100644
--- a/arch/powerpc/kernel/entry_32.S
+++ b/arch/powerpc/kernel/entry_32.S
@@ -75,10 +75,10 @@ crit_transfer_to_handler:
stw r0,MAS3(r11)
mfspr   r0,SPRN_MAS6
stw r0,MAS6(r11)
-#ifdef CONFIG_PHYS_64BIT
+BEGIN_MMU_FTR_SECTION
mfspr   r0,SPRN_MAS7
stw r0,MAS7(r11)
-#endif /* CONFIG_PHYS_64BIT */
+END_MMU_FTR_SECTION_IFSET(MMU_FTR_BIG_PHYS)
 #endif /* CONFIG_PPC_BOOK3E_MMU */
 #ifdef CONFIG_44x
mfspr   r0,SPRN_MMUCR
@@ -1112,8 +1112,10 @@ exc_exit_restart_end:
 #if defined(CONFIG_PPC_BOOK3E_MMU)
 #ifdef CONFIG_PHYS_64BIT
 #defineRESTORE_MAS7
\
+BEGIN_MMU_FTR_SECTION  \
lwz r11,MAS7(r1);   \
-   mtspr   SPRN_MAS7,r11;
+   mtspr   SPRN_MAS7,r11;  \
+END_MMU_FTR_SECTION_IFSET(MMU_FTR_BIG_PHYS)
 #else
 #defineRESTORE_MAS7
 #endif /* CONFIG_PHYS_64BIT */
diff --git a/arch/powerpc/kernel/fsl_booke_entry_mapping.S 
b/arch/powerpc/kernel/fsl_booke_entry_mapping.S
index a92c79b..2201f84 100644
--- a/arch/powerpc/kernel/fsl_booke_entry_mapping.S
+++ b/arch/powerpc/kernel/fsl_booke_entry_mapping.S
@@ -88,9 +88,11 @@ skpinv:  addir6,r6,1 /* 
Increment */
 1: mflrr7
 
mfspr   r8,SPRN_MAS3
-#ifdef CONFIG_PHYS_64BIT
+BEGIN_MMU_FTR_SECTION
mfspr   r23,SPRN_MAS7
-#endif
+MMU_FTR_SECTION_ELSE
+   li  r23,0
+ALT_MMU_FTR_SECTION_END_IFSET(MMU_FTR_BIG_PHYS)
and r8,r6,r8
subfic  r9,r6,-4096
and r9,r9,r7
diff --git a/arch/powerpc/kernel/head_fsl_booke.S 
b/arch/powerpc/kernel/head_fsl_booke.S
index d10a7ca..a04a48d 100644
--- a/arch/powerpc/kernel/head_fsl_booke.S
+++ b/arch/powerpc/kernel/head_fsl_booke.S
@@ -82,7 +82,11 @@ _ENTRY(_start);
and r19,r3,r18  /* r19 = page offset */
andcr31,r20,r18 /* r31 = page base */
or  r31,r31,r19 /* r31 = devtree phys addr */
+BEGIN_MMU_FTR_SECTION
mfspr   r30,SPRN_MAS7
+MMU_FTR_SECTION_ELSE
+   li  r30,0
+ALT_MMU_FTR_SECTION_END_IFSET(MMU_FTR_BIG_PHYS)
 
li  r25,0   /* phys kernel start (low) */
li  r24,0   /* CPU number */
diff --git a/arch/powerpc/mm/hugetlbpage-book3e.c 
b/arch/powerpc/mm/hugetlbpage-book3e.c
index 3bc7006..ac63e7e 100644
--- a/arch/powerpc/mm/hugetlbpage-book3e.c
+++ b/arch/powerpc/mm/hugetlbpage-book3e.c
@@ -103,7 +103,8 @@ void book3e_hugetlb_preload(struct vm_area_struct *vma, 
unsigned long ea,
if (mmu_has_feature(MMU_FTR_USE_PAIRED_MAS)) {
mtspr(SPRN_MAS7_MAS3, mas7_3);
} else {
-   mtspr(SPRN_MAS7, upper_32_bits(mas7_3));
+   if (mmu_has_feature(MMU_FTR_BIG_PHYS))
+   mtspr(SPRN_MAS7, upper_32_bits(mas7_3));
mtspr(SPRN_MAS3, lower_32_bits(mas7_3));
}
 
-- 
1.8.1.4

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev


[PATCH v2 2/8] powerpc/fsl_booke: introduce get_phys_addr function

2013-07-04 Thread Kevin Hao
Move the code which translates an effective address to a physical address
into a separate function, so it can be reused by other code.

Signed-off-by: Kevin Hao haoke...@gmail.com
---
A new patch in v2.

 arch/powerpc/kernel/head_fsl_booke.S | 54 +---
 1 file changed, 32 insertions(+), 22 deletions(-)

diff --git a/arch/powerpc/kernel/head_fsl_booke.S 
b/arch/powerpc/kernel/head_fsl_booke.S
index a04a48d..dab091e 100644
--- a/arch/powerpc/kernel/head_fsl_booke.S
+++ b/arch/powerpc/kernel/head_fsl_booke.S
@@ -65,28 +65,9 @@ _ENTRY(_start);
nop
 
/* Translate device tree address to physical, save in r30/r31 */
-   mfmsr   r16
-   mfspr   r17,SPRN_PID
-   rlwinm  r17,r17,16,0x3fff   /* turn PID into MAS6[SPID] */
-   rlwimi  r17,r16,28,0x0001   /* turn MSR[DS] into MAS6[SAS] */
-   mtspr   SPRN_MAS6,r17
-
-   tlbsx   0,r3/* must succeed */
-
-   mfspr   r16,SPRN_MAS1
-   mfspr   r20,SPRN_MAS3
-   rlwinm  r17,r16,25,0x1f /* r17 = log2(page size) */
-   li  r18,1024
-   slw r18,r18,r17 /* r18 = page size */
-   addir18,r18,-1
-   and r19,r3,r18  /* r19 = page offset */
-   andcr31,r20,r18 /* r31 = page base */
-   or  r31,r31,r19 /* r31 = devtree phys addr */
-BEGIN_MMU_FTR_SECTION
-   mfspr   r30,SPRN_MAS7
-MMU_FTR_SECTION_ELSE
-   li  r30,0
-ALT_MMU_FTR_SECTION_END_IFSET(MMU_FTR_BIG_PHYS)
+   bl  get_phys_addr
+   mr  r30,r3
+   mr  r31,r4
 
li  r25,0   /* phys kernel start (low) */
li  r24,0   /* CPU number */
@@ -860,6 +841,35 @@ KernelSPE:
 #endif /* CONFIG_SPE */
 
 /*
+ * Translate the effec addr in r3 to phys addr. The phys addr will be put
+ * into r3(higher 32bit) and r4(lower 32bit)
+ */
+get_phys_addr:
+   mfmsr   r8
+   mfspr   r9,SPRN_PID
+   rlwinm  r9,r9,16,0x3fff /* turn PID into MAS6[SPID] */
+   rlwimi  r9,r8,28,0x0001 /* turn MSR[DS] into MAS6[SAS] */
+   mtspr   SPRN_MAS6,r9
+
+   tlbsx   0,r3/* must succeed */
+
+   mfspr   r8,SPRN_MAS1
+   mfspr   r12,SPRN_MAS3
+   rlwinm  r9,r8,25,0x1f   /* r9 = log2(page size) */
+   li  r10,1024
+   slw r10,r10,r9  /* r10 = page size */
+   addir10,r10,-1
+   and r11,r3,r10  /* r11 = page offset */
+   andcr4,r12,r10  /* r4 = page base */
+   or  r4,r4,r11   /* r4 = devtree phys addr */
+BEGIN_MMU_FTR_SECTION
+   mfspr   r3,SPRN_MAS7
+MMU_FTR_SECTION_ELSE
+   li  r3,0
+ALT_MMU_FTR_SECTION_END_IFSET(MMU_FTR_BIG_PHYS)
+   blr
+
+/*
  * Global functions
  */
 
-- 
1.8.1.4

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev


[PATCH v2 3/8] powerpc: enable the relocatable support for the fsl booke 32bit kernel

2013-07-04 Thread Kevin Hao
This is based on the code in head_44x.S. Since we always align to
256M before mapping PAGE_OFFSET for a relocatable kernel, we also
change the init tlb mapping to 256M in size.

Signed-off-by: Kevin Hao haoke...@gmail.com
---
v2: Move the code to set kernstart_addr and virt_phys_offset to a c function.
So we can expand it easily later.

Hi Scott,

I still use the 256M alignment for the init tlb as in v1 for the following reasons:
  * This should be the most likely case in reality.
  * This is just for very early boot code and should not be a big issue
    if the first tlb entry shrinks to a smaller size later.

 arch/powerpc/Kconfig  |  2 +-
 arch/powerpc/kernel/fsl_booke_entry_mapping.S |  8 +++---
 arch/powerpc/kernel/head_fsl_booke.S  | 37 +++
 arch/powerpc/mm/fsl_booke_mmu.c   | 28 
 4 files changed, 71 insertions(+), 4 deletions(-)

diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index 5374776..5b2e115 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -859,7 +859,7 @@ config DYNAMIC_MEMSTART
 
 config RELOCATABLE
bool Build a relocatable kernel
-   depends on ADVANCED_OPTIONS  FLATMEM  44x
+   depends on ADVANCED_OPTIONS  FLATMEM  (44x || FSL_BOOKE)
select NONSTATIC_KERNEL
help
  This builds a kernel image that is capable of running at the
diff --git a/arch/powerpc/kernel/fsl_booke_entry_mapping.S 
b/arch/powerpc/kernel/fsl_booke_entry_mapping.S
index 2201f84..211e507 100644
--- a/arch/powerpc/kernel/fsl_booke_entry_mapping.S
+++ b/arch/powerpc/kernel/fsl_booke_entry_mapping.S
@@ -167,10 +167,10 @@ ALT_MMU_FTR_SECTION_END_IFSET(MMU_FTR_BIG_PHYS)
lis r6,0x1000   /* Set MAS0(TLBSEL) = TLB1(1), ESEL = 0 
*/
mtspr   SPRN_MAS0,r6
lis r6,(MAS1_VALID|MAS1_IPROT)@h
-   ori r6,r6,(MAS1_TSIZE(BOOK3E_PAGESZ_64M))@l
+   ori r6,r6,(MAS1_TSIZE(BOOK3E_PAGESZ_256M))@l
mtspr   SPRN_MAS1,r6
-   lis r6,MAS2_VAL(PAGE_OFFSET, BOOK3E_PAGESZ_64M, M_IF_SMP)@h
-   ori r6,r6,MAS2_VAL(PAGE_OFFSET, BOOK3E_PAGESZ_64M, M_IF_SMP)@l
+   lis r6,MAS2_VAL(PAGE_OFFSET, BOOK3E_PAGESZ_256M, M_IF_SMP)@h
+   ori r6,r6,MAS2_VAL(PAGE_OFFSET, BOOK3E_PAGESZ_256M, M_IF_SMP)@l
mtspr   SPRN_MAS2,r6
mtspr   SPRN_MAS3,r8
tlbwe
@@ -178,6 +178,8 @@ ALT_MMU_FTR_SECTION_END_IFSET(MMU_FTR_BIG_PHYS)
 /* 7. Jump to KERNELBASE mapping */
lis r6,(KERNELBASE  ~0xfff)@h
ori r6,r6,(KERNELBASE  ~0xfff)@l
+   rlwinm  r7,r25,0,0x0fff
+   add r6,r7,r6
 
 #elif defined(ENTRY_MAPPING_KEXEC_SETUP)
 /*
diff --git a/arch/powerpc/kernel/head_fsl_booke.S 
b/arch/powerpc/kernel/head_fsl_booke.S
index dab091e..134064d 100644
--- a/arch/powerpc/kernel/head_fsl_booke.S
+++ b/arch/powerpc/kernel/head_fsl_booke.S
@@ -73,6 +73,33 @@ _ENTRY(_start);
li  r24,0   /* CPU number */
li  r23,0   /* phys kernel start (high) */
 
+#ifdef CONFIG_RELOCATABLE
+   bl  0f  /* Get our runtime address */
+0: mflrr3  /* Make it accessible */
+   addis   r3,r3,(_stext - 0b)@ha
+   addir3,r3,(_stext - 0b)@l   /* Get our current runtime base */
+
+   /* Translate _stext address to physical, save in r23/r25 */
+   bl  get_phys_addr
+   mr  r23,r3
+   mr  r25,r4
+
+   /*
+* We have the runtime (virutal) address of our base.
+* We calculate our shift of offset from a 256M page.
+* We could map the 256M page we belong to at PAGE_OFFSET and
+* get going from there.
+*/
+   lis r4,KERNELBASE@h
+   ori r4,r4,KERNELBASE@l
+   rlwinm  r6,r25,0,0xfff  /* r6 = PHYS_START % 256M */
+   rlwinm  r5,r4,0,0xfff   /* r5 = KERNELBASE % 256M */
+   subfr3,r5,r6/* r3 = r6 - r5 */
+   add r3,r4,r3/* Required Virutal Address */
+
+   bl  relocate
+#endif
+
 /* We try to not make any assumptions about how the boot loader
  * setup or used the TLBs.  We invalidate all mappings from the
  * boot loader and load a single entry in TLB1[0] to map the
@@ -182,6 +209,16 @@ _ENTRY(__early_start)
 
bl  early_init
 
+#ifdef CONFIG_RELOCATABLE
+#ifdef CONFIG_PHYS_64BIT
+   mr  r3,r23
+   mr  r4,r25
+#else
+   mr  r3,r25
+#endif
+   bl  relocate_init
+#endif
+
 #ifdef CONFIG_DYNAMIC_MEMSTART
lis r3,kernstart_addr@ha
la  r3,kernstart_addr@l(r3)
diff --git a/arch/powerpc/mm/fsl_booke_mmu.c b/arch/powerpc/mm/fsl_booke_mmu.c
index 07ba45b..5fe271c 100644
--- a/arch/powerpc/mm/fsl_booke_mmu.c
+++ b/arch/powerpc/mm/fsl_booke_mmu.c
@@ -241,4 +241,32 @@ void setup_initial_memory_limit(phys_addr_t 
first_memblock_base,
/* 64M mapped 

[PATCH v2 4/8] powerpc/fsl_booke: set the tlb entry for the kernel address in AS1

2013-07-04 Thread Kevin Hao
We use the tlb1 entries to map low mem to the kernel space. The
current code assumes that the first tlb entry covers the
kernel image. But this is not true in some special cases, such as
when we run a relocatable kernel above 256M or set
CONFIG_KERNEL_START above 256M. So we choose to switch to address
space 1 before setting these tlb entries.

Signed-off-by: Kevin Hao haoke...@gmail.com
---
A new patch in v2.

 arch/powerpc/kernel/head_fsl_booke.S | 81 
 arch/powerpc/mm/fsl_booke_mmu.c  |  2 +
 arch/powerpc/mm/mmu_decl.h   |  2 +
 3 files changed, 85 insertions(+)

diff --git a/arch/powerpc/kernel/head_fsl_booke.S 
b/arch/powerpc/kernel/head_fsl_booke.S
index 134064d..0cbfe95 100644
--- a/arch/powerpc/kernel/head_fsl_booke.S
+++ b/arch/powerpc/kernel/head_fsl_booke.S
@@ -1172,6 +1172,87 @@ __secondary_hold_acknowledge:
 #endif
 
 /*
+ * Create a tbl entry with the same effective and physical address as
+ * the tlb entry used by the current running code. But set the TS to 1.
+ * Then switch to the address space 1. It will return with the r3 set to
+ * the ESEL of the new created tlb.
+ */
+_GLOBAL(switch_to_as1)
+   mflrr5
+
+   /* Find a entry not used */
+   mfspr   r3,SPRN_TLB1CFG
+   andi.   r3,r3,0xfff
+   mfspr   r4,SPRN_PID
+   rlwinm  r4,r4,16,0x3fff /* turn PID into MAS6[SPID] */
+   mtspr   SPRN_MAS6,r4
+1: lis r4,0x1000   /* Set MAS0(TLBSEL) = 1 */
+   addir3,r3,-1
+   rlwimi  r4,r3,16,4,15   /* Setup MAS0 = TLBSEL | ESEL(r3) */
+   mtspr   SPRN_MAS0,r4
+   tlbre
+   mfspr   r4,SPRN_MAS1
+   andis.  r4,r4,MAS1_VALID@h
+   bne 1b
+
+   /* Get the tlb entry used by the current running code */
+   bl  0f
+0: mflrr4
+   tlbsx   0,r4
+
+   mfspr   r4,SPRN_MAS1
+   ori r4,r4,MAS1_TS   /* Set the TS = 1 */
+   mtspr   SPRN_MAS1,r4
+
+   mfspr   r4,SPRN_MAS0
+   rlwinm  r4,r4,0,~MAS0_ESEL_MASK
+   rlwimi  r4,r3,16,4,15   /* Setup MAS0 = TLBSEL | ESEL(r3) */
+   mtspr   SPRN_MAS0,r4
+   tlbwe
+   isync
+   sync
+
+   mfmsr   r4
+   ori r4,r4,MSR_IS | MSR_DS
+   mtspr   SPRN_SRR0,r5
+   mtspr   SPRN_SRR1,r4
+   sync
+   rfi
+
+/*
+ * Restore to the address space 0 and also invalidate the tlb entry created
+ * by switch_to_as1.
+*/
+_GLOBAL(restore_to_as0)
+   mflrr0
+
+   bl  0f
+0: mflrr9
+   addir9,r9,1f - 0b
+
+   mfmsr   r7
+   li  r8,(MSR_IS | MSR_DS)
+   andcr7,r7,r8
+
+   mtspr   SPRN_SRR0,r9
+   mtspr   SPRN_SRR1,r7
+   sync
+   rfi
+
+   /* Invalidate the temporary tlb entry for AS1 */
+1: lis r9,0x1000   /* Set MAS0(TLBSEL) = 1 */
+   rlwimi  r9,r3,16,4,15   /* Setup MAS0 = TLBSEL | ESEL(r3) */
+   mtspr   SPRN_MAS0,r9
+   tlbre
+   mfspr   r9,SPRN_MAS1
+   rlwinm  r9,r9,0,2,31/* Clear MAS1 Valid and IPPROT */
+   mtspr   SPRN_MAS1,r9
+   tlbwe
+   isync
+   mtlrr0
+   blr
+
+/*
  * We put a few things here that have to be page-aligned. This stuff
  * goes at the beginning of the data segment, which is page-aligned.
  */
diff --git a/arch/powerpc/mm/fsl_booke_mmu.c b/arch/powerpc/mm/fsl_booke_mmu.c
index 5fe271c..8f60ef8 100644
--- a/arch/powerpc/mm/fsl_booke_mmu.c
+++ b/arch/powerpc/mm/fsl_booke_mmu.c
@@ -222,7 +222,9 @@ void __init adjust_total_lowmem(void)
/* adjust lowmem size to __max_low_memory */
ram = min((phys_addr_t)__max_low_memory, (phys_addr_t)total_lowmem);
 
+   i = switch_to_as1();
__max_low_memory = map_mem_in_cams(ram, CONFIG_LOWMEM_CAM_NUM);
+   restore_to_as0(i);
 
pr_info(Memory CAM mapping: );
for (i = 0; i  tlbcam_index - 1; i++)
diff --git a/arch/powerpc/mm/mmu_decl.h b/arch/powerpc/mm/mmu_decl.h
index 83eb5d5..3a65644 100644
--- a/arch/powerpc/mm/mmu_decl.h
+++ b/arch/powerpc/mm/mmu_decl.h
@@ -148,6 +148,8 @@ extern unsigned long calc_cam_sz(unsigned long ram, 
unsigned long virt,
 extern void MMU_init_hw(void);
 extern unsigned long mmu_mapin_ram(unsigned long top);
 extern void adjust_total_lowmem(void);
+extern int switch_to_as1(void);
+extern void restore_to_as0(int);
 #endif
 extern void loadcam_entry(unsigned int index);
 
-- 
1.8.1.4

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev


[PATCH v2 5/8] memblock: introduce the memblock_reinit function

2013-07-04 Thread Kevin Hao
In the current code, the data used by memblock is initialized
statically. But in some special cases we may scan the memory twice,
so we need a way to reinitialize this data before the second
scan.

Signed-off-by: Kevin Hao haoke...@gmail.com
---
A new patch in v2.

 include/linux/memblock.h |  1 +
 mm/memblock.c| 33 +++--
 2 files changed, 24 insertions(+), 10 deletions(-)

diff --git a/include/linux/memblock.h b/include/linux/memblock.h
index f388203..9d55311 100644
--- a/include/linux/memblock.h
+++ b/include/linux/memblock.h
@@ -58,6 +58,7 @@ int memblock_remove(phys_addr_t base, phys_addr_t size);
 int memblock_free(phys_addr_t base, phys_addr_t size);
 int memblock_reserve(phys_addr_t base, phys_addr_t size);
 void memblock_trim_memory(phys_addr_t align);
+void memblock_reinit(void);
 
 #ifdef CONFIG_HAVE_MEMBLOCK_NODE_MAP
 void __next_mem_pfn_range(int *idx, int nid, unsigned long *out_start_pfn,
diff --git a/mm/memblock.c b/mm/memblock.c
index c5fad93..9406ce6 100644
--- a/mm/memblock.c
+++ b/mm/memblock.c
@@ -23,23 +23,36 @@
 static struct memblock_region 
memblock_memory_init_regions[INIT_MEMBLOCK_REGIONS] __initdata_memblock;
 static struct memblock_region 
memblock_reserved_init_regions[INIT_MEMBLOCK_REGIONS] __initdata_memblock;
 
-struct memblock memblock __initdata_memblock = {
-   .memory.regions = memblock_memory_init_regions,
-   .memory.cnt = 1,/* empty dummy entry */
-   .memory.max = INIT_MEMBLOCK_REGIONS,
-
-   .reserved.regions   = memblock_reserved_init_regions,
-   .reserved.cnt   = 1,/* empty dummy entry */
-   .reserved.max   = INIT_MEMBLOCK_REGIONS,
+#define INIT_MEMBLOCK {
\
+   .memory.regions = memblock_memory_init_regions, \
+   .memory.cnt = 1,/* empty dummy entry */ \
+   .memory.max = INIT_MEMBLOCK_REGIONS,\
+   \
+   .reserved.regions   = memblock_reserved_init_regions,   \
+   .reserved.cnt   = 1,/* empty dummy entry */ \
+   .reserved.max   = INIT_MEMBLOCK_REGIONS,\
+   \
+   .current_limit  = MEMBLOCK_ALLOC_ANYWHERE,  \
+}
 
-   .current_limit  = MEMBLOCK_ALLOC_ANYWHERE,
-};
+struct memblock memblock __initdata_memblock = INIT_MEMBLOCK;
 
 int memblock_debug __initdata_memblock;
 static int memblock_can_resize __initdata_memblock;
 static int memblock_memory_in_slab __initdata_memblock = 0;
 static int memblock_reserved_in_slab __initdata_memblock = 0;
 
+void __init memblock_reinit(void)
+{
+   memset(memblock_memory_init_regions, 0,
+   sizeof(memblock_memory_init_regions));
+   memset(memblock_reserved_init_regions, 0,
+   sizeof(memblock_reserved_init_regions));
+
+   memset(memblock, 0, sizeof(memblock));
+   memblock = (struct memblock) INIT_MEMBLOCK;
+}
+
 /* inline so we don't get a warning when pr_debug is compiled out */
 static __init_memblock const char *
 memblock_type_name(struct memblock_type *type)
-- 
1.8.1.4

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev


[PATCH v2 7/8] powerpc/fsl_booke: make sure PAGE_OFFSET map to memstart_addr for relocatable kernel

2013-07-04 Thread Kevin Hao
This is always true for a non-relocatable kernel; otherwise the kernel
would get stuck. But for a relocatable kernel, it is a little more
complicated. When booting a relocatable kernel, we just align the
kernel start addr to 256M and map PAGE_OFFSET from there. The
relocation is based on this virtual address. But if this address
is not the same as memstart_addr, we have to change the
mapping of PAGE_OFFSET to the real memstart_addr and do the relocation
again.

Signed-off-by: Kevin Hao haoke...@gmail.com
---
A new patch in v2.

 arch/powerpc/kernel/head_fsl_booke.S | 75 +---
 arch/powerpc/mm/fsl_booke_mmu.c  | 68 ++--
 arch/powerpc/mm/mmu_decl.h   |  2 +-
 3 files changed, 118 insertions(+), 27 deletions(-)

diff --git a/arch/powerpc/kernel/head_fsl_booke.S 
b/arch/powerpc/kernel/head_fsl_booke.S
index 0cbfe95..00cfb7e 100644
--- a/arch/powerpc/kernel/head_fsl_booke.S
+++ b/arch/powerpc/kernel/head_fsl_booke.S
@@ -84,6 +84,39 @@ _ENTRY(_start);
mr  r23,r3
mr  r25,r4
 
+   bl  0f
+0: mflrr8
+   addis   r3,r8,(is_second_reloc - 0b)@ha
+   lwz r19,(is_second_reloc - 0b)@l(r3)
+
+   /* Check if this is the second relocation. */
+   cmpwi   r19,1
+   bne 1f
+
+   /*
+* For the second relocation, we already get the real memstart_addr
+* from device tree. So we will map PAGE_OFFSET to memstart_addr,
+* then the virtual address of start kernel should be:
+*  PAGE_OFFSET + (kernstart_addr - memstart_addr)
+* Since the offset between kernstart_addr and memstart_addr should
+* never be beyond 1G, so we can just use the lower 32bit of them
+* for the calculation.
+*/
+   lis r3,PAGE_OFFSET@h
+
+   addis   r4,r8,(kernstart_addr - 0b)@ha
+   addir4,r4,(kernstart_addr - 0b)@l
+   lwz r5,4(r4)
+
+   addis   r6,r8,(memstart_addr - 0b)@ha
+   addir6,r6,(memstart_addr - 0b)@l
+   lwz r7,4(r6)
+
+   subfr5,r7,r5
+   add r3,r3,r5
+   b   2f
+
+1:
/*
 * We have the runtime (virutal) address of our base.
 * We calculate our shift of offset from a 256M page.
@@ -97,7 +130,7 @@ _ENTRY(_start);
subfr3,r5,r6/* r3 = r6 - r5 */
add r3,r4,r3/* Required Virutal Address */
 
-   bl  relocate
+2: bl  relocate
 #endif
 
 /* We try to not make any assumptions about how the boot loader
@@ -121,10 +154,19 @@ _ENTRY(_start);
 
 _ENTRY(__early_start)
 
+#ifdef CONFIG_RELOCATABLE
+   /*
+* For the second relocation, we already set the right tlb entries
+* for the kernel space, so skip the code in fsl_booke_entry_mapping.S
+   */
+   cmpwi   r19,1
+   beq set_ivor
+#endif
 #define ENTRY_MAPPING_BOOT_SETUP
 #include fsl_booke_entry_mapping.S
 #undef ENTRY_MAPPING_BOOT_SETUP
 
+set_ivor:
/* Establish the interrupt vector offsets */
SET_IVOR(0,  CriticalInput);
SET_IVOR(1,  MachineCheck);
@@ -210,11 +252,13 @@ _ENTRY(__early_start)
bl  early_init
 
 #ifdef CONFIG_RELOCATABLE
+   mr  r3,r30
+   mr  r4,r31
 #ifdef CONFIG_PHYS_64BIT
-   mr  r3,r23
-   mr  r4,r25
+   mr  r5,r23
+   mr  r6,r25
 #else
-   mr  r3,r25
+   mr  r5,r25
 #endif
bl  relocate_init
 #endif
@@ -1222,6 +1266,9 @@ _GLOBAL(switch_to_as1)
 /*
  * Restore to the address space 0 and also invalidate the tlb entry created
  * by switch_to_as1.
+ * r3 - the tlb entry which should be invalidated
+ * r4 - __pa(PAGE_OFFSET in AS0) - pa(PAGE_OFFSET in AS1)
+ * r5 - device tree virtual address
 */
 _GLOBAL(restore_to_as0)
mflrr0
@@ -1230,7 +1277,15 @@ _GLOBAL(restore_to_as0)
 0: mflrr9
addir9,r9,1f - 0b
 
-   mfmsr   r7
+   /*
+* We may map the PAGE_OFFSET in AS0 to a different physical address,
+* so we need calculate the right jump and device tree address based
+* on the offset passed by r4.
+   */
+   subfr9,r4,r9
+   subfr5,r4,r5
+
+2: mfmsr   r7
li  r8,(MSR_IS | MSR_DS)
andcr7,r7,r8
 
@@ -1249,9 +1304,19 @@ _GLOBAL(restore_to_as0)
mtspr   SPRN_MAS1,r9
tlbwe
isync
+
+   cmpwi   r4,0
+   bne 3f
mtlrr0
blr
 
+   /*
+* The PAGE_OFFSET will map to a different physical address,
+* jump to _start to do another relocation again.
+   */
+3: mr  r3,r5
+   bl  _start
+
 /*
  * We put a few things here that have to be page-aligned. This stuff
  * goes at the beginning of the data segment, which is page-aligned.
diff --git a/arch/powerpc/mm/fsl_booke_mmu.c b/arch/powerpc/mm/fsl_booke_mmu.c
index 8f60ef8..dd283fd 100644
--- 

[PATCH v2 6/8] powerpc: introduce early_get_first_memblock_info

2013-07-04 Thread Kevin Hao
Since a relocatable kernel can be loaded at any place, there
is no relation between the kernel start addr and the memstart_addr.
So we can't calculate the memstart_addr from the kernel start addr. We
also can't wait to do the relocation until after we get the real
memstart_addr from the device tree, because that is too late. So introduce
a new function we can use to get the first memblock address and size
at a very early stage (before machine_init).

Signed-off-by: Kevin Hao haoke...@gmail.com
---
A new patch in v2.

 arch/powerpc/kernel/prom.c | 24 
 include/linux/of_fdt.h |  1 +
 2 files changed, 25 insertions(+)

diff --git a/arch/powerpc/kernel/prom.c b/arch/powerpc/kernel/prom.c
index eb23ac9..9a69d2d 100644
--- a/arch/powerpc/kernel/prom.c
+++ b/arch/powerpc/kernel/prom.c
@@ -753,6 +753,30 @@ void __init early_init_devtree(void *params)
DBG( - early_init_devtree()\n);
 }
 
+#ifdef CONFIG_RELOCATABLE
+/*
+ * This function run before early_init_devtree, so we have to init
+ * initial_boot_params. Since early_init_dt_scan_memory_ppc will be
+ * executed again in early_init_devtree, we have to reinitialize the
+ * memblock data before return.
+ */
+void __init early_get_first_memblock_info(void *params, phys_addr_t *size)
+{
+   /* Setup flat device-tree pointer */
+   initial_boot_params = params;
+
+   /* Scan memory nodes and rebuild MEMBLOCKs */
+   of_scan_flat_dt(early_init_dt_scan_root, NULL);
+   of_scan_flat_dt(early_init_dt_scan_memory_ppc, NULL);
+
+   if (size)
+   *size = first_memblock_size;
+
+   /* Undo what early_init_dt_scan_memory_ppc does to memblock */
+   memblock_reinit();
+}
+#endif
+
 /***
  *
  * New implementation of the OF find APIs, return a refcounted
diff --git a/include/linux/of_fdt.h b/include/linux/of_fdt.h
index ed136ad..befe744 100644
--- a/include/linux/of_fdt.h
+++ b/include/linux/of_fdt.h
@@ -117,6 +117,7 @@ extern int early_init_dt_scan_root(unsigned long node, 
const char *uname,
 /* Other Prototypes */
 extern void unflatten_device_tree(void);
 extern void early_init_devtree(void *);
+extern void early_get_first_memblock_info(void *, phys_addr_t *);
 #else /* CONFIG_OF_FLATTREE */
 static inline void unflatten_device_tree(void) {}
 #endif /* CONFIG_OF_FLATTREE */
-- 
1.8.1.4

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev


[PATCH v2 8/8] powerpc/fsl_booke: enable the relocatable for the kdump kernel

2013-07-04 Thread Kevin Hao
RELOCATABLE is more flexible and has no alignment restriction.
It is also a superset of DYNAMIC_MEMSTART, so use it by default for
a kdump kernel.

Signed-off-by: Kevin Hao haoke...@gmail.com
---
A new patch in v2.

 arch/powerpc/Kconfig | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index 5b2e115..885bf06 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -375,8 +375,7 @@ config KEXEC
 config CRASH_DUMP
bool Build a kdump crash kernel
depends on PPC64 || 6xx || FSL_BOOKE || (44x  !SMP)
-   select RELOCATABLE if PPC64 || 44x
-   select DYNAMIC_MEMSTART if FSL_BOOKE
+   select RELOCATABLE if PPC64 || 44x || FSL_BOOKE
help
  Build a kernel suitable for use as a kdump capture kernel.
  The same kernel binary can be used as production kernel and dump
-- 
1.8.1.4

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev


Re: [PATCH v2 2/2] perf tools: Make Power7 events available for perf

2013-07-04 Thread Peter Zijlstra
On Thu, Jul 04, 2013 at 10:52:18PM +1000, Michael Ellerman wrote:
 I don't think it even needs libpfm4, just some csv files in tools/perf
 would do the trick.

Right; I think Stephane and Jiri are in favour of creating a 'new' project that
includes just the event definitions in a plain text format and a little library
with parser to be used by all interested parties.

It's just not something that's moving along at any pace at all atm :/
___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev


Re: [PATCH] of: Fix address decoding on Bimini and js2x machines

2013-07-04 Thread Grant Likely
On Wed, 3 Jul 2013 15:37:56 +0100, Grant Likely grant.lik...@linaro.org wrote:
 On Wed, Jul 3, 2013 at 3:10 PM, Rob Herring robherri...@gmail.com wrote:
  On 07/03/2013 01:01 AM, Benjamin Herrenschmidt wrote:
   Commit:
 
e38c0a1fbc5803cbacdaac0557c70ac8ca5152e7
of/address: Handle #address-cells  2 specially
 
  broke real time clock access on Bimini, js2x, and similar powerpc
  machines using the maple platform. That code was indirectly relying
  on the old (broken) behaviour of the translation for the hypertransport
  to ISA bridge.
 
  This fixes it by treating hypertransport as a PCI bus
 
  Signed-off-by: Benjamin Herrenschmidt b...@kernel.crashing.org
  CC: sta...@vger.kernel.org [v3.6+]
  ---
 
  Rob, if you have no objection I will put that in powerpc -next
 
  NP.
 
  Acked-by: Rob Herring rob.herr...@calxeda.com
 
 I'll include this in my 3.11 pull request for Linus

Oops. Ben, I misread what you wrote. It would have been just fine to
include it in your powerpc -next branch. Sorry for the confusion.

Anyway, I saw your powerpc pull req and that this patch wasn't in it, so
I've picked it up and will send it to Linus as soon as the test build
completes.

g.
___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev


Re: [PATCH 8/8] powerpc/perf: Add power8 EBB support

2013-07-04 Thread Adhemerval Zanella
Hi Michael,

I believe you forgot to add the cpu_user_features2 bit to announce the EBB
support for P8, patch following:

Signed-off-by: Adhemerval Zanella azane...@linux.vnet.ibm.com
---
 arch/powerpc/kernel/cputable.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/arch/powerpc/kernel/cputable.c b/arch/powerpc/kernel/cputable.c
index 2a45d0f..5f0c80a 100644
--- a/arch/powerpc/kernel/cputable.c
+++ b/arch/powerpc/kernel/cputable.c
@@ -105,6 +105,7 @@ extern void __restore_cpu_e6500(void);
 PPC_FEATURE_PSERIES_PERFMON_COMPAT)
 #define COMMON_USER2_POWER8(PPC_FEATURE2_ARCH_2_07 | \
 PPC_FEATURE2_HTM_COMP | PPC_FEATURE2_DSCR | \
+PPC_FEATURE2_EBB | \
 PPC_FEATURE2_ISEL | PPC_FEATURE2_TAR)
 #define COMMON_USER_PA6T   (COMMON_USER_PPC64 | PPC_FEATURE_PA6T |\
 PPC_FEATURE_TRUE_LE | \


On 28-06-2013 01:15, Anshuman Khandual wrote:
 On 06/27/2013 05:22 PM, Michael Ellerman wrote:
 On Wed, 2013-06-26 at 15:28 +0530, Anshuman Khandual wrote:
 @@ -117,6 +117,7 @@
 (EVENT_UNIT_MASK   EVENT_UNIT_SHIFT) |   \
 (EVENT_COMBINE_MASKEVENT_COMBINE_SHIFT)  |   \
 (EVENT_MARKED_MASK EVENT_MARKED_SHIFT)   |   \
 +   (1ull  EVENT_CONFIG_EBB_SHIFT)   |   \
 We should define this macro like EVENT_MARKED_MASK

 #define EVENT_EBB_MASK   0x1

 Numeric value of 1ull stands out odd in the scheme.
 Yeah I guess.

 Would you like it in blue? :)

 :) No, I meant probably a macro definition would be cool.

 ___
 Linuxppc-dev mailing list
 Linuxppc-dev@lists.ozlabs.org
 https://lists.ozlabs.org/listinfo/linuxppc-dev


___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev


[PATCH] powerpc/math-emu: fix load/store indexed emulation

2013-07-04 Thread James Yang
Load/store indexed instructions where the index register RA=R0, such
as lfdx f1,0,r3, are not illegal.

Load/store indexed with update instructions where the index register
RA=R0, such as lfdux f1,0,r3, are invalid, and, to be consistent
with existing math-emu behavior for other invalid instruction forms,
will signal as illegal.
---
 arch/powerpc/math-emu/math.c |   15 +--
 1 files changed, 5 insertions(+), 10 deletions(-)

diff --git a/arch/powerpc/math-emu/math.c b/arch/powerpc/math-emu/math.c
index 164d559..eabce90 100644
--- a/arch/powerpc/math-emu/math.c
+++ b/arch/powerpc/math-emu/math.c
@@ -410,21 +410,16 @@ do_mathemu(struct pt_regs *regs)
case XE:
idx = (insn  16)  0x1f;
op0 = (void *)current-thread.TS_FPR((insn  21)  0x1f);
-   if (!idx) {
-   if (((insn  1)  0x3ff) == STFIWX)
-   op1 = (void *)(regs-gpr[(insn  11)  0x1f]);
-   else
-   goto illegal;
-   } else {
-   op1 = (void *)(regs-gpr[idx] + regs-gpr[(insn  11) 
 0x1f]);
-   }
-
+   op1 = (void *)((idx ? regs-gpr[idx] : 0)
+   + regs-gpr[(insn  11)  0x1f]);
break;
 
case XEU:
idx = (insn  16)  0x1f;
+   if (!idx)
+   goto illegal;
op0 = (void *)current-thread.TS_FPR((insn  21)  0x1f);
-   op1 = (void *)((idx ? regs-gpr[idx] : 0)
+   op1 = (void *)(regs-gpr[idx]
+ regs-gpr[(insn  11)  0x1f]);
break;
 
-- 
1.7.0.4


___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev


[PATCH v2] powerpc/math-emu: fix load/store indexed emulation

2013-07-04 Thread James Yang
Load/store indexed instructions where the index register RA=R0, such
as lfdx f1,0,r3, are not illegal.

Load/store indexed with update instructions where the index register
RA=R0, such as lfdux f1,0,r3, are invalid, and, to be consistent
with existing math-emu behavior for other invalid instruction forms,
will signal as illegal.

Signed-off-by: James Yang james.y...@freescale.com
---
 arch/powerpc/math-emu/math.c |   15 +--
 1 files changed, 5 insertions(+), 10 deletions(-)

diff --git a/arch/powerpc/math-emu/math.c b/arch/powerpc/math-emu/math.c
index 164d559..eabce90 100644
--- a/arch/powerpc/math-emu/math.c
+++ b/arch/powerpc/math-emu/math.c
@@ -410,21 +410,16 @@ do_mathemu(struct pt_regs *regs)
case XE:
idx = (insn  16)  0x1f;
op0 = (void *)current-thread.TS_FPR((insn  21)  0x1f);
-   if (!idx) {
-   if (((insn  1)  0x3ff) == STFIWX)
-   op1 = (void *)(regs-gpr[(insn  11)  0x1f]);
-   else
-   goto illegal;
-   } else {
-   op1 = (void *)(regs-gpr[idx] + regs-gpr[(insn  11) 
 0x1f]);
-   }
-
+   op1 = (void *)((idx ? regs-gpr[idx] : 0)
+   + regs-gpr[(insn  11)  0x1f]);
break;
 
case XEU:
idx = (insn  16)  0x1f;
+   if (!idx)
+   goto illegal;
op0 = (void *)current-thread.TS_FPR((insn  21)  0x1f);
-   op1 = (void *)((idx ? regs-gpr[idx] : 0)
+   op1 = (void *)(regs-gpr[idx]
+ regs-gpr[(insn  11)  0x1f]);
break;
 
-- 
1.7.0.4


___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev


Re: [PATCH] of: Fix address decoding on Bimini and js2x machines

2013-07-04 Thread Benjamin Herrenschmidt
On Thu, 2013-07-04 at 17:18 +0100, Grant Likely wrote:
  I'll include this in my 3.11 pull request for Linus
 
 Oops. Ben, I misread what you wrote. It would have been just fine to
 include it in your powerpc -next branch. Sorry for the confusion.
 
 Anyway, I saw your powerpc pull req and that this patch wasn't in it, so
 I've picked it up and will send it to Linus as soon as the test build
 completes.

Yeah, I was about to add it to a subsequent fixes branch but since you
picked it up I'll just leave it.

Thanks !

Cheers,
Ben.
 

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev


Re: [PATCH] Emulate sync instruction variants

2013-07-04 Thread James Yang
On Thu, 4 Jul 2013, Benjamin Herrenschmidt wrote:

 On Thu, 2013-07-04 at 09:31 +0100, David Laight wrote:
  Do you need to execute 'sync' here?
  It is worth checking whether the trap entry/exit doesn't
  do an implicit one for you.
 
 Not really. It does an implicit isync (more than one even) but not a 
 sync.

The execution of a sync is required because the original sync variant 
instruction is not actually executed, and, per Ben, no other explicit 
sync exists in the exception handler.  If the ISA extends the sync 
instruction using its reserved bits, the intent would also be that 
executing a heavyweight sync is suitably compatible, otherwise they 
should have modified some other synchronization instruction.  This 
patch ensures at least one sync instruction is executed for each sync 
variant instruction executed that triggers a program exception.

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev


[PATCH V2 2/2] powerpc/85xx: add the P1020RDB-PD DTS support

2013-07-04 Thread Haijun Zhang
Overview of P1020RDB-PD device:
- DDR3 2GB
- NOR flash 64MB
- NAND flash 128MB
- SPI flash 16MB
- I2C EEPROM 256Kb
- eTSEC1 (RGMII PHY) connected to VSC7385 L2 switch
- eTSEC2 (SGMII PHY)
- eTSEC3 (RGMII PHY)
- SDHC
- 2 USB ports
- 4 TDM ports
- PCIe

Signed-off-by: Haijun Zhang haijun.zh...@freescale.com
Signed-off-by: Jerry Huang chang-ming.hu...@freescale.com
CC: Scott Wood scottw...@freescale.com
---
changes for v2:
- Remove address cells and size cells for pc/pd board

 arch/powerpc/boot/dts/p1020rdb-pc.dtsi|   4 +-
 arch/powerpc/boot/dts/p1020rdb-pd.dtsi| 253 ++
 arch/powerpc/boot/dts/p1020rdb-pd_32b.dts |  90 +++
 3 files changed, 344 insertions(+), 3 deletions(-)
 create mode 100644 arch/powerpc/boot/dts/p1020rdb-pd.dtsi
 create mode 100644 arch/powerpc/boot/dts/p1020rdb-pd_32b.dts

diff --git a/arch/powerpc/boot/dts/p1020rdb-pc.dtsi 
b/arch/powerpc/boot/dts/p1020rdb-pc.dtsi
index c952cd3..9d24501 100644
--- a/arch/powerpc/boot/dts/p1020rdb-pc.dtsi
+++ b/arch/powerpc/boot/dts/p1020rdb-pc.dtsi
@@ -131,9 +131,7 @@
};
 
cpld@3,0 {
-   #address-cells = 1;
-   #size-cells = 1;
-   compatible = cpld;
+   compatible = fsl, p1020rdb-cpld;
reg = 0x3 0x0 0x2;
read-only;
};
diff --git a/arch/powerpc/boot/dts/p1020rdb-pd.dtsi 
b/arch/powerpc/boot/dts/p1020rdb-pd.dtsi
new file mode 100644
index 000..03b308d
--- /dev/null
+++ b/arch/powerpc/boot/dts/p1020rdb-pd.dtsi
@@ -0,0 +1,253 @@
+/*
+ * P1020RDB-PD Device Tree Source stub (no addresses or top-level ranges)
+ *
+ * Copyright 2013 Freescale Semiconductor Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ * * Redistributions of source code must retain the above copyright
+ *   notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ *   notice, this list of conditions and the following disclaimer in the
+ *   documentation and/or other materials provided with the distribution.
+ * * Neither the name of Freescale Semiconductor nor the
+ *   names of its contributors may be used to endorse or promote products
+ *   derived from this software without specific prior written permission.
+ *
+ *
+ * ALTERNATIVELY, this software may be distributed under the terms of the
+ * GNU General Public License (GPL) as published by the Free Software
+ * Foundation, either version 2 of that License or (at your option) any
+ * later version.
+ *
+ * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL Freescale Semiconductor BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 
THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+lbc {
+   nor@0,0 {
+   #address-cells = 1;
+   #size-cells = 1;
+   compatible = cfi-flash;
+   reg = 0x0 0x0 0x400;
+   bank-width = 2;
+   device-width = 1;
+
+   partition@0 {
+   /* 128KB for DTB Image */
+   reg = 0x0 0x0002;
+   label = NOR DTB Image;
+   };
+
+   partition@2 {
+   /* 3.875 MB for Linux Kernel Image */
+   reg = 0x0002 0x003e;
+   label = NOR Linux Kernel Image;
+   };
+
+   partition@40 {
+   /* 58MB for Root file System */
+   reg = 0x0040 0x03a0;
+   label = NOR Root File System;
+   };
+
+   partition@3e0 {
+   /* This location must not be altered  */
+   /* 1M for Vitesse 7385 Switch firmware */
+   reg = 0x3e0 0x0010;
+   label = NOR Vitesse-7385 Firmware;
+   read-only;
+   };
+
+   partition@3f0 {
+   /* This location must not be altered  */
+   /* 512KB for u-boot Bootloader Image */
+   /* 512KB for u-boot Environment Variables */
+   reg = 0x03f0 0x0010;
+ 

Re: [PATCH 8/8] powerpc/perf: Add power8 EBB support

2013-07-04 Thread Michael Ellerman
On Thu, Jul 04, 2013 at 03:58:01PM -0300, Adhemerval Zanella wrote:
 Hi Michael,
 
 I believe you forgot to add the cpu_user_features2 bit to announce the EBB 
 support
 for P8, patch following:

Hi Adhemerval,

You're right, I haven't added it. I was wondering how best to do it.

It's possible to configure the kernel so that it doesn't have PMU
support, and in that case EBB is unsupported. It's also possible that something
goes wrong with the PMU registration (kernel bug or OOM), and again EBB is then
unsupported.

So I think it might be better if we add PPC_FEATURE2_EBB at runtime in
init_power8_pmu().

What do you think?

Something like:

diff --git a/arch/powerpc/perf/power8-pmu.c b/arch/powerpc/perf/power8-pmu.c
index c7f8ccc..fd9ed89 100644
--- a/arch/powerpc/perf/power8-pmu.c
+++ b/arch/powerpc/perf/power8-pmu.c
@@ -620,10 +682,19 @@ static struct power_pmu power8_pmu = {
 
 static int __init init_power8_pmu(void)
 {
+   int rc;
+
if (!cur_cpu_spec-oprofile_cpu_type ||
strcmp(cur_cpu_spec-oprofile_cpu_type, ppc64/power8))
return -ENODEV;
 
-   return register_power_pmu(power8_pmu);
+   rc = register_power_pmu(power8_pmu);
+   if (rc)
+   return rc;
+
+   /* Tell userspace that EBB is supported */
+   cur_cpu_spec-cpu_user_features2 |= PPC_FEATURE2_EBB;
+
+   return 0;
 }
 early_initcall(init_power8_pmu);


cheers
___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev


[PATCH v1 0/8] EEH Followup Fixes (II)

2013-07-04 Thread Gavin Shan
This series of patches is based on linux-powerpc-next and intends to resolve
the following problems:
 
- On pSeries platform, the EEH doesn't work after PHB hotplug
  with drmgr. The root cause is that the EEH resources (
  EEH devices, EEH caches) aren't released correctly. For the
  problem, we add one hook (pcibios_stop_dev), which is called
  on pci_stop_and_remove_device(). In pcibios_stop_dev(), we
  release the EEH resources.
- Another issue is that we need to put the domain (PE or PHB) into
  a quiet state while doing reset on that domain. However, some
  devices in the domain might not have EEH-sensitive drivers, or
  might not have a driver at all. Those devices can't be put into a
  quiet state and may keep issuing PCI-CFG or MMIO requests while
  the domain is being reset. That can cause the reset to fail and,
  eventually, EEH recovery to fail. For this issue, we introduce
  so-called partial hotplug. That means those devices without a driver,
  or without an EEH-sensitive driver, are removed before doing the reset
  and plugged (probed) into the system after the reset.
- We need to traverse the EEH devices of one specific PE with the safe
  variant of the list traversal function, because an EEH device might be
  removed during the iteration.
- When doing plug for PCI bus, we need check if we need reassign the
  resources for subordinate devices (PCI_REASSIGN_ALL_RSRC) and do that
  accordingly.

The patchset is verified on pSeries and PowerNV platforms:

pSeries Platform
-

drmgr -c phb -r -s PHB 513
drmgr -c phb -a -s PHB 513
errinjct eeh -f 1 -s net/eth2

PowerNV Platform
-

cd 
/sys/devices/pci0005:00/0005:00:00.0/0005:01:00.0/0005:02:08.0/0005:80:00.0/0005:90:01.0
while true; do od -x config  /dev/null; sleep 1; done
echo 1  /sys/kernel/debug/powerpc/PCI0005/err_injct

---

arch/powerpc/include/asm/eeh.h|   24 +--
arch/powerpc/include/asm/pci-bridge.h |3 +-
arch/powerpc/include/asm/pci.h|2 +
arch/powerpc/kernel/eeh.c |   56 ++-
arch/powerpc/kernel/eeh_driver.c  |  106 ++-
arch/powerpc/kernel/eeh_pe.c  |   43 ++-
arch/powerpc/kernel/pci-common.c  |8 ++-
arch/powerpc/kernel/pci-hotplug.c |  129 +++--
arch/powerpc/kernel/pci_of_scan.c |   43 ---
drivers/pci/hotplug/rpadlpar_core.c   |1 -
drivers/pci/probe.c   |4 +
drivers/pci/remove.c  |2 +
include/linux/pci.h   |1 +
13 files changed, 322 insertions(+), 100 deletions(-)

Thanks,
Gavin

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev


[PATCH 3/8] powerpc/pci: Override pcibios_stop_dev()

2013-07-04 Thread Gavin Shan
The patch overrides the weak function pcibios_stop_dev() to destroy
the EEH device and cache while stopping and removing the corresponding
PCI device.

Signed-off-by: Gavin Shan sha...@linux.vnet.ibm.com
---
 arch/powerpc/kernel/pci-hotplug.c |   13 -
 1 files changed, 12 insertions(+), 1 deletions(-)

diff --git a/arch/powerpc/kernel/pci-hotplug.c 
b/arch/powerpc/kernel/pci-hotplug.c
index 3f60880..617d2df 100644
--- a/arch/powerpc/kernel/pci-hotplug.c
+++ b/arch/powerpc/kernel/pci-hotplug.c
@@ -22,6 +22,18 @@
 #include asm/eeh.h
 
 /**
+ * pcibios_stop_dev - stop the PCI device
+ * @dev: the indicated PCI device
+ *
+ * Stop the PCI device. The function should be called before
+ * stopping the specified PCI device.
+ */
+void pcibios_stop_dev(struct pci_dev *dev)
+{
+   eeh_remove_device(dev, 1);
+}
+
+/**
  * __pcibios_remove_pci_devices - remove all devices under this bus
  * @bus: the indicated PCI bus
  * @purge_pe: destroy the PE on removal of PCI devices
@@ -45,7 +57,6 @@ void __pcibios_remove_pci_devices(struct pci_bus *bus, int 
purge_pe)
 pci_domain_nr(bus),  bus-number);
list_for_each_entry_safe(dev, tmp, bus-devices, bus_list) {
pr_debug( * Removing %s...\n, pci_name(dev));
-   eeh_remove_bus_device(dev, purge_pe);
pci_stop_and_remove_bus_device(dev);
}
 }
-- 
1.7.5.4

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev


[PATCH 2/8] powerpc/eeh: Export functions for hotplug

2013-07-04 Thread Gavin Shan
Make some functions public in order to support hotplug on either specific
PCI bus or PCI device in future.

Signed-off-by: Gavin Shan sha...@linux.vnet.ibm.com
---
 arch/powerpc/include/asm/eeh.h |9 +
 arch/powerpc/kernel/eeh.c  |6 +++---
 2 files changed, 12 insertions(+), 3 deletions(-)

diff --git a/arch/powerpc/include/asm/eeh.h b/arch/powerpc/include/asm/eeh.h
index 09a8743..d9d35c2 100644
--- a/arch/powerpc/include/asm/eeh.h
+++ b/arch/powerpc/include/asm/eeh.h
@@ -209,9 +209,12 @@ unsigned long eeh_check_failure(const volatile void 
__iomem *token,
unsigned long val);
 int eeh_dev_check_failure(struct eeh_dev *edev);
 void eeh_addr_cache_build(void);
+void eeh_add_device_early(struct device_node *);
 void eeh_add_device_tree_early(struct device_node *);
+void eeh_add_device_late(struct pci_dev *);
 void eeh_add_device_tree_late(struct pci_bus *);
 void eeh_add_sysfs_files(struct pci_bus *);
+void eeh_remove_device(struct pci_dev *, int);
 void eeh_remove_bus_device(struct pci_dev *, int);
 
 /**
@@ -252,12 +255,18 @@ static inline unsigned long eeh_check_failure(const 
volatile void __iomem *token
 
 static inline void eeh_addr_cache_build(void) { }
 
+static inline void eeh_add_device_early(struct device_node *dn) { }
+
 static inline void eeh_add_device_tree_early(struct device_node *dn) { }
 
+static inline void eeh_add_device_late(struct pci_dev *dev) { }
+
 static inline void eeh_add_device_tree_late(struct pci_bus *bus) { }
 
 static inline void eeh_add_sysfs_files(struct pci_bus *bus) { }
 
+static inline void eeh_remove_device(struct pci_dev *dev, int purge_pe) { }
+
 static inline void eeh_remove_bus_device(struct pci_dev *dev, int purge_pe) { }
 
 #define EEH_POSSIBLE_ERROR(val, type) (0)
diff --git a/arch/powerpc/kernel/eeh.c b/arch/powerpc/kernel/eeh.c
index 39954fe..a186de8 100644
--- a/arch/powerpc/kernel/eeh.c
+++ b/arch/powerpc/kernel/eeh.c
@@ -838,7 +838,7 @@ core_initcall_sync(eeh_init);
  * on the CEC architecture, type of the device, on earlier boot
  * command-line arguments  etc.
  */
-static void eeh_add_device_early(struct device_node *dn)
+void eeh_add_device_early(struct device_node *dn)
 {
struct pci_controller *phb;
 
@@ -886,7 +886,7 @@ EXPORT_SYMBOL_GPL(eeh_add_device_tree_early);
  * This routine must be used to complete EEH initialization for PCI
  * devices that were added after system boot (e.g. hotplug, dlpar).
  */
-static void eeh_add_device_late(struct pci_dev *dev)
+void eeh_add_device_late(struct pci_dev *dev)
 {
struct device_node *dn;
struct eeh_dev *edev;
@@ -975,7 +975,7 @@ EXPORT_SYMBOL_GPL(eeh_add_sysfs_files);
  * this device will no longer be detected after this call; thus,
  * i/o errors affecting this slot may leave this device unusable.
  */
-static void eeh_remove_device(struct pci_dev *dev, int purge_pe)
+void eeh_remove_device(struct pci_dev *dev, int purge_pe)
 {
struct eeh_dev *edev;
 
-- 
1.7.5.4

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev


[PATCH 6/8] powerpc/eeh: Tranverse EEH devices with safe mode

2013-07-04 Thread Gavin Shan
Currently, we're traversing EEH devices with list_for_each_entry().
That's not safe enough because an EEH device might be removed from
its parent PE during the iteration. The patch replaces that with
list_for_each_entry_safe().

Signed-off-by: Gavin Shan sha...@linux.vnet.ibm.com
---
 arch/powerpc/include/asm/eeh.h |4 ++--
 arch/powerpc/kernel/eeh.c  |4 ++--
 arch/powerpc/kernel/eeh_pe.c   |   10 +-
 3 files changed, 9 insertions(+), 9 deletions(-)

diff --git a/arch/powerpc/include/asm/eeh.h b/arch/powerpc/include/asm/eeh.h
index 2ce22d7..e8c411b 100644
--- a/arch/powerpc/include/asm/eeh.h
+++ b/arch/powerpc/include/asm/eeh.h
@@ -74,8 +74,8 @@ struct eeh_pe {
struct list_head child; /* Child PEs*/
 };
 
-#define eeh_pe_for_each_dev(pe, edev) \
-   list_for_each_entry(edev, pe-edevs, list)
+#define eeh_pe_for_each_dev(pe, edev, tmp) \
+   list_for_each_entry_safe(edev, tmp, pe-edevs, list)
 
 /*
  * The struct is used to trace EEH state for the associated
diff --git a/arch/powerpc/kernel/eeh.c b/arch/powerpc/kernel/eeh.c
index b074b2a..b518c49 100644
--- a/arch/powerpc/kernel/eeh.c
+++ b/arch/powerpc/kernel/eeh.c
@@ -231,7 +231,7 @@ static size_t eeh_gather_pci_data(struct eeh_dev *edev, 
char * buf, size_t len)
 void eeh_slot_error_detail(struct eeh_pe *pe, int severity)
 {
size_t loglen = 0;
-   struct eeh_dev *edev;
+   struct eeh_dev *edev, *tmp;
bool valid_cfg_log = true;
 
/*
@@ -251,7 +251,7 @@ void eeh_slot_error_detail(struct eeh_pe *pe, int severity)
eeh_pe_restore_bars(pe);
 
pci_regs_buf[0] = 0;
-   eeh_pe_for_each_dev(pe, edev) {
+   eeh_pe_for_each_dev(pe, edev, tmp) {
loglen += eeh_gather_pci_data(edev, pci_regs_buf + 
loglen,
  EEH_PCI_REGS_LOG_LEN - 
loglen);
}
diff --git a/arch/powerpc/kernel/eeh_pe.c b/arch/powerpc/kernel/eeh_pe.c
index 32ef409..c8b815e 100644
--- a/arch/powerpc/kernel/eeh_pe.c
+++ b/arch/powerpc/kernel/eeh_pe.c
@@ -176,7 +176,7 @@ void *eeh_pe_dev_traverse(struct eeh_pe *root,
eeh_traverse_func fn, void *flag)
 {
struct eeh_pe *pe;
-   struct eeh_dev *edev;
+   struct eeh_dev *edev, *tmp;
void *ret;
 
if (!root) {
@@ -186,7 +186,7 @@ void *eeh_pe_dev_traverse(struct eeh_pe *root,
 
/* Traverse root PE */
for (pe = root; pe; pe = eeh_pe_next(pe, root)) {
-   eeh_pe_for_each_dev(pe, edev) {
+   eeh_pe_for_each_dev(pe, edev, tmp) {
ret = fn(edev, flag);
if (ret)
return ret;
@@ -501,7 +501,7 @@ static void *__eeh_pe_state_mark(void *data, void *flag)
 {
struct eeh_pe *pe = (struct eeh_pe *)data;
int state = *((int *)flag);
-   struct eeh_dev *tmp;
+   struct eeh_dev *edev, *tmp;
struct pci_dev *pdev;
 
/*
@@ -511,8 +511,8 @@ static void *__eeh_pe_state_mark(void *data, void *flag)
 * the PCI device driver.
 */
pe-state |= state;
-   eeh_pe_for_each_dev(pe, tmp) {
-   pdev = eeh_dev_to_pci_dev(tmp);
+   eeh_pe_for_each_dev(pe, edev, tmp) {
+   pdev = eeh_dev_to_pci_dev(edev);
if (pdev)
pdev-error_state = pci_channel_io_frozen;
}
-- 
1.7.5.4

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev


[PATCH 7/8] powerpc/pci: Partial hotplug support

2013-07-04 Thread Gavin Shan
When EEH error happens to one specific PE, the device drivers
of its attached EEH devices (PCI devices) are checked to see
the further action: reset with complete hotplug, or reset without
hotplug. However, that's not enough for those PCI devices whose
drivers can't support EEH, or those PCI devices without driver.
So we need do so-called partial hotplug on basis of PCI devices.
In the situation, part of PCI devices of the specific PE are
unplugged and plugged again after PE reset.

The patch adds functions to support scanning a single PCI device
(function), based either on the device tree or on hardware, for plugging.
The existing function pci_stop_and_remove_bus_device() is enough
for unplugging. Besides, the patch also fixes the issue that we
need to reassign the resources if the PCI_REASSIGN_ALL_RSRC flag is set.
Otherwise, claiming the resources of the devices attached to the PCI
bus would fail and the devices newly added by a complete hotplug
couldn't be enabled.

Signed-off-by: Gavin Shan sha...@linux.vnet.ibm.com
---
 arch/powerpc/include/asm/pci-bridge.h |2 +-
 arch/powerpc/include/asm/pci.h|2 +
 arch/powerpc/kernel/pci-common.c  |8 ++-
 arch/powerpc/kernel/pci-hotplug.c |   92 +
 arch/powerpc/kernel/pci_of_scan.c |   43 +++
 5 files changed, 132 insertions(+), 15 deletions(-)

diff --git a/arch/powerpc/include/asm/pci-bridge.h 
b/arch/powerpc/include/asm/pci-bridge.h
index 32d0d20..070aed3 100644
--- a/arch/powerpc/include/asm/pci-bridge.h
+++ b/arch/powerpc/include/asm/pci-bridge.h
@@ -213,7 +213,7 @@ extern void pcibios_remove_pci_devices(struct pci_bus *bus);
 
 /** Discover new pci devices under this bus, and add them */
 extern void pcibios_add_pci_devices(struct pci_bus *bus);
-
+void pcibios_scan_pci_dev(struct pci_bus *bus, struct device_node *dn);
 
 extern void isa_bridge_find_early(struct pci_controller *hose);
 
diff --git a/arch/powerpc/include/asm/pci.h b/arch/powerpc/include/asm/pci.h
index 6653f27..28cfc95 100644
--- a/arch/powerpc/include/asm/pci.h
+++ b/arch/powerpc/include/asm/pci.h
@@ -167,6 +167,8 @@ extern struct pci_dev *of_create_pci_dev(struct device_node 
*node,
struct pci_bus *bus, int devfn);
 
 extern void of_scan_pci_bridge(struct pci_dev *dev);
+extern struct pci_dev *of_scan_pci_dev(struct pci_bus *bus,
+  struct device_node *dn);
 
 extern void of_scan_bus(struct device_node *node, struct pci_bus *bus);
 extern void of_rescan_bus(struct device_node *node, struct pci_bus *bus);
diff --git a/arch/powerpc/kernel/pci-common.c b/arch/powerpc/kernel/pci-common.c
index f46914a..6f3a1cb 100644
--- a/arch/powerpc/kernel/pci-common.c
+++ b/arch/powerpc/kernel/pci-common.c
@@ -1460,8 +1460,12 @@ void pcibios_finish_adding_to_bus(struct pci_bus *bus)
 pci_domain_nr(bus), bus-number);
 
/* Allocate bus and devices resources */
-   pcibios_allocate_bus_resources(bus);
-   pcibios_claim_one_bus(bus);
+   if (pci_has_flag(PCI_REASSIGN_ALL_RSRC)) {
+   pci_assign_unassigned_bus_resources(bus);
+   } else {
+   pcibios_allocate_bus_resources(bus);
+   pcibios_claim_one_bus(bus);
+   }
 
/* Fixup EEH */
eeh_add_device_tree_late(bus);
diff --git a/arch/powerpc/kernel/pci-hotplug.c 
b/arch/powerpc/kernel/pci-hotplug.c
index 96c9ab8..bd21c40 100644
--- a/arch/powerpc/kernel/pci-hotplug.c
+++ b/arch/powerpc/kernel/pci-hotplug.c
@@ -104,3 +104,95 @@ void pcibios_add_pci_devices(struct pci_bus * bus)
pcibios_finish_adding_to_bus(bus);
 }
 EXPORT_SYMBOL_GPL(pcibios_add_pci_devices);
+
+static void pcibios_of_scan_dev(struct pci_bus *bus, struct device_node *dn)
+{
+   struct pci_dev *dev;
+   int ret;
+
+   dev = of_scan_pci_dev(bus, dn);
+   if (!dev)
+   return;
+
+   eeh_add_device_early(dn);
+   pcibios_add_device(dev);
+   eeh_add_device_late(dev);
+
+   ret = pci_bus_add_device(dev);
+   if (ret) {
+   pr_info(%s: Failed to add PCI dev %s\n,
+   __func__, pci_name(dev));
+   return;
+   }
+
+   eeh_sysfs_add_device(dev);
+}
+
+static void pcibios_scan_dev(struct pci_bus *bus, struct device_node *dn)
+{
+   struct pci_dn *pdn = PCI_DN(dn);
+   struct pci_dev *dev;
+   struct resource *r;
+   int i, ret;
+
+   eeh_add_device_early(dn);
+   dev = pci_scan_single_device(bus, pdn-devfn);
+   if (!dev) {
+   pr_warn(%s: Failed to probe %04x:%02x:%2x.%01x\n,
+   __func__, pci_domain_nr(bus), bus-number,
+   PCI_SLOT(pdn-devfn), PCI_FUNC(pdn-devfn));
+   return;
+   }
+
+   /*
+* If we already requested to reassign resources, the
+* start address of individual resources is zero'ed
+* during PCI header fixup time. So we need reassign
+* 

[PATCH 8/8] powerpc/eeh: Support partial hotplug

2013-07-04 Thread Gavin Shan
When an EEH error happens to one specific PE, some devices with drivers
supporting EEH won't expect hotplug on the device. However, there
might be other devices without a driver, or with a driver without EEH
support. For that case, we need to do partial hotplug in order to make
sure that the PE becomes absolutely quiet during reset. Otherwise,
the PE reset might fail and lead to failure of error recovery.

The patch intends to support so-called partial hotplug for EEH:
Before we do reset, we stop and remove those PCI devices without
EEH sensitive driver. The corresponding EEH devices are not detached
from its PE, but with special flag. After the reset is done, those
EEH devices with the special flag will be scanned one by one.

Signed-off-by: Gavin Shan sha...@linux.vnet.ibm.com
---
 arch/powerpc/include/asm/eeh.h   |6 ++-
 arch/powerpc/kernel/eeh.c|   22 ++--
 arch/powerpc/kernel/eeh_driver.c |  109 --
 arch/powerpc/kernel/eeh_pe.c |   26 +
 4 files changed, 141 insertions(+), 22 deletions(-)

diff --git a/arch/powerpc/include/asm/eeh.h b/arch/powerpc/include/asm/eeh.h
index e8c411b..f54a601 100644
--- a/arch/powerpc/include/asm/eeh.h
+++ b/arch/powerpc/include/asm/eeh.h
@@ -84,7 +84,8 @@ struct eeh_pe {
  * another tree except the currently existing tree of PCI
  * buses and PCI devices
  */
-#define EEH_DEV_IRQ_DISABLED   (10)  /* Interrupt disabled   */
+#define EEH_DEV_IRQ_DISABLED   (1  0)/* Interrupt disabled   */
+#define EEH_DEV_DISCONNECTED   (1  1)/* Removing from PE */
 
 struct eeh_dev {
int mode;   /* EEH mode */
@@ -97,6 +98,7 @@ struct eeh_dev {
struct pci_controller *phb; /* Associated PHB   */
struct device_node *dn; /* Associated device node   */
struct pci_dev *pdev;   /* Associated PCI device*/
+   struct pci_bus *bus;/* PCI bus for partial hotplug  */
 };
 
 static inline struct device_node *eeh_dev_to_of_node(struct eeh_dev *edev)
@@ -197,6 +199,8 @@ struct eeh_pe *eeh_pe_get(struct eeh_dev *edev);
 int eeh_add_to_parent_pe(struct eeh_dev *edev);
 int eeh_rmv_from_parent_pe(struct eeh_dev *edev);
 void eeh_pe_update_time_stamp(struct eeh_pe *pe);
+void *eeh_pe_traverse(struct eeh_pe *root,
+   eeh_traverse_func fn, void *flag);
 void *eeh_pe_dev_traverse(struct eeh_pe *root,
eeh_traverse_func fn, void *flag);
 void eeh_pe_restore_bars(struct eeh_pe *pe);
diff --git a/arch/powerpc/kernel/eeh.c b/arch/powerpc/kernel/eeh.c
index b518c49..8b414b3 100644
--- a/arch/powerpc/kernel/eeh.c
+++ b/arch/powerpc/kernel/eeh.c
@@ -985,15 +985,27 @@ void eeh_remove_device(struct pci_dev *dev)
/* Unregister the device with the EEH/PCI address search system */
pr_debug(EEH: Removing device %s\n, pci_name(dev));
 
-   if (!edev || !edev-pdev) {
+   if (!edev || !edev-pdev || !edev-pe) {
pr_debug(EEH: Not referenced !\n);
return;
}
-   edev-pdev = NULL;
-   dev-dev.archdata.edev = NULL;
-   pci_dev_put(dev);
 
-   eeh_rmv_from_parent_pe(edev);
+   /*
+* During the hotplug for EEH error recovery, we need the EEH
+* device attached to the parent PE in order for BAR restore
+* a bit later. So we keep it for BAR restore and remove it
+* from the parent PE during the BAR resotre.
+*/
+   if (!(edev-pe-state  EEH_PE_KEEP)) {
+   edev-pdev = NULL;
+   dev-dev.archdata.edev = NULL;
+   pci_dev_put(dev);
+
+   eeh_rmv_from_parent_pe(edev);
+   } else {
+   edev-mode |= EEH_DEV_DISCONNECTED;
+   }
+
eeh_addr_cache_rmv_dev(dev);
eeh_sysfs_remove_device(dev);
 }
diff --git a/arch/powerpc/kernel/eeh_driver.c b/arch/powerpc/kernel/eeh_driver.c
index 9ef3bbb..807d2bb 100644
--- a/arch/powerpc/kernel/eeh_driver.c
+++ b/arch/powerpc/kernel/eeh_driver.c
@@ -338,6 +338,92 @@ static void *eeh_report_failure(void *data, void *userdata)
return NULL;
 }
 
+static void *eeh_rmv_device(void *data, void *userdata)
+{
+   struct pci_driver *driver;
+   struct eeh_dev *edev = (struct eeh_dev *)data;
+   struct pci_dev *dev = eeh_dev_to_pci_dev(edev);
+   int *removed = (int *)userdata;
+
+   /*
+* Actually, we should remove the PCI bridges as well.
+* However, that's lots of complexity to do that,
+* particularly some of devices under the bridge might
+* support EEH. So we just care about PCI devices for
+* simplicity here.
+*/
+   if (!dev || (dev-hdr_type  PCI_HEADER_TYPE_BRIDGE))
+   return NULL;
+   driver = eeh_pcid_get(dev);
+   if (driver  driver-err_handler)
+   return NULL;
+
+   /* Remove it from PCI subsystem */
+   pr_debug(EEH: Removing %s without EEH 

[PATCH 5/8] powerpc/eeh: Keep PE during hotplug

2013-07-04 Thread Gavin Shan
When we do normal hotplug, the PE shouldn't be kept. However, we
need the PE if the hotplug is caused by EEH errors. Since we remove
the EEH device through the PCI hook pcibios_stop_dev(), the flag
purge_pe passed to various functions is meaningless. So the patch
removes the meaningless flag and introduces a new flag, EEH_PE_KEEP,
to save the PE while doing hotplug during EEH error recovery.

Signed-off-by: Gavin Shan sha...@linux.vnet.ibm.com
---
 arch/powerpc/include/asm/eeh.h|   11 +--
 arch/powerpc/include/asm/pci-bridge.h |1 -
 arch/powerpc/kernel/eeh.c |   28 ++--
 arch/powerpc/kernel/eeh_driver.c  |7 +--
 arch/powerpc/kernel/eeh_pe.c  |7 +++
 arch/powerpc/kernel/pci-hotplug.c |   26 +-
 6 files changed, 20 insertions(+), 60 deletions(-)

diff --git a/arch/powerpc/include/asm/eeh.h b/arch/powerpc/include/asm/eeh.h
index d9d35c2..2ce22d7 100644
--- a/arch/powerpc/include/asm/eeh.h
+++ b/arch/powerpc/include/asm/eeh.h
@@ -55,6 +55,8 @@ struct device_node;
 #define EEH_PE_RECOVERING  (1  1)/* Recovering PE*/
 #define EEH_PE_PHB_DEAD(1  2)/* Dead PHB 
*/
 
+#define EEH_PE_KEEP(1  8)/* Keep PE on hotplug   */
+
 struct eeh_pe {
int type;   /* PE type: PHB/Bus/Device  */
int state;  /* PE EEH dependent mode*/
@@ -193,7 +195,7 @@ int eeh_phb_pe_create(struct pci_controller *phb);
 struct eeh_pe *eeh_phb_pe_get(struct pci_controller *phb);
 struct eeh_pe *eeh_pe_get(struct eeh_dev *edev);
 int eeh_add_to_parent_pe(struct eeh_dev *edev);
-int eeh_rmv_from_parent_pe(struct eeh_dev *edev, int purge_pe);
+int eeh_rmv_from_parent_pe(struct eeh_dev *edev);
 void eeh_pe_update_time_stamp(struct eeh_pe *pe);
 void *eeh_pe_dev_traverse(struct eeh_pe *root,
eeh_traverse_func fn, void *flag);
@@ -214,8 +216,7 @@ void eeh_add_device_tree_early(struct device_node *);
 void eeh_add_device_late(struct pci_dev *);
 void eeh_add_device_tree_late(struct pci_bus *);
 void eeh_add_sysfs_files(struct pci_bus *);
-void eeh_remove_device(struct pci_dev *, int);
-void eeh_remove_bus_device(struct pci_dev *, int);
+void eeh_remove_device(struct pci_dev *);
 
 /**
  * EEH_POSSIBLE_ERROR() -- test for possible MMIO failure.
@@ -265,9 +266,7 @@ static inline void eeh_add_device_tree_late(struct pci_bus 
*bus) { }
 
 static inline void eeh_add_sysfs_files(struct pci_bus *bus) { }
 
-static inline void eeh_remove_device(struct pci_dev *dev, int purge_pe) { }
-
-static inline void eeh_remove_bus_device(struct pci_dev *dev, int purge_pe) { }
+static inline void eeh_remove_device(struct pci_dev *dev) { }
 
 #define EEH_POSSIBLE_ERROR(val, type) (0)
 #define EEH_IO_ERROR_VALUE(size) (-1UL)
diff --git a/arch/powerpc/include/asm/pci-bridge.h 
b/arch/powerpc/include/asm/pci-bridge.h
index 2c1d8cb..32d0d20 100644
--- a/arch/powerpc/include/asm/pci-bridge.h
+++ b/arch/powerpc/include/asm/pci-bridge.h
@@ -209,7 +209,6 @@ static inline struct eeh_dev *of_node_to_eeh_dev(struct 
device_node *dn)
 extern struct pci_bus *pcibios_find_pci_bus(struct device_node *dn);
 
 /** Remove all of the PCI devices under this bus */
-extern void __pcibios_remove_pci_devices(struct pci_bus *bus, int purge_pe);
 extern void pcibios_remove_pci_devices(struct pci_bus *bus);
 
 /** Discover new pci devices under this bus, and add them */
diff --git a/arch/powerpc/kernel/eeh.c b/arch/powerpc/kernel/eeh.c
index a186de8..b074b2a 100644
--- a/arch/powerpc/kernel/eeh.c
+++ b/arch/powerpc/kernel/eeh.c
@@ -967,7 +967,6 @@ EXPORT_SYMBOL_GPL(eeh_add_sysfs_files);
 /**
  * eeh_remove_device - Undo EEH setup for the indicated pci device
  * @dev: pci device to be removed
- * @purge_pe: remove the PE or not
  *
  * This routine should be called when a device is removed from
  * a running system (e.g. by hotplug or dlpar).  It unregisters
@@ -975,7 +974,7 @@ EXPORT_SYMBOL_GPL(eeh_add_sysfs_files);
  * this device will no longer be detected after this call; thus,
  * i/o errors affecting this slot may leave this device unusable.
  */
-void eeh_remove_device(struct pci_dev *dev, int purge_pe)
+void eeh_remove_device(struct pci_dev *dev)
 {
struct eeh_dev *edev;
 
@@ -994,34 +993,11 @@ void eeh_remove_device(struct pci_dev *dev, int purge_pe)
dev-dev.archdata.edev = NULL;
pci_dev_put(dev);
 
-   eeh_rmv_from_parent_pe(edev, purge_pe);
+   eeh_rmv_from_parent_pe(edev);
eeh_addr_cache_rmv_dev(dev);
eeh_sysfs_remove_device(dev);
 }
 
-/**
- * eeh_remove_bus_device - Undo EEH setup for the indicated PCI device
- * @dev: PCI device
- * @purge_pe: remove the corresponding PE or not
- *
- * This routine must be called when a device is removed from the
- * running system through hotplug or dlpar. The corresponding
- * PCI address cache will be removed.
- */
-void 

[PATCH 1/8] PCI: Add pcibios_stop_dev()

2013-07-04 Thread Gavin Shan
When stopping and removing a specific PCI device, the platform
might need to take some actions. For example, EEH may already have
an EEH cache and an EEH device attached to the PCI device, and both
need to be released at that time. The patch introduces the hook
pcibios_stop_dev() for this purpose.

Cc: Bjorn Helgaas bhelg...@google.com
Cc: linux-...@vger.kernel.org
Signed-off-by: Gavin Shan sha...@linux.vnet.ibm.com
---
 drivers/pci/probe.c  |4 
 drivers/pci/remove.c |2 ++
 include/linux/pci.h  |1 +
 3 files changed, 7 insertions(+), 0 deletions(-)

diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c
index 70f10fa..7167dc4 100644
--- a/drivers/pci/probe.c
+++ b/drivers/pci/probe.c
@@ -1669,6 +1669,10 @@ void __weak pcibios_remove_bus(struct pci_bus *bus)
 {
 }
 
+void __weak pcibios_stop_dev(struct pci_dev *dev)
+{
+}
+
 struct pci_bus *pci_create_root_bus(struct device *parent, int bus,
struct pci_ops *ops, void *sysdata, struct list_head *resources)
 {
diff --git a/drivers/pci/remove.c b/drivers/pci/remove.c
index 8fc54b7..e329efc 100644
--- a/drivers/pci/remove.c
+++ b/drivers/pci/remove.c
@@ -21,6 +21,8 @@ static void pci_stop_dev(struct pci_dev *dev)
 {
pci_pme_active(dev, false);
 
+   pcibios_stop_dev(dev);
+
if (dev-is_added) {
pci_proc_detach_device(dev);
pci_remove_sysfs_dev_files(dev);
diff --git a/include/linux/pci.h b/include/linux/pci.h
index 3a24e4f..40df783 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -696,6 +696,7 @@ int no_pci_devices(void);
 void pcibios_resource_survey_bus(struct pci_bus *bus);
 void pcibios_add_bus(struct pci_bus *bus);
 void pcibios_remove_bus(struct pci_bus *bus);
+void pcibios_stop_dev(struct pci_dev *dev);
 void pcibios_fixup_bus(struct pci_bus *);
 int __must_check pcibios_enable_device(struct pci_dev *, int mask);
 /* Architecture specific versions may override this (weak) */
-- 
1.7.5.4

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev


[PATCH 4/8] PCI/hotplug: Needn't remove EEH cache again

2013-07-04 Thread Gavin Shan
Since pci_stop_and_remove_bus_device() already removes the EEH cache,
there is no need to remove it explicitly beforehand.

Cc: Bjorn Helgaas bhelg...@google.com
Cc: linux-...@vger.kernel.org
Signed-off-by: Gavin Shan sha...@linux.vnet.ibm.com
---
 drivers/pci/hotplug/rpadlpar_core.c |1 -
 1 files changed, 0 insertions(+), 1 deletions(-)

diff --git a/drivers/pci/hotplug/rpadlpar_core.c 
b/drivers/pci/hotplug/rpadlpar_core.c
index b29e20b..bb7af78 100644
--- a/drivers/pci/hotplug/rpadlpar_core.c
+++ b/drivers/pci/hotplug/rpadlpar_core.c
@@ -388,7 +388,6 @@ int dlpar_remove_pci_slot(char *drc_name, struct 
device_node *dn)
/* Remove the EADS bridge device itself */
BUG_ON(!bus-self);
pr_debug(PCI: Now removing bridge device %s\n, pci_name(bus-self));
-   eeh_remove_bus_device(bus-self, true);
pci_stop_and_remove_bus_device(bus-self);
 
return 0;
-- 
1.7.5.4

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev


Re: [PATCH 1/8] PCI: Add pcibios_stop_dev()

2013-07-04 Thread Benjamin Herrenschmidt
On Fri, 2013-07-05 at 10:57 +0800, Gavin Shan wrote:
 When stopping and removing one specific PCI device, the platform
 might need take some actions. One example is that EEH already had
 eeh cache and eeh device attached to the PCI device, and we need
 release eeh cache and device during the time. The patch introduces
 hook pcibios_stop_dev() for the purpose.

Bjorn, any objection ? Ack ? Nack ? :-)

I'd like to put that in my tree, it's part of a series that fixes a
number of bugs with our hotplug and EEH code which I'd like to send
to Linus soonish.

Cheers,
Ben.

 Cc: Bjorn Helgaas bhelg...@google.com
 Cc: linux-...@vger.kernel.org
 Signed-off-by: Gavin Shan sha...@linux.vnet.ibm.com
 ---
  drivers/pci/probe.c  |4 
  drivers/pci/remove.c |2 ++
  include/linux/pci.h  |1 +
  3 files changed, 7 insertions(+), 0 deletions(-)
 
 diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c
 index 70f10fa..7167dc4 100644
 --- a/drivers/pci/probe.c
 +++ b/drivers/pci/probe.c
 @@ -1669,6 +1669,10 @@ void __weak pcibios_remove_bus(struct pci_bus *bus)
  {
  }
  
 +void __weak pcibios_stop_dev(struct pci_dev *dev)
 +{
 +}
 +
  struct pci_bus *pci_create_root_bus(struct device *parent, int bus,
   struct pci_ops *ops, void *sysdata, struct list_head *resources)
  {
 diff --git a/drivers/pci/remove.c b/drivers/pci/remove.c
 index 8fc54b7..e329efc 100644
 --- a/drivers/pci/remove.c
 +++ b/drivers/pci/remove.c
 @@ -21,6 +21,8 @@ static void pci_stop_dev(struct pci_dev *dev)
  {
   pci_pme_active(dev, false);
  
 + pcibios_stop_dev(dev);
 +
   if (dev-is_added) {
   pci_proc_detach_device(dev);
   pci_remove_sysfs_dev_files(dev);
 diff --git a/include/linux/pci.h b/include/linux/pci.h
 index 3a24e4f..40df783 100644
 --- a/include/linux/pci.h
 +++ b/include/linux/pci.h
 @@ -696,6 +696,7 @@ int no_pci_devices(void);
  void pcibios_resource_survey_bus(struct pci_bus *bus);
  void pcibios_add_bus(struct pci_bus *bus);
  void pcibios_remove_bus(struct pci_bus *bus);
 +void pcibios_stop_dev(struct pci_dev *dev);
  void pcibios_fixup_bus(struct pci_bus *);
  int __must_check pcibios_enable_device(struct pci_dev *, int mask);
  /* Architecture specific versions may override this (weak) */


___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev