[PATCH 3/4] ppc32/kprobe: complete kprobe and migrate exception frame

2011-12-12 Thread Tiejun Chen
We can't emulate stwu since that may corrupt the current exception stack,
so we have to do the real store operation in the exception return code.

First we allocate a trampoline exception frame below the kprobed
function's stack and copy the current exception frame into the trampoline.
Then we can do the real store operation to implement 'stwu', and reroute
r1 to the trampoline frame to complete the exception migration.
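For illustration only (this is not part of the patch), the approach above corresponds
roughly to the following C sketch. The INT_FRAME_SIZE value, the helper name and the
assumption that the exception frame sits immediately below the pre-stwu stack pointer
are all made up for the example:

#include <string.h>

#define INT_FRAME_SIZE	192	/* illustrative value only */

/*
 * 'exc_frame' is the current exception frame, 'kprobed_r1' is the stack
 * pointer the emulated 'stwu' wants to establish.  Returns the new r1
 * (the trampoline frame).
 */
static unsigned long migrate_exception_frame(void *exc_frame,
					     unsigned long kprobed_r1)
{
	/* 1. Allocate a trampoline frame below the kprobed function stack. */
	char *trampoline = (char *)(kprobed_r1 - INT_FRAME_SIZE);

	/* 2. Copy the current exception frame into the trampoline; the
	 *    original frame may overlap the word 'stwu' wants to write.   */
	memcpy(trampoline, exc_frame, INT_FRAME_SIZE);

	/* 3. Do the real store the emulated 'stwu' skipped: the old stack
	 *    pointer goes to the word the new stack pointer points at
	 *    ('stwu rX,-N(r1)' semantics).                                 */
	*(unsigned long *)kprobed_r1 =
		(unsigned long)exc_frame + INT_FRAME_SIZE;

	/* 4. Reroute r1 to the trampoline frame before returning from the
	 *    exception.                                                    */
	return (unsigned long)trampoline;
}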

Signed-off-by: Tiejun Chen tiejun.c...@windriver.com
---
 arch/powerpc/kernel/entry_32.S |   26 ++
 1 files changed, 26 insertions(+), 0 deletions(-)

diff --git a/arch/powerpc/kernel/entry_32.S b/arch/powerpc/kernel/entry_32.S
index 56212bc..d56e311 100644
--- a/arch/powerpc/kernel/entry_32.S
+++ b/arch/powerpc/kernel/entry_32.S
@@ -1185,6 +1185,8 @@ recheck:
bne-do_resched
andi.   r0,r9,_TIF_USER_WORK_MASK
beq restore_user
+   andis.  r0,r9,_TIF_DELAYED_KPROBE@h
+   bne-restore_kprobe
 do_user_signal:/* r10 contains MSR_KERNEL here */
ori r10,r10,MSR_EE
SYNC
@@ -1202,6 +1204,30 @@ do_user_signal:			/* r10 contains MSR_KERNEL here */
REST_NVGPRS(r1)
b   recheck
 
+restore_kprobe:
+   lwz r3,GPR1(r1)
+	subi	r3,r3,INT_FRAME_SIZE;	/* Allocate a trampoline exception frame */
+   mr  r4,r1
+   bl  copy_exc_stack  /* Copy from the original to the trampoline */
+
+   /* Do real stw operation to complete stwu */
+   mr  r4,r1
+	addi	r4,r4,INT_FRAME_SIZE	/* Get kprobed entry */
+   lwz r5,GPR1(r1) /* Backup r1 */
+   stw r4,GPR1(r1) /* Now store that safely */
+
+   /* Reroute the trampoline frame to r1 */
+	subi	r5,r5,INT_FRAME_SIZE
+   mr  r1,r5
+
+   /* Clear _TIF_DELAYED_KPROBE flag */
+   rlwinm  r9,r1,0,0,(31-THREAD_SHIFT)
+   lwz r0,TI_FLAGS(r9)
+   rlwinm  r0,r0,0,_TIF_DELAYED_KPROBE
+   stw r0,TI_FLAGS(r9)
+
+   b   restore
+
 /*
  * We come here when we are at the end of handling an exception
  * that occurred at a place where taking an exception will lose
-- 
1.5.6



[PATCH 1/4] powerpc/kprobe: introduce a new thread flag

2011-12-12 Thread Tiejun Chen
We need to add a new thread flag, TIF_KPROBE/_TIF_DELAYED_KPROBE,
for handling the deferred kprobe store operation when exiting an exception.
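For reference, the way the rest of this series exercises the flag can be
sketched as follows; the wrapper names are made up for the example, the
accessors are the standard thread-flag helpers:

#include <linux/thread_info.h>

/* Patch 4/4: emulate_step() defers the store for 'stwu rX,N(r1)'. */
static inline void kprobe_defer_stwu(void)
{
	set_thread_flag(TIF_KPROBE);
}

/* Patch 3/4: the exception-return path checks and clears the flag once
 * the frame has been migrated and the real store has been done. */
static inline int kprobe_stwu_deferred(void)
{
	return test_and_clear_thread_flag(TIF_KPROBE);
}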

Signed-off-by: Tiejun Chen tiejun.c...@windriver.com
---
 arch/powerpc/include/asm/thread_info.h |2 ++
 1 files changed, 2 insertions(+), 0 deletions(-)

diff --git a/arch/powerpc/include/asm/thread_info.h 
b/arch/powerpc/include/asm/thread_info.h
index 836f231..3378734 100644
--- a/arch/powerpc/include/asm/thread_info.h
+++ b/arch/powerpc/include/asm/thread_info.h
@@ -112,6 +112,7 @@ static inline struct thread_info *current_thread_info(void)
 #define TIF_FREEZE 14  /* Freezing for suspend */
 #define TIF_SYSCALL_TRACEPOINT 15  /* syscall tracepoint instrumentation */
 #define TIF_RUNLATCH   16  /* Is the runlatch enabled? */
+#define TIF_KPROBE 17  /* Is the delayed kprobe operation? */
 
 /* as above, but as bit values */
 #define _TIF_SYSCALL_TRACE	(1<<TIF_SYSCALL_TRACE)
@@ -130,6 +131,7 @@ static inline struct thread_info *current_thread_info(void)
 #define _TIF_FREEZE		(1<<TIF_FREEZE)
 #define _TIF_SYSCALL_TRACEPOINT	(1<<TIF_SYSCALL_TRACEPOINT)
 #define _TIF_RUNLATCH		(1<<TIF_RUNLATCH)
+#define _TIF_DELAYED_KPROBE	(1<<TIF_KPROBE)
 #define _TIF_SYSCALL_T_OR_A	(_TIF_SYSCALL_TRACE | _TIF_SYSCALL_AUDIT | \
 _TIF_SECCOMP | _TIF_SYSCALL_TRACEPOINT)
 
-- 
1.5.6



[PATCH 2/4] ppc32/kprobe: introduce copy_exc_stack

2011-12-12 Thread Tiejun Chen
We need a copy mechanism to migrate the exception stack. copy_page()
already implements the bulk copy loop well, so we can build copy_exc_stack()
directly on top of it.
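A minimal usage sketch (this is how patch 3/4 ends up using it; the wrapper
function is illustrative only):

extern void copy_exc_stack(void *to, void *from);	/* copies INT_FRAME_SIZE bytes */

/* Duplicate the live exception frame into a trampoline frame reserved
 * below the kprobed function's stack. */
static void migrate_frame(void *trampoline, void *exc_frame)
{
	copy_exc_stack(trampoline, exc_frame);
}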

Signed-off-by: Tiejun Chen tiejun.c...@windriver.com
---
 arch/powerpc/include/asm/page_32.h |1 +
 arch/powerpc/kernel/misc_32.S  |   16 +++-
 arch/powerpc/kernel/ppc_ksyms.c|1 +
 3 files changed, 17 insertions(+), 1 deletions(-)

diff --git a/arch/powerpc/include/asm/page_32.h 
b/arch/powerpc/include/asm/page_32.h
index 68d73b2..2c1fd84 100644
--- a/arch/powerpc/include/asm/page_32.h
+++ b/arch/powerpc/include/asm/page_32.h
@@ -40,6 +40,7 @@ struct page;
 extern void clear_pages(void *page, int order);
 static inline void clear_page(void *page) { clear_pages(page, 0); }
 extern void copy_page(void *to, void *from);
+extern void copy_exc_stack(void *to, void *from);
 
 #include <asm-generic/getorder.h>
 
diff --git a/arch/powerpc/kernel/misc_32.S b/arch/powerpc/kernel/misc_32.S
index 998a100..aa02545 100644
--- a/arch/powerpc/kernel/misc_32.S
+++ b/arch/powerpc/kernel/misc_32.S
@@ -527,7 +527,7 @@ _GLOBAL(clear_pages)
stw r8,12(r3);  \
	stwu	r9,16(r3)
 
-_GLOBAL(copy_page)
+ready_copy:
	addi	r3,r3,-4
	addi	r4,r4,-4
 
@@ -544,7 +544,21 @@ _GLOBAL(copy_page)
	dcbt	r5,r4
li  r11,L1_CACHE_BYTES+4
 #endif /* MAX_COPY_PREFETCH */
+   blr
+
+_GLOBAL(copy_exc_stack)
+	mflr	r12
+	bl	ready_copy
+	mtlr	r12
+	li	r0,INT_FRAME_SIZE/L1_CACHE_BYTES - MAX_COPY_PREFETCH
+	b	go_copy
+
+_GLOBAL(copy_page)
+	mflr	r12
+	bl	ready_copy
+	mtlr	r12
li  r0,PAGE_SIZE/L1_CACHE_BYTES - MAX_COPY_PREFETCH
+go_copy:
crclr   4*cr0+eq
 2:
mtctr   r0
diff --git a/arch/powerpc/kernel/ppc_ksyms.c b/arch/powerpc/kernel/ppc_ksyms.c
index f5ae872..2223daf 100644
--- a/arch/powerpc/kernel/ppc_ksyms.c
+++ b/arch/powerpc/kernel/ppc_ksyms.c
@@ -88,6 +88,7 @@ EXPORT_SYMBOL(__clear_user);
 EXPORT_SYMBOL(__strncpy_from_user);
 EXPORT_SYMBOL(__strnlen_user);
 EXPORT_SYMBOL(copy_page);
+EXPORT_SYMBOL(copy_exc_stack);
 
 #if defined(CONFIG_PCI) && defined(CONFIG_PPC32)
 EXPORT_SYMBOL(isa_io_base);
-- 
1.5.6



ppc32/kprobe: Fix a bug for kprobe stwu r1

2011-12-12 Thread Tiejun Chen
ppc32/kprobe: Fix a bug for kprobe stwu r1

These patches fix the known kprobe bug
[BUG?]3.0-rc4+ftrace+kprobe: set kprobe at instruction 'stwu' lead to system
crash/freeze

https://lkml.org/lkml/2011/7/3/156

We withdraw the original approach of providing a dedicated exception stack and
instead implement this based on Ben's suggestion:

https://lkml.org/lkml/2011/11/30/327

Here I fix this bug only for ppc32, since Ben pointed out another problem in the
ppc64 exception return code. I think it is better to send a separate patch to fix
that ppc64 issue first; then it will be convenient to merge this fix into ppc64.

Tiejun Chen (4):
  powerpc/kprobe: introduce a new thread flag
  ppc32/kprobe: introduce copy_exc_stack
  ppc32/kprobe: complete kprobe and migrate exception frame
  ppc32/kprobe: don't emulate store when kprobe stwu r1

 arch/powerpc/include/asm/page_32.h |1 +
 arch/powerpc/include/asm/thread_info.h |2 ++
 arch/powerpc/kernel/entry_32.S |   26 ++
 arch/powerpc/kernel/misc_32.S  |   16 +++-
 arch/powerpc/kernel/ppc_ksyms.c|1 +
 arch/powerpc/lib/sstep.c   |   19 +--
 6 files changed, 62 insertions(+), 3 deletions(-)

Tiejun




[PATCH 4/4] ppc32/kprobe: don't emulate store when kprobe stwu r1

2011-12-12 Thread Tiejun Chen
We don't do the real store operation when kprobing 'stwu Rx,y(R1)',
since it may corrupt the exception frame. Instead, the store is now done
safely in the exception return code, after the current exception frame
has been migrated below the kprobed function's stack.

So here we only update gpr[1] and set a thread flag to mark the
deferred store.

Signed-off-by: Tiejun Chen tiejun.c...@windriver.com
---
 arch/powerpc/lib/sstep.c |   19 +--
 1 files changed, 17 insertions(+), 2 deletions(-)

diff --git a/arch/powerpc/lib/sstep.c b/arch/powerpc/lib/sstep.c
index 9a52349..78b7168 100644
--- a/arch/powerpc/lib/sstep.c
+++ b/arch/powerpc/lib/sstep.c
@@ -566,7 +566,7 @@ int __kprobes emulate_step(struct pt_regs *regs, unsigned int instr)
unsigned long int ea;
unsigned int cr, mb, me, sh;
int err;
-   unsigned long old_ra;
+   unsigned long old_ra, val3;
long ival;
 
	opcode = instr >> 26;
@@ -1486,10 +1486,25 @@ int __kprobes emulate_step(struct pt_regs *regs, unsigned int instr)
goto ldst_done;
 
	case 36:	/* stw */
-	case 37:	/* stwu */
		val = regs->gpr[rd];
		err = write_mem(val, dform_ea(instr, regs), 4, regs);
		goto ldst_done;
+	case 37:	/* stwu */
+		val = regs->gpr[rd];
+		val3 = dform_ea(instr, regs);
+		/* For PPC32 we always use stwu to change stack point with r1. So
+		 * this emulated store may corrupt the exception frame, now we
+		 * have to provide the exception frame trampoline, which is pushed
+		 * below the kprobed function stack. So we only update gpr[1] but
+		 * don't emulate the real store operation. We will do real store
+		 * operation safely in exception return code by checking this flag.
+		 */
+   if (ra == 1) {
+   set_thread_flag(TIF_KPROBE);
+   err = 0;
+   } else
+   err = write_mem(val, val3, 4, regs);
+   goto ldst_done;
 
case 38:/* stb */
case 39:/* stbu */
-- 
1.5.6



[PATCH 1/1] ppc64: fix missing to check all bits of _TIF_USER_WORK_MASK in preempt

2011-12-12 Thread Tiejun Chen
In entry_64.S version of ret_from_except_lite, you'll notice that
in the !preempt case, after we've checked MSR_PR we test for any
TIF flag in _TIF_USER_WORK_MASK to decide whether to go to do_work
or not. However, in the preempt case, we do a convoluted trick to
test SIGPENDING only if PR was set and always test NEED_RESCHED ...
but we forget to test any other bit of _TIF_USER_WORK_MASK !!! So
that means that with preempt, we completely fail to test for things
like single step, syscall tracing, etc...

This should be fixed along the following lines (a C-like sketch of the
resulting flow follows the list):

 - Test PR. If set, go to test_work_user, else continue.

 - In test_work_user, always test for _TIF_USER_WORK_MASK to decide to
go to do_work, maybe call it do_user_work

 - In test_work_kernel, test for _TIF_KERNEL_WORK_MASK which is set to
our new flag along with NEED_RESCHED if preempt is enabled and branch to
do_kernel_work.
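Purely for illustration, the reworked flow can be sketched in C as follows;
the mask values and helpers below are stand-ins for the real assembly labels
and TIF masks, not actual kernel definitions:

#define MSR_PR			0x4000UL	/* stand-in value */
#define _TIF_USER_WORK_MASK	0x00ffUL	/* stand-in value */
#define _TIF_NEED_RESCHED	0x0100UL	/* stand-in value */

static void do_user_work(void)   { /* signals, single step, tracing, ... */ }
static void do_kernel_work(void) { /* kernel preemption */ }
static void restore(void)        { /* restore registers and return */ }

static void ret_from_except_lite(unsigned long msr, unsigned long ti_flags)
{
	if (msr & MSR_PR) {			/* test_work_user */
		if (ti_flags & _TIF_USER_WORK_MASK)
			do_user_work();
	} else {				/* returning to kernel mode */
		unsigned long mask = _TIF_USER_WORK_MASK;
#ifdef CONFIG_PREEMPT
		mask |= _TIF_NEED_RESCHED;	/* preempt also checks resched */
#endif
		if (ti_flags & mask)
			do_kernel_work();
	}
	restore();				/* then restore regs and return */
}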

Signed-off-by: Benjamin Herrenschmidt b...@kernel.crashing.org
Signed-off-by: Tiejun Chen tiejun.c...@windriver.com
---
 arch/powerpc/kernel/entry_64.S |   33 +++--
 1 files changed, 15 insertions(+), 18 deletions(-)

diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S
index d834425..9e70b9a 100644
--- a/arch/powerpc/kernel/entry_64.S
+++ b/arch/powerpc/kernel/entry_64.S
@@ -571,27 +571,26 @@ _GLOBAL(ret_from_except_lite)
mtmsrd  r9,1/* Update machine state */
 #endif /* CONFIG_PPC_BOOK3E */
 
-#ifdef CONFIG_PREEMPT
-   clrrdi  r9,r1,THREAD_SHIFT  /* current_thread_info() */
-   li  r0,_TIF_NEED_RESCHED/* bits to check */
-   ld  r3,_MSR(r1)
-   ld  r4,TI_FLAGS(r9)
-   /* Move MSR_PR bit in r3 to _TIF_SIGPENDING position in r0 */
-   rlwimi  r0,r3,32+TIF_SIGPENDING-MSR_PR_LG,_TIF_SIGPENDING
-   and.r0,r4,r0/* check NEED_RESCHED and maybe SIGPENDING */
-   bne do_work
-
-#else /* !CONFIG_PREEMPT */
ld  r3,_MSR(r1) /* Returning to user mode? */
andi.   r3,r3,MSR_PR
-   beq restore /* if not, just restore regs and return */
+   bne test_work_user
 
+   clrrdi  r9,r1,THREAD_SHIFT  /* current_thread_info() */
+   li  r0,_TIF_USER_WORK_MASK
+#ifdef CONFIG_PREEMPT
+   ori r0,r0,_TIF_NEED_RESCHED
+#endif
+   ld  r4,TI_FLAGS(r9)
+	and.	r0,r4,r0	/* check NEED_RESCHED and maybe _TIF_USER_WORK_MASK */
+   bne do_kernel_work
+   b   restore /* if so, just restore regs and return */
+
+test_work_user:
/* Check current_thread_info()-flags */
clrrdi  r9,r1,THREAD_SHIFT
ld  r4,TI_FLAGS(r9)
andi.   r0,r4,_TIF_USER_WORK_MASK
-   bne do_work
-#endif
+   bne do_user_work
 
 restore:
 BEGIN_FW_FTR_SECTION
@@ -693,10 +692,8 @@ ALT_FTR_SECTION_END_IFCLR(CPU_FTR_STCX_CHECKS_ADDRESS)
b   .ret_from_except_lite   /* loop back and handle more */
 #endif
 
-do_work:
+do_kernel_work:
 #ifdef CONFIG_PREEMPT
-   andi.   r0,r3,MSR_PR/* Returning to user mode? */
-   bne user_work
/* Check that preempt_count() == 0 and interrupts are enabled */
lwz r8,TI_PREEMPT(r9)
cmpwi   cr1,r8,0
@@ -738,9 +735,9 @@ do_work:
bne 1b
b   restore
 
-user_work:
 #endif /* CONFIG_PREEMPT */
 
+do_user_work:
/* Enable interrupts */
 #ifdef CONFIG_PPC_BOOK3E
wrteei  1
-- 
1.5.6



[PATCH 1/2] mtd/nand: fixup for fmr initialization of Freescale NAND controller

2011-12-12 Thread Shengzhou Liu
There was a bug in the fmr initialization: fmr was always 0x100 in
fsl_elbc_chip_init(), which caused FCM command timeouts before
fsl_elbc_chip_init_tail() was called. Now we initialize CWTO to the maximum
timeout value instead of relying on the bootloader's setting.

Signed-off-by: Shengzhou Liu shengzhou@freescale.com
---
v3: add more descriptions.
v2: make fmr not relying on the setting of bootloader.

 drivers/mtd/nand/fsl_elbc_nand.c |   10 +-
 1 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/drivers/mtd/nand/fsl_elbc_nand.c b/drivers/mtd/nand/fsl_elbc_nand.c
index eedd8ee..4f405a0 100644
--- a/drivers/mtd/nand/fsl_elbc_nand.c
+++ b/drivers/mtd/nand/fsl_elbc_nand.c
@@ -659,9 +659,7 @@ static int fsl_elbc_chip_init_tail(struct mtd_info *mtd)
if (chip-pagemask  0xff00)
al++;
 
-   /* add to ECCM mode set in fsl_elbc_init */
-	priv->fmr |= (12 << FMR_CWTO_SHIFT) |  /* Timeout > 12 ms */
-		     (al << FMR_AL_SHIFT);
+	priv->fmr |= al << FMR_AL_SHIFT;
 
	dev_dbg(priv->dev, "fsl_elbc_init: nand->numchips = %d\n",
		chip->numchips);
@@ -764,8 +762,10 @@ static int fsl_elbc_chip_init(struct fsl_elbc_mtd *priv)
priv-mtd.priv = chip;
priv-mtd.owner = THIS_MODULE;
 
-   /* Set the ECCM according to the settings in bootloader.*/
-	priv->fmr = in_be32(&lbc->fmr) & FMR_ECCM;
+	/* set timeout to maximum */
+	priv->fmr = 15 << FMR_CWTO_SHIFT;
+	if (in_be32(&lbc->bank[priv->bank].or) & OR_FCM_PGS)
+		priv->fmr |= FMR_ECCM;
 
/* fill in nand_chip structure */
/* set up function call table */
-- 
1.6.4




[PATCH 2/2] mtd/nand: Add ONFI support for FSL NAND controller

2011-12-12 Thread Shengzhou Liu
- fix the NAND_CMD_READID command for ONFI detection.
- add the NAND_CMD_PARAM command to read the ONFI parameter page.

Signed-off-by: Shengzhou Liu shengzhou@freescale.com
---
v3: unify the bytes of fbcr to 256.
v2: no changes

 drivers/mtd/nand/fsl_elbc_nand.c |   18 ++
 1 files changed, 10 insertions(+), 8 deletions(-)

diff --git a/drivers/mtd/nand/fsl_elbc_nand.c b/drivers/mtd/nand/fsl_elbc_nand.c
index 4f405a0..320584a 100644
--- a/drivers/mtd/nand/fsl_elbc_nand.c
+++ b/drivers/mtd/nand/fsl_elbc_nand.c
@@ -349,20 +349,22 @@ static void fsl_elbc_cmdfunc(struct mtd_info *mtd, unsigned int command,
fsl_elbc_run_command(mtd);
return;
 
-	/* READID must read all 5 possible bytes while CEB is active */
 	case NAND_CMD_READID:
-		dev_vdbg(priv->dev, "fsl_elbc_cmdfunc: NAND_CMD_READID.\n");
+	case NAND_CMD_PARAM:
+		dev_vdbg(priv->dev, "fsl_elbc_cmdfunc: NAND_CMD %x\n", command);

 		out_be32(&lbc->fir, (FIR_OP_CM0 << FIR_OP0_SHIFT) |
 				    (FIR_OP_UA  << FIR_OP1_SHIFT) |
 				    (FIR_OP_RBW << FIR_OP2_SHIFT));
-		out_be32(&lbc->fcr, NAND_CMD_READID << FCR_CMD0_SHIFT);
-		/* nand_get_flash_type() reads 8 bytes of entire ID string */
-		out_be32(&lbc->fbcr, 8);
-		elbc_fcm_ctrl->read_bytes = 8;
+		out_be32(&lbc->fcr, command << FCR_CMD0_SHIFT);
+		/*
+		 * although currently it's 8 bytes for READID, we always read
+		 * the maximum 256 bytes(for PARAM)
+		 */
+		out_be32(&lbc->fbcr, 256);
+		elbc_fcm_ctrl->read_bytes = 256;
 		elbc_fcm_ctrl->use_mdr = 1;
-		elbc_fcm_ctrl->mdr = 0;
-
+		elbc_fcm_ctrl->mdr = column;
set_addr(mtd, 0, 0, 0);
fsl_elbc_run_command(mtd);
return;
-- 
1.6.4




Re: [PATCH] block/swim3: Locking fixes

2011-12-12 Thread Jens Axboe
On 2011-12-12 05:57, Benjamin Herrenschmidt wrote:
 The old PowerMac swim3 driver has some interesting locking issues,
 using a private lock and failing to lock the queue before completing
 requests, which triggered WARN_ONs among others.
 
 This rips out the private lock, makes everything operate under the
 block queue lock, and generally makes things simpler.
 
 We used to also share a queue between the two possible instances which
 was problematic since we might pick the wrong controller in some cases,
 so make the queue and the current request per-instance and use
 queuedata to point to our private data which is a lot cleaner.
 
 We still share the queue lock but then, it's nearly impossible to actually
 use 2 swim3's simultaneously: one would need to have a Wallstreet
 PowerBook, the only machine afaik with two of these on the motherboard,
 and populate both hotswap bays with a floppy drive (the machine ships
 only with one), so nobody cares...
 
 While at it, add a little fix to clear up stale interrupts when loading
 the driver or plugging a floppy drive in a bay.

Applied for current for-linus branch.

-- 
Jens Axboe



Re: [PATCH 01/16 v3] pmac_zilog: fix unexpected irq

2011-12-12 Thread Finn Thain

On Mon, 12 Dec 2011, Benjamin Herrenschmidt wrote:

 Any chance you can test this patch ? I would not be surprised if it 
 broke m68k since I had to do some of the changes in there blind, so 
 let me know... with this, I can again suspend/resume properly on a Pismo 
 while using the internal modem among other things.

The patch works on a PowerBook 520 given a few changes (below). This 
PowerBook only has one serial port that I can test (the internal modem is 
not supported on 68k Macs). Can you test a machine with two ports? The 
rest of my Mac hardware is in storage since I moved house last week.

Finn


Index: linux-git/drivers/tty/serial/pmac_zilog.c
===
--- linux-git.orig/drivers/tty/serial/pmac_zilog.c  2011-12-13 
00:18:02.0 +1100
+++ linux-git/drivers/tty/serial/pmac_zilog.c   2011-12-13 00:23:55.0 
+1100
@@ -1705,8 +1705,8 @@ static int __init pmz_init_port(struct u
struct resource *r_ports;
int irq;
 
-	r_ports = platform_get_resource(uap->node, IORESOURCE_MEM, 0);
-	irq = platform_get_irq(uap->node, 0);
+	r_ports = platform_get_resource(uap->pdev, IORESOURCE_MEM, 0);
+	irq = platform_get_irq(uap->pdev, 0);
if (!r_ports || !irq)
return -ENODEV;
 
@@ -1763,8 +1763,10 @@ static void pmz_dispose_port(struct uart
 
 static int __init pmz_attach(struct platform_device *pdev)
 {
+   struct uart_pmac_port *uap;
int i;
 
+   /* Iterate the pmz_ports array to find a matching entry */
for (i = 0; i  pmz_ports_count; i++)
if (pmz_ports[i].pdev == pdev)
break;
@@ -1773,15 +1775,23 @@ static int __init pmz_attach(struct plat
 
uap = pmz_ports[i];
	uap->port.dev = &pdev->dev;
-	dev_set_drvdata(&mdev->ofdev.dev, uap);
+   platform_set_drvdata(pdev, uap);
 
-   return uart_add_one_port(pmz_uart_reg,
-pmz_ports[i]-port);
+   return uart_add_one_port(pmz_uart_reg, uap-port);
 }
 
 static int __exit pmz_detach(struct platform_device *pdev)
 {
+   struct uart_pmac_port *uap = platform_get_drvdata(pdev);
+
+   if (!uap)
+   return -ENODEV;
+
uart_remove_one_port(pmz_uart_reg, uap-port);
+
+   platform_set_drvdata(pdev, NULL);
+   uap-port.dev = NULL;
+
return 0;
 }
 
@@ -1918,8 +1928,13 @@ static void __exit exit_pmz(void)
 
for (i = 0; i  pmz_ports_count; i++) {
		struct uart_pmac_port *uport = &pmz_ports[i];
+#ifdef CONFIG_PPC_PMAC
		if (uport->node != NULL)
			pmz_dispose_port(uport);
+#else
+		if (uport->pdev != NULL)
+			pmz_dispose_port(uport);
+#endif
}
/* Unregister UART driver */
uart_unregister_driver(pmz_uart_reg);
@@ -1993,6 +2008,9 @@ static int __init pmz_console_setup(stru
 #ifdef CONFIG_PPC_PMAC
	if (uap->node == NULL)
		return -ENODEV;
+#else
+	if (uap->pdev == NULL)
+		return -ENODEV;
 #endif
	port = &uap->port;
 
Index: linux-git/drivers/tty/serial/pmac_zilog.h
===
--- linux-git.orig/drivers/tty/serial/pmac_zilog.h  2011-12-13 
00:18:02.0 +1100
+++ linux-git/drivers/tty/serial/pmac_zilog.h   2011-12-13 00:23:55.0 
+1100
@@ -1,18 +1,9 @@
 #ifndef __PMAC_ZILOG_H__
 #define __PMAC_ZILOG_H__
 
-#ifdef CONFIG_PPC_PMAC
-/* We cannot use dev_* because this can be called early, way before
- * we are matched with a device (when using it as a kernel console)
- */
 #define pmz_debug(fmt, arg...)	pr_debug("ttyPZ%d: " fmt, uap->port.line, ## arg)
 #define pmz_error(fmt, arg...)	pr_err("ttyPZ%d: " fmt, uap->port.line, ## arg)
 #define pmz_info(fmt, arg...)	pr_info("ttyPZ%d: " fmt, uap->port.line, ## arg)
-#else
-#define pmz_debug(fmt, arg...) dev_dbg(uap-node-dev, fmt, ## arg)
-#define pmz_error(fmt, arg...) dev_err(uap-node-dev, fmt, ## arg)
-#define pmz_info(fmt, arg...)  dev_info(uap-node-dev, fmt, ## arg)
-#endif
 
 /*
  * At most 2 ESCCs with 2 ports each


Re: [PATCH v3 2/3] hvc_init(): Enforce one-time initialization.

2011-12-12 Thread Miche Baker-Harvey
So on a CONSOLE_PORT_ADD message, we would take the (existing)
ports_device::ports_lock, and for other control messages we would just
take the (new) port::port_lock?  You are concerned that just taking
the ports_lock for all control messages could be too restrictive?  I
wouldn't have expected these messages to be frequent occurrences, but
I'll defer to your experience here.

The CONSOLE_CONSOLE_PORT message calls hvc_alloc, which also needs
serialization.  That's in another one of these three patches; are you
thinking we could leave that patch be, or that we would use the
port_lock for CONSOLE_CONSOLE_PORT?  Using the port_lock would
provide the HVC serialization for free, but it would be cleaner if we
put HVC-related synchronization in hvc_console.c.
On Thu, Dec 8, 2011 at 4:08 AM, Amit Shah amit.s...@redhat.com wrote:
 On (Tue) 06 Dec 2011 [09:05:38], Miche Baker-Harvey wrote:
 Amit,

 Ah, indeed.  I am not using MSI-X, so virtio_pci::vp_try_to_find_vqs()
 calls vp_request_intx() and sets up an interrupt callback.  From
 there, when an interrupt occurs, the stack looks something like this:

 virtio_pci::vp_interrupt()
   virtio_pci::vp_vring_interrupt()
     virtio_ring::vring_interrupt()
        vq->vq.callback()  <-- in this case, that's virtio_console::control_intr()
          workqueue::schedule_work()
            workqueue::queue_work()
              queue_work_on(get_cpu())  <-- queues the work on the current CPU.

 I'm not doing anything to keep multiple control message from being
 sent concurrently to the guest, and we will take those interrupts on
 any CPU. I've confirmed that the two instances of
 handle_control_message() are occurring on different CPUs.

 So let's have a new helper, port_lock() that takes the port-specific
 spinlock.  There has to be a new helper, since the port lock should
 depend on the portdev lock being taken too.  For the port addition
 case, just the portdev lock should be taken.  For any other
 operations, the port lock should be taken.

 My assumption was that we would be able to serialise the work items,
 but that will be too restrictive.  Taking port locks sounds like a
 better idea.

 We'd definitely need the port lock in the control work handler.  We
 might need it in a few more places (like module removal), but we'll
 worry about that later.

 Does this sound fine?

                Amit


Re: [PATCH v3 2/3] hvc_init(): Enforce one-time initialization.

2011-12-12 Thread Amit Shah
On (Mon) 12 Dec 2011 [11:11:55], Miche Baker-Harvey wrote:
 So on a CONSOLE_PORT_ADD message, we would take the (existing)
 ports_device::ports_lock, and for other control messages we would just
 take the (new) port::port_lock?  You are concerned that just taking
 the ports_lock for all control messages could be too restrictive?  I
 wouldn't have expected these messages to be frequent occurrences, but
 I'll defer to your experience here.

No, I mean we'll have to take the new port_lock() everywhere we
currently take the port lock, plus in a few more places.  I only
suggest using a port_lock() helper since we'll need a dependency on the
portdev lock as well.
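A rough sketch of what such a helper could look like (the structure layout
and names here are assumptions made for the discussion, not the actual
virtio_console code):

#include <linux/spinlock.h>

struct ports_device {
	spinlock_t ports_lock;		/* protects the list of ports */
};

struct port {
	struct ports_device *portdev;
	spinlock_t lock;		/* per-port state */
};

/* The per-port lock only makes sense with the portdev lock held, so the
 * helper takes both, in that order. */
static void port_lock(struct port *port)
{
	spin_lock(&port->portdev->ports_lock);
	spin_lock(&port->lock);
}

static void port_unlock(struct port *port)
{
	spin_unlock(&port->lock);
	spin_unlock(&port->portdev->ports_lock);
}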

 The CONSOLE_CONSOLE_PORT message calls hvc_alloc, which also needs
 serialization.  That's in another one of these three patches; are you
 thinking we could leave that patch be, or that we would use the
 port_lock for CONSOLE_CONSOLE_PORT?  Using the port_lock would
 provide the HVC serialization for free, but it would be cleaner if we
 put HVC-related synchronization in hvc_console.c.

Yes, definitely, since other users of hvc_console may get bitten in
similar ways.  However, I'm not too familiar with the hvc code, the
people at linux-ppc can be of help.

 On Thu, Dec 8, 2011 at 4:08 AM, Amit Shah amit.s...@redhat.com wrote:
  On (Tue) 06 Dec 2011 [09:05:38], Miche Baker-Harvey wrote:
  Amit,
 
  Ah, indeed.  I am not using MSI-X, so virtio_pci::vp_try_to_find_vqs()
  calls vp_request_intx() and sets up an interrupt callback.  From
  there, when an interrupt occurs, the stack looks something like this:
 
  virtio_pci::vp_interrupt()
    virtio_pci::vp_vring_interrupt()
      virtio_ring::vring_interrupt()
        vq-vq.callback()  -- in this case, that's 
  virtio_console::control_intr()
          workqueue::schedule_work()
            workqueue::queue_work()
              queue_work_on(get_cpu())  -- queues the work on the current 
  CPU.
 
  I'm not doing anything to keep multiple control message from being
  sent concurrently to the guest, and we will take those interrupts on
  any CPU. I've confirmed that the two instances of
  handle_control_message() are occurring on different CPUs.
 
  So let's have a new helper, port_lock() that takes the port-specific
  spinlock.  There has to be a new helper, since the port lock should
  depend on the portdev lock being taken too.  For the port addition
  case, just the portdev lock should be taken.  For any other
  operations, the port lock should be taken.
 
  My assumption was that we would be able to serialise the work items,
  but that will be too restrictive.  Taking port locks sounds like a
  better idea.
 
  We'd definitely need the port lock in the control work handler.  We
  might need it in a few more places (like module removal), but we'll
  worry about that later.
 
  Does this sound fine?
 
                 Amit

Amit


Re: [PATCH 01/16 v3] pmac_zilog: fix unexpected irq

2011-12-12 Thread Benjamin Herrenschmidt
On Tue, 2011-12-13 at 00:34 +1100, Finn Thain wrote:
 On Mon, 12 Dec 2011, Benjamin Herrenschmidt wrote:
 
  Any chance you can test this patch ? I would not be surprised if it 
  broke m68k since I had to do some of the changes in there blind, so 
  let me know... with this, I can again suspend/resume properly on a Pismo 
  while using the internal modem among other things.
 
 The patch works on a PowerBook 520 given a few changes (below). This 
 PowerBook only has one serial port that I can test (the internal modem is 
 not supported on 68k Macs).

Interesting. Is the modem a soft-modem geoport or a hw serial modem?
In the latter case it's probably just a matter of finding the right GPIO
bit in Apple ASIC to turn the power on :-)

  Can you test a machine with two ports? The 
 rest of my Mac hardware is in storage since I moved house last week.

I tried on 2-port powermacs, but I only have one adapter, so I've
basically been running with one serial port open and shooting irda frames
on the other (with nothing to check whether I got the frames at the other
end), oh well ...

I'll apply your patch and commit via my tree.

Cheers,
Ben.

 Finn
 
 
 Index: linux-git/drivers/tty/serial/pmac_zilog.c
 ===
 --- linux-git.orig/drivers/tty/serial/pmac_zilog.c2011-12-13 
 00:18:02.0 +1100
 +++ linux-git/drivers/tty/serial/pmac_zilog.c 2011-12-13 00:23:55.0 
 +1100
 @@ -1705,8 +1705,8 @@ static int __init pmz_init_port(struct u
   struct resource *r_ports;
   int irq;
  
 - r_ports = platform_get_resource(uap-node, IORESOURCE_MEM, 0);
 - irq = platform_get_irq(uap-node, 0);
 + r_ports = platform_get_resource(uap-pdev, IORESOURCE_MEM, 0);
 + irq = platform_get_irq(uap-pdev, 0);
   if (!r_ports || !irq)
   return -ENODEV;
  
 @@ -1763,8 +1763,10 @@ static void pmz_dispose_port(struct uart
  
  static int __init pmz_attach(struct platform_device *pdev)
  {
 + struct uart_pmac_port *uap;
   int i;
  
 + /* Iterate the pmz_ports array to find a matching entry */
   for (i = 0; i  pmz_ports_count; i++)
   if (pmz_ports[i].pdev == pdev)
   break;
 @@ -1773,15 +1775,23 @@ static int __init pmz_attach(struct plat
  
   uap = pmz_ports[i];
   uap-port.dev = pdev-dev;
 - dev_set_drvdata(mdev-ofdev.dev, uap);
 + platform_set_drvdata(pdev, uap);
  
 - return uart_add_one_port(pmz_uart_reg,
 -  pmz_ports[i]-port);
 + return uart_add_one_port(pmz_uart_reg, uap-port);
  }
  
  static int __exit pmz_detach(struct platform_device *pdev)
  {
 + struct uart_pmac_port *uap = platform_get_drvdata(pdev);
 +
 + if (!uap)
 + return -ENODEV;
 +
   uart_remove_one_port(pmz_uart_reg, uap-port);
 +
 + platform_set_drvdata(pdev, NULL);
 + uap-port.dev = NULL;
 +
   return 0;
  }
  
 @@ -1918,8 +1928,13 @@ static void __exit exit_pmz(void)
  
   for (i = 0; i  pmz_ports_count; i++) {
   struct uart_pmac_port *uport = pmz_ports[i];
 +#ifdef CONFIG_PPC_PMAC
   if (uport-node != NULL)
   pmz_dispose_port(uport);
 +#else
 + if (uport-pdev != NULL)
 + pmz_dispose_port(uport);
 +#endif
   }
   /* Unregister UART driver */
   uart_unregister_driver(pmz_uart_reg);
 @@ -1993,6 +2008,9 @@ static int __init pmz_console_setup(stru
  #ifdef CONFIG_PPC_PMAC
   if (uap-node == NULL)
   return -ENODEV;
 +#else
 + if (uap-pdev == NULL)
 + return -ENODEV;
  #endif
   port = uap-port;
  
 Index: linux-git/drivers/tty/serial/pmac_zilog.h
 ===
 --- linux-git.orig/drivers/tty/serial/pmac_zilog.h2011-12-13 
 00:18:02.0 +1100
 +++ linux-git/drivers/tty/serial/pmac_zilog.h 2011-12-13 00:23:55.0 
 +1100
 @@ -1,18 +1,9 @@
  #ifndef __PMAC_ZILOG_H__
  #define __PMAC_ZILOG_H__
  
 -#ifdef CONFIG_PPC_PMAC
 -/* We cannot use dev_* because this can be called early, way before
 - * we are matched with a device (when using it as a kernel console)
 - */
  #define pmz_debug(fmt, arg...)   pr_debug(ttyPZ%d:  fmt, 
 uap-port.line, ## arg)
  #define pmz_error(fmt, arg...)   pr_err(ttyPZ%d:  fmt, uap-port.line, 
 ## arg)
  #define pmz_info(fmt, arg...)pr_info(ttyPZ%d:  fmt, 
 uap-port.line, ## arg)
 -#else
 -#define pmz_debug(fmt, arg...)   dev_dbg(uap-node-dev, fmt, ## arg)
 -#define pmz_error(fmt, arg...)   dev_err(uap-node-dev, fmt, ## arg)
 -#define pmz_info(fmt, arg...)dev_info(uap-node-dev, fmt, ## arg)
 -#endif
  
  /*
   * At most 2 ESCCs with 2 ports each




Rework gpio phandle parsing

2011-12-12 Thread Grant Likely
I originally posted this as part of the DT clock bindings.  I'm reposting
now since I've fixed up some bugs and I'm planning to put them into
linux-next.

The DT clock binding patches will be posted separately.

Cheers,
g.


 arch/arm/boot/dts/testcases/tests-phandle.dtsi |   37 ++
 arch/arm/boot/dts/testcases/tests.dtsi |1 +
 arch/arm/boot/dts/versatile-pb.dts |2 +
 arch/microblaze/kernel/reset.c |   43 +---
 arch/powerpc/sysdev/qe_lib/gpio.c  |   42 ++--
 drivers/gpio/gpiolib.c |2 +-
 drivers/of/Kconfig |9 ++
 drivers/of/Makefile|1 +
 drivers/of/base.c  |  146 
 drivers/of/gpio.c  |   43 +++
 drivers/of/selftest.c  |  139 ++
 include/asm-generic/gpio.h |6 +-
 include/linux/of.h |   11 ++-
 include/linux/of_gpio.h|   10 +-
 14 files changed, 313 insertions(+), 179 deletions(-)



[PATCH 2/4] gpio/powerpc: Eliminate duplication of of_get_named_gpio_flags()

2011-12-12 Thread Grant Likely
A large chunk of qe_pin_request() is unnecessarily cut-and-paste
directly from of_get_named_gpio_flags().  This patch cuts out the
duplicate code and replaces it with a call to of_get_gpio().

v2: fixed compile error due to missing gpio_to_chip()

Signed-off-by: Grant Likely grant.lik...@secretlab.ca
Cc: Benjamin Herrenschmidt b...@kernel.crashing.org
Cc: Kumar Gala ga...@kernel.crashing.org
---
 arch/powerpc/sysdev/qe_lib/gpio.c |   42 +++-
 drivers/gpio/gpiolib.c|2 +-
 include/asm-generic/gpio.h|1 +
 3 files changed, 10 insertions(+), 35 deletions(-)

diff --git a/arch/powerpc/sysdev/qe_lib/gpio.c 
b/arch/powerpc/sysdev/qe_lib/gpio.c
index e23f23c..521e67a 100644
--- a/arch/powerpc/sysdev/qe_lib/gpio.c
+++ b/arch/powerpc/sysdev/qe_lib/gpio.c
@@ -139,14 +139,10 @@ struct qe_pin {
 struct qe_pin *qe_pin_request(struct device_node *np, int index)
 {
struct qe_pin *qe_pin;
-   struct device_node *gpio_np;
struct gpio_chip *gc;
struct of_mm_gpio_chip *mm_gc;
struct qe_gpio_chip *qe_gc;
int err;
-   int size;
-   const void *gpio_spec;
-   const u32 *gpio_cells;
unsigned long flags;
 
qe_pin = kzalloc(sizeof(*qe_pin), GFP_KERNEL);
@@ -155,45 +151,25 @@ struct qe_pin *qe_pin_request(struct device_node *np, int 
index)
return ERR_PTR(-ENOMEM);
}
 
-   err = of_parse_phandles_with_args(np, gpios, #gpio-cells, index,
- gpio_np, gpio_spec);
-   if (err) {
-   pr_debug(%s: can't parse gpios property\n, __func__);
+   err = of_get_gpio(np, index);
+   if (err  0)
+   goto err0;
+   gc = gpio_to_chip(err);
+   if (WARN_ON(!gc))
goto err0;
-   }
 
-   if (!of_device_is_compatible(gpio_np, fsl,mpc8323-qe-pario-bank)) {
+   if (!of_device_is_compatible(gc-of_node, fsl,mpc8323-qe-pario-bank)) 
{
pr_debug(%s: tried to get a non-qe pin\n, __func__);
err = -EINVAL;
-   goto err1;
-   }
-
-   gc = of_node_to_gpiochip(gpio_np);
-   if (!gc) {
-   pr_debug(%s: gpio controller %s isn't registered\n,
-np-full_name, gpio_np-full_name);
-   err = -ENODEV;
-   goto err1;
-   }
-
-   gpio_cells = of_get_property(gpio_np, #gpio-cells, size);
-   if (!gpio_cells || size != sizeof(*gpio_cells) ||
-   *gpio_cells != gc-of_gpio_n_cells) {
-   pr_debug(%s: wrong #gpio-cells for %s\n,
-np-full_name, gpio_np-full_name);
-   err = -EINVAL;
-   goto err1;
+   goto err0;
}
 
-   err = gc-of_xlate(gc, np, gpio_spec, NULL);
-   if (err  0)
-   goto err1;
-
mm_gc = to_of_mm_gpio_chip(gc);
qe_gc = to_qe_gpio_chip(mm_gc);
 
spin_lock_irqsave(qe_gc-lock, flags);
 
+   err -= gc-base;
if (test_and_set_bit(QE_PIN_REQUESTED, qe_gc-pin_flags[err]) == 0) {
qe_pin-controller = qe_gc;
qe_pin-num = err;
@@ -206,8 +182,6 @@ struct qe_pin *qe_pin_request(struct device_node *np, int 
index)
 
if (!err)
return qe_pin;
-err1:
-   of_node_put(gpio_np);
 err0:
kfree(qe_pin);
pr_debug(%s failed with status %d\n, __func__, err);
diff --git a/drivers/gpio/gpiolib.c b/drivers/gpio/gpiolib.c
index a971e3d..dc315e9 100644
--- a/drivers/gpio/gpiolib.c
+++ b/drivers/gpio/gpiolib.c
@@ -114,7 +114,7 @@ static int gpio_ensure_requested(struct gpio_desc *desc, 
unsigned offset)
 }
 
 /* caller holds gpio_lock *OR* gpio is marked as requested */
-static inline struct gpio_chip *gpio_to_chip(unsigned gpio)
+struct gpio_chip *gpio_to_chip(unsigned gpio)
 {
return gpio_desc[gpio].chip;
 }
diff --git a/include/asm-generic/gpio.h b/include/asm-generic/gpio.h
index 8c86210..6b10bdc 100644
--- a/include/asm-generic/gpio.h
+++ b/include/asm-generic/gpio.h
@@ -135,6 +135,7 @@ struct gpio_chip {
 
 extern const char *gpiochip_is_requested(struct gpio_chip *chip,
unsigned offset);
+extern struct gpio_chip *gpio_to_chip(unsigned gpio);
 extern int __must_check gpiochip_reserve(int start, int ngpio);
 
 /* add/remove chips */
-- 
1.7.5.4



[PATCH 3/4] of: create of_phandle_args to simplify return of phandle parsing data

2011-12-12 Thread Grant Likely
of_parse_phandle_with_args() needs to return quite a bit of data.  Rather
than making each datum a separate **out_ argument, this patch creates
struct of_phandle_args to contain all the returned data and reworks the
user of the function.  This patch also enables of_parse_phandle_with_args()
to return the device node pointer for the phandle node.

This patch also ends up being fairly major surgery to
of_parse_handle_with_args().  The existing structure didn't work well
when extending to use of_phandle_args, and I discovered bugs during testing.
I also took the opportunity to rename the function to be like the
existing of_parse_phandle().

v2: - moved declaration of of_phandle_args to fix compile on non-DT builds
- fixed incorrect index in example usage
- fixed incorrect return code handling for empty entries
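For reference, a call site then looks roughly like this; the struct field
names are assumed here to be np, args_count and args[], and the property
names come from the kerneldoc example in the diff below:

#include <linux/of.h>
#include <linux/printk.h>

static int example_parse(struct device_node *np)
{
	struct of_phandle_args args;
	int i, err;

	err = of_parse_phandle_with_args(np, "list", "#list-cells", 1, &args);
	if (err)
		return err;

	/* args.np is the provider node, args.args[] holds its argument cells */
	for (i = 0; i < args.args_count; i++)
		pr_debug("arg[%d] = %u\n", i, args.args[i]);

	of_node_put(args.np);	/* caller must drop the reference */
	return 0;
}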

Reviewed-by: Shawn Guo shawn@freescale.com
Signed-off-by: Grant Likely grant.lik...@secretlab.ca
---
 drivers/of/base.c  |  146 ++-
 drivers/of/gpio.c  |   43 ++---
 include/asm-generic/gpio.h |5 +-
 include/linux/of.h |   11 +++-
 include/linux/of_gpio.h|   10 ++-
 5 files changed, 112 insertions(+), 103 deletions(-)

diff --git a/drivers/of/base.c b/drivers/of/base.c
index 9b6588e..c6db9ab 100644
--- a/drivers/of/base.c
+++ b/drivers/of/base.c
@@ -824,17 +824,19 @@ of_parse_phandle(struct device_node *np, const char 
*phandle_name, int index)
 EXPORT_SYMBOL(of_parse_phandle);
 
 /**
- * of_parse_phandles_with_args - Find a node pointed by phandle in a list
+ * of_parse_phandle_with_args() - Find a node pointed by phandle in a list
  * @np:pointer to a device tree node containing a list
  * @list_name: property name that contains a list
  * @cells_name:property name that specifies phandles' arguments count
  * @index: index of a phandle to parse out
- * @out_node:  optional pointer to device_node struct pointer (will be filled)
- * @out_args:  optional pointer to arguments pointer (will be filled)
+ * @out_args:  optional pointer to output arguments structure (will be filled)
  *
  * This function is useful to parse lists of phandles and their arguments.
- * Returns 0 on success and fills out_node and out_args, on error returns
- * appropriate errno value.
+ * Returns 0 on success and fills out_args, on error returns appropriate
+ * errno value.
+ *
+ * Caller is responsible to call of_node_put() on the returned out_args-node
+ * pointer.
  *
  * Example:
  *
@@ -851,94 +853,96 @@ EXPORT_SYMBOL(of_parse_phandle);
  * }
  *
  * To get a device_node of the `node2' node you may call this:
- * of_parse_phandles_with_args(node3, "list", "#list-cells", 2, &node2, &args);
+ * of_parse_phandle_with_args(node3, "list", "#list-cells", 1, &args);
  */
-int of_parse_phandles_with_args(struct device_node *np, const char *list_name,
+int of_parse_phandle_with_args(struct device_node *np, const char *list_name,
const char *cells_name, int index,
-   struct device_node **out_node,
-   const void **out_args)
+   struct of_phandle_args *out_args)
 {
-   int ret = -EINVAL;
-   const __be32 *list;
-   const __be32 *list_end;
-   int size;
-   int cur_index = 0;
+   const __be32 *list, *list_end;
+   int size, cur_index = 0;
+   uint32_t count = 0;
struct device_node *node = NULL;
-   const void *args = NULL;
+   phandle phandle;
 
+   /* Retrieve the phandle list property */
list = of_get_property(np, list_name, size);
-   if (!list) {
-   ret = -ENOENT;
-   goto err0;
-   }
+   if (!list)
+   return -EINVAL;
list_end = list + size / sizeof(*list);
 
+   /* Loop over the phandles until all the requested entry is found */
while (list  list_end) {
-   const __be32 *cells;
-   phandle phandle;
+   count = 0;
 
+   /*
+* If phandle is 0, then it is an empty entry with no
+* arguments.  Skip forward to the next entry.
+*/
phandle = be32_to_cpup(list++);
-   args = list;
-
-   /* one cell hole in the list = ; */
-   if (!phandle)
-   goto next;
-
-   node = of_find_node_by_phandle(phandle);
-   if (!node) {
-   pr_debug(%s: could not find phandle\n,
-np-full_name);
-   goto err0;
-   }
+   if (phandle) {
+   /*
+* Find the provider node and parse the #*-cells
+* property to determine the argument length
+*/
+   node = of_find_node_by_phandle(phandle);
+   if (!node) {

[PATCH 1/4] gpio/microblaze: Eliminate duplication of of_get_named_gpio_flags()

2011-12-12 Thread Grant Likely
of_reset_gpio_handle() is largely a cut-and-paste copy of
of_get_named_gpio_flags(). There really isn't any reason for the
split, so this patch deletes the duplicate function

Signed-off-by: Grant Likely grant.lik...@secretlab.ca
Cc: Michal Simek mon...@monstr.eu
---
 arch/microblaze/kernel/reset.c |   43 +--
 1 files changed, 2 insertions(+), 41 deletions(-)

diff --git a/arch/microblaze/kernel/reset.c b/arch/microblaze/kernel/reset.c
index bd8ccab..88a0163 100644
--- a/arch/microblaze/kernel/reset.c
+++ b/arch/microblaze/kernel/reset.c
@@ -19,50 +19,11 @@
 static int handle; /* reset pin handle */
 static unsigned int reset_val;
 
-static int of_reset_gpio_handle(void)
-{
-   int ret; /* variable which stored handle reset gpio pin */
-   struct device_node *root; /* root node */
-   struct device_node *gpio; /* gpio node */
-   struct gpio_chip *gc;
-   u32 flags;
-   const void *gpio_spec;
-
-   /* find out root node */
-   root = of_find_node_by_path(/);
-
-   /* give me handle for gpio node to be possible allocate pin */
-   ret = of_parse_phandles_with_args(root, hard-reset-gpios,
-   #gpio-cells, 0, gpio, gpio_spec);
-   if (ret) {
-   pr_debug(%s: can't parse gpios property\n, __func__);
-   goto err0;
-   }
-
-   gc = of_node_to_gpiochip(gpio);
-   if (!gc) {
-   pr_debug(%s: gpio controller %s isn't registered\n,
-root-full_name, gpio-full_name);
-   ret = -ENODEV;
-   goto err1;
-   }
-
-   ret = gc-of_xlate(gc, root, gpio_spec, flags);
-   if (ret  0)
-   goto err1;
-
-   ret += gc-base;
-err1:
-   of_node_put(gpio);
-err0:
-   pr_debug(%s exited with status %d\n, __func__, ret);
-   return ret;
-}
-
 void of_platform_reset_gpio_probe(void)
 {
int ret;
-   handle = of_reset_gpio_handle();
+   handle = of_get_named_gpio(of_find_node_by_path(/),
+  hard-reset-gpios, 0);
 
if (!gpio_is_valid(handle)) {
printk(KERN_INFO Skipping unavailable RESET gpio %d (%s)\n,
-- 
1.7.5.4



Re: [PATCH 1/2] mtd/nand : set Nand flash page address to FBAR and FPAR correctly

2011-12-12 Thread Artem Bityutskiy
On Fri, 2011-12-09 at 17:42 +0800, shuo@freescale.com wrote:
 From: Liu Shuo b35...@freescale.com
 
 If we use the Nand flash chip whose number of pages in a block is greater
 than 64(for large page), we must treat the low bit of FBAR as being the
 high bit of the page address due to the limitation of FCM, it simply uses
 the low 6-bits (for large page) of the combined block/page address as the
 FPAR component, rather than considering the actual block size.
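As an illustration of the split being described (assuming a large-page chip
with 128 pages per block; the values and helper are made up for the example,
they are not part of the patch):

/* FCM only uses the low 6 bits of the combined block/page address as
 * FPAR, so for a 128-pages-per-block chip the 7th page-address bit has
 * to be carried in the low bit of FBAR. */
static void fcm_split_address(unsigned int page_addr,	/* block * 128 + page */
			      unsigned int *fbar, unsigned int *fpar)
{
	*fpar = page_addr & 0x3f;	/* low 6 bits -> FPAR */
	*fbar = page_addr >> 6;		/* the rest   -> FBAR; its LSB is
					 * really the top page-address bit */
}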

Pushed this one to l2-mtd-2.6.git, thanks!

Artem.



Re: [PATCH 3/3] mtd/nand : workaround for Freescale FCM to support large-page Nand chip

2011-12-12 Thread Artem Bityutskiy
On Tue, 2011-12-06 at 18:09 -0600, Scott Wood wrote:
 On 12/03/2011 10:31 PM, shuo@freescale.com wrote:
  From: Liu Shuo shuo@freescale.com
  
  Freescale FCM controller has a 2K size limitation of buffer RAM. In order
  to support the Nand flash chip whose page size is larger than 2K bytes,
  we read/write 2k data repeatedly by issuing FIR_OP_RB/FIR_OP_WB and save
  them to a large buffer.
  
  Signed-off-by: Liu Shuo shuo@freescale.com
  ---
  v3:
  -remove page_size of struct fsl_elbc_mtd.
  -do a oob write by NAND_CMD_RNDIN. 
  
   drivers/mtd/nand/fsl_elbc_nand.c |  243 
  ++
   1 files changed, 218 insertions(+), 25 deletions(-)
 
 What is the plan for bad block marker migration?

Why should it be migrated? I thought that you support 2KiB pages, and
this adds 4 KiB and 8 KiB page support, which you never supported before.
What is the migration you guys are talking about?

Artem.



Re: [PATCH 3/3] mtd/nand : workaround for Freescale FCM to support large-page Nand chip

2011-12-12 Thread Scott Wood
On 12/12/2011 03:09 PM, Artem Bityutskiy wrote:
 On Tue, 2011-12-06 at 18:09 -0600, Scott Wood wrote:
 On 12/03/2011 10:31 PM, shuo@freescale.com wrote:
 From: Liu Shuo shuo@freescale.com

 Freescale FCM controller has a 2K size limitation of buffer RAM. In order
 to support the Nand flash chip whose page size is larger than 2K bytes,
 we read/write 2k data repeatedly by issuing FIR_OP_RB/FIR_OP_WB and save
 them to a large buffer.

 Signed-off-by: Liu Shuo shuo@freescale.com
 ---
 v3:
 -remove page_size of struct fsl_elbc_mtd.
 -do a oob write by NAND_CMD_RNDIN. 

  drivers/mtd/nand/fsl_elbc_nand.c |  243 
 ++
  1 files changed, 218 insertions(+), 25 deletions(-)

 What is the plan for bad block marker migration?
 
 Why it should be migrated? I thought that you support 2KiB pages, and
 this adds 4 and 8 KiB pages support, which you never supported before.
 What is the migration you guys are talking about?

NAND chips come from the factory with bad blocks marked at a certain
offset into each page.  This offset is normally in the OOB area, but
since we change the layout from "4k data, 128 byte oob" to "2k data,
64 byte oob, 2k data, 64 byte oob", the marker is no longer in the oob.
On first use we need to migrate the markers so that they are still in the oob.

-Scott



Re: [PATCH 3/3] mtd/nand : workaround for Freescale FCM to support large-page Nand chip

2011-12-12 Thread Artem Bityutskiy
On Mon, 2011-12-12 at 15:15 -0600, Scott Wood wrote:
 NAND chips come from the factory with bad blocks marked at a certain
 offset into each page.  This offset is normally in the OOB area, but
 since we change the layout from 4k data, 128 byte oob to 2k data, 64
 byte oob, 2k data, 64 byte oob the marker is no longer in the oob.  On
 first use we need to migrate the markers so that they are still in the oob.

Ah, I see, thanks. Are you planning to implement in-kernel migration or
use a user-space tool?

Artem.



Re: [PATCH 3/3] mtd/nand : workaround for Freescale FCM to support large-page Nand chip

2011-12-12 Thread Scott Wood
On 12/12/2011 03:19 PM, Artem Bityutskiy wrote:
 On Mon, 2011-12-12 at 15:15 -0600, Scott Wood wrote:
 NAND chips come from the factory with bad blocks marked at a certain
 offset into each page.  This offset is normally in the OOB area, but
 since we change the layout from 4k data, 128 byte oob to 2k data, 64
 byte oob, 2k data, 64 byte oob the marker is no longer in the oob.  On
 first use we need to migrate the markers so that they are still in the oob.
 
 Ah, I see, thanks. Are you planning to implement in-kernel migration or
 use a user-space tool?

That's the kind of answer I was hoping to get from Shuo. :-)

Most likely it will be a firmware-based tool, but I'd like there to be some way
for the tool to mark that this has happened, so that the Linux driver
can refuse to do non-raw accesses to a chip that isn't marked as having
been migrated (or at least yell loudly in the log).

Speaking of raw accesses, these are currently broken in the eLBC
driver... we need some way for the generic layer to tell us what kind of
access it is before the transaction starts, not once it wants to read
out the buffer (unless we add more hacks to delay the start of a read
transaction until first buffer access...).  We'd be better off with a
high-level read page/write page function that does the whole thing
(not just buffer access, but command issuance as well).

-Scott



[PATCH v3] ipc: provide generic compat versions of IPC syscalls

2011-12-12 Thread Chris Metcalf
When using the compat APIs, architectures will generally want to
be able to make direct syscalls to msgsnd(), shmctl(), etc., and
in the kernel we would want them to be handled directly by
compat_sys_xxx() functions, as is true for other compat syscalls.

However, for historical reasons, several of the existing compat IPC
syscalls do not do this.  semctl() expects a pointer to the fourth
argument, instead of the fourth argument itself.  msgsnd(), msgrcv()
and shmat() expect arguments in different order.

This change adds an ARCH_WANT_OLD_COMPAT_IPC config option that can be
set to preserve this behavior for ports that use it (x86, sparc, powerpc,
s390, and mips).  No actual semantics are changed for those architectures,
and there is only a minimal amount of code refactoring in ipc/compat.c.

Newer architectures like tile (and perhaps future architectures such
as arm64 and unicore64) should not select this option, and thus can
avoid having any IPC-specific code at all in their architecture-specific
compat layer.  In the same vein, if this option is not selected, IPC_64
mode is assumed, since that's what the asm-generic headers expect.

The workaround code in tile for msgsnd() and msgrcv() is removed
with this change; it also fixes the bug that shmat() and semctl() were
not being properly handled.

Signed-off-by: Chris Metcalf cmetc...@tilera.com
---
 arch/Kconfig   |3 ++
 arch/mips/Kconfig  |1 +
 arch/powerpc/Kconfig   |1 +
 arch/s390/Kconfig  |1 +
 arch/sparc/Kconfig |1 +
 arch/tile/include/asm/compat.h |   11 --
 arch/tile/kernel/compat.c  |   43 
 arch/x86/Kconfig   |1 +
 include/linux/compat.h |   12 ++-
 ipc/compat.c   |   70 ---
 10 files changed, 83 insertions(+), 61 deletions(-)

diff --git a/arch/Kconfig b/arch/Kconfig
index 4b0669c..dfb1e07 100644
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -181,4 +181,7 @@ config HAVE_RCU_TABLE_FREE
 config ARCH_HAVE_NMI_SAFE_CMPXCHG
bool
 
+config ARCH_WANT_OLD_COMPAT_IPC
+   bool
+
 source kernel/gcov/Kconfig
diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig
index d46f1da..ad2af82 100644
--- a/arch/mips/Kconfig
+++ b/arch/mips/Kconfig
@@ -2420,6 +2420,7 @@ config MIPS32_COMPAT
 config COMPAT
bool
depends on MIPS32_COMPAT
+   select ARCH_WANT_OLD_COMPAT_IPC
default y
 
 config SYSVIPC_COMPAT
diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index 951e18f..e2be710 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -146,6 +146,7 @@ config COMPAT
bool
default y if PPC64
select COMPAT_BINFMT_ELF
+   select ARCH_WANT_OLD_COMPAT_IPC
 
 config SYSVIPC_COMPAT
bool
diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig
index 373679b..2fc3bca 100644
--- a/arch/s390/Kconfig
+++ b/arch/s390/Kconfig
@@ -221,6 +221,7 @@ config COMPAT
prompt Kernel support for 31 bit emulation
depends on 64BIT
select COMPAT_BINFMT_ELF
+   select ARCH_WANT_OLD_COMPAT_IPC
help
  Select this option if you want to enable your system kernel to
  handle system-calls from ELF binaries for 31 bit ESA.  This option
diff --git a/arch/sparc/Kconfig b/arch/sparc/Kconfig
index f92602e..846cb5c 100644
--- a/arch/sparc/Kconfig
+++ b/arch/sparc/Kconfig
@@ -577,6 +577,7 @@ config COMPAT
depends on SPARC64
default y
select COMPAT_BINFMT_ELF
+   select ARCH_WANT_OLD_COMPAT_IPC
 
 config SYSVIPC_COMPAT
bool
diff --git a/arch/tile/include/asm/compat.h b/arch/tile/include/asm/compat.h
index bf95f55..4b4b289 100644
--- a/arch/tile/include/asm/compat.h
+++ b/arch/tile/include/asm/compat.h
@@ -242,17 +242,6 @@ long compat_sys_fallocate(int fd, int mode,
 long compat_sys_sched_rr_get_interval(compat_pid_t pid,
  struct compat_timespec __user *interval);
 
-/* Versions of compat functions that differ from generic Linux. */
-struct compat_msgbuf;
-long tile_compat_sys_msgsnd(int msqid,
-   struct compat_msgbuf __user *msgp,
-   size_t msgsz, int msgflg);
-long tile_compat_sys_msgrcv(int msqid,
-   struct compat_msgbuf __user *msgp,
-   size_t msgsz, long msgtyp, int msgflg);
-long tile_compat_sys_ptrace(compat_long_t request, compat_long_t pid,
-   compat_long_t addr, compat_long_t data);
-
 /* Tilera Linux syscalls that don't have compat versions. */
 #define compat_sys_flush_cache sys_flush_cache
 
diff --git a/arch/tile/kernel/compat.c b/arch/tile/kernel/compat.c
index bf5e9d7..d67459b 100644
--- a/arch/tile/kernel/compat.c
+++ b/arch/tile/kernel/compat.c
@@ -16,7 +16,6 @@
 #define __SYSCALL_COMPAT
 
 #include <linux/compat.h>
-#include <linux/msg.h>
 #include <linux/syscalls.h>
 #include <linux/kdev_t.h>
 

[PATCH v3 10/14] KVM: PPC: Maintain a doubly-linked list of guest HPTEs for each gfn

2011-12-12 Thread Paul Mackerras
This expands the reverse mapping array to contain two links for each
HPTE which are used to link together HPTEs that correspond to the
same guest logical page.  Each circular list of HPTEs is pointed to
by the rmap array entry for the guest logical page, pointed to by
the relevant memslot.  Links are 32-bit HPT entry indexes rather than
full 64-bit pointers, to save space.  We use 3 of the remaining 32
bits in the rmap array entries as a lock bit, a referenced bit and
a present bit (the present bit is needed since HPTE index 0 is valid).
The bit lock for the rmap chain nests inside the HPTE lock bit.
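To make the packing concrete, here is a small illustration of how a memslot
rmap word is encoded and decoded under that layout (the helper functions are
made up for the example and assume a 64-bit unsigned long, as on Book3S HV):

#define RMAP_LOCK_BIT	63			/* bit lock for the chain   */
#define RMAP_REF_BIT	33			/* referenced               */
#define RMAP_PRESENT	(1ul << 32)		/* a chain head is recorded */
#define RMAP_INDEX	0xfffffffful		/* HPT index of chain head  */

/* Record that HPTE 'index' heads the chain for this guest page. */
static unsigned long rmap_encode(unsigned int index, int referenced)
{
	unsigned long e = index | RMAP_PRESENT;

	if (referenced)
		e |= 1ul << RMAP_REF_BIT;
	return e;
}

/* Fetch the chain head, if one is present. */
static int rmap_head(unsigned long e, unsigned int *index)
{
	if (!(e & RMAP_PRESENT))
		return 0;
	*index = e & RMAP_INDEX;
	return 1;
}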

Signed-off-by: Paul Mackerras pau...@samba.org
---
 arch/powerpc/include/asm/kvm_book3s_64.h |   18 ++
 arch/powerpc/include/asm/kvm_host.h  |   17 ++-
 arch/powerpc/kvm/book3s_hv_rm_mmu.c  |   84 +-
 3 files changed, 117 insertions(+), 2 deletions(-)

diff --git a/arch/powerpc/include/asm/kvm_book3s_64.h 
b/arch/powerpc/include/asm/kvm_book3s_64.h
index 18b590d..9508c03 100644
--- a/arch/powerpc/include/asm/kvm_book3s_64.h
+++ b/arch/powerpc/include/asm/kvm_book3s_64.h
@@ -113,6 +113,11 @@ static inline unsigned long hpte_page_size(unsigned long h, unsigned long l)
return 0;   /* error */
 }
 
+static inline unsigned long hpte_rpn(unsigned long ptel, unsigned long psize)
+{
+	return ((ptel & HPTE_R_RPN) & ~(psize - 1)) >> PAGE_SHIFT;
+}
+
 static inline int hpte_cache_flags_ok(unsigned long ptel, unsigned long io_type)
 {
	unsigned int wimg = ptel & HPTE_R_WIMG;
@@ -139,6 +144,19 @@ static inline unsigned long hpte_cache_bits(unsigned long pte_val)
 #endif
 }
 
+static inline void lock_rmap(unsigned long *rmap)
+{
+   do {
+   while (test_bit(KVMPPC_RMAP_LOCK_BIT, rmap))
+   cpu_relax();
+   } while (test_and_set_bit_lock(KVMPPC_RMAP_LOCK_BIT, rmap));
+}
+
+static inline void unlock_rmap(unsigned long *rmap)
+{
+   __clear_bit_unlock(KVMPPC_RMAP_LOCK_BIT, rmap);
+}
+
 static inline bool slot_is_aligned(struct kvm_memory_slot *memslot,
   unsigned long pagesize)
 {
diff --git a/arch/powerpc/include/asm/kvm_host.h 
b/arch/powerpc/include/asm/kvm_host.h
index 243bc80..97cb2d7 100644
--- a/arch/powerpc/include/asm/kvm_host.h
+++ b/arch/powerpc/include/asm/kvm_host.h
@@ -170,12 +170,27 @@ struct kvmppc_rma_info {
 /*
  * The reverse mapping array has one entry for each HPTE,
  * which stores the guest's view of the second word of the HPTE
- * (including the guest physical address of the mapping).
+ * (including the guest physical address of the mapping),
+ * plus forward and backward pointers in a doubly-linked ring
+ * of HPTEs that map the same host page.  The pointers in this
+ * ring are 32-bit HPTE indexes, to save space.
  */
 struct revmap_entry {
unsigned long guest_rpte;
+   unsigned int forw, back;
 };
 
+/*
+ * We use the top bit of each memslot-rmap entry as a lock bit,
+ * and bit 32 as a present flag.  The bottom 32 bits are the
+ * index in the guest HPT of a HPTE that points to the page.
+ */
+#define KVMPPC_RMAP_LOCK_BIT   63
+#define KVMPPC_RMAP_REF_BIT	33
+#define KVMPPC_RMAP_REFERENCED	(1ul << KVMPPC_RMAP_REF_BIT)
+#define KVMPPC_RMAP_PRESENT	0x100000000ul
+#define KVMPPC_RMAP_INDEX	0xfffffffful
+
 /* Low-order bits in kvm-arch.slot_phys[][] */
 #define KVMPPC_PAGE_ORDER_MASK 0x1f
 #define KVMPPC_PAGE_NO_CACHE   HPTE_R_I/* 0x20 */
diff --git a/arch/powerpc/kvm/book3s_hv_rm_mmu.c 
b/arch/powerpc/kvm/book3s_hv_rm_mmu.c
index 3f5b016..5b31caa 100644
--- a/arch/powerpc/kvm/book3s_hv_rm_mmu.c
+++ b/arch/powerpc/kvm/book3s_hv_rm_mmu.c
@@ -54,6 +54,70 @@ static void *real_vmalloc_addr(void *x)
return __va(addr);
 }
 
+/*
+ * Add this HPTE into the chain for the real page.
+ * Must be called with the chain locked; it unlocks the chain.
+ */
+static void kvmppc_add_revmap_chain(struct kvm *kvm, struct revmap_entry *rev,
+unsigned long *rmap, long pte_index, int realmode)
+{
+   struct revmap_entry *head, *tail;
+   unsigned long i;
+
+	if (*rmap & KVMPPC_RMAP_PRESENT) {
+		i = *rmap & KVMPPC_RMAP_INDEX;
+		head = &kvm->arch.revmap[i];
+		if (realmode)
+			head = real_vmalloc_addr(head);
+		tail = &kvm->arch.revmap[head->back];
+		if (realmode)
+			tail = real_vmalloc_addr(tail);
+		rev->forw = i;
+		rev->back = head->back;
+		tail->forw = pte_index;
+		head->back = pte_index;
+	} else {
+		rev->forw = rev->back = pte_index;
+		i = pte_index;
+   }
+   smp_wmb();
+   *rmap = i | KVMPPC_RMAP_REFERENCED | KVMPPC_RMAP_PRESENT; /* unlock */
+}
+
+/* Remove this HPTE from the chain for a real page */
+static void remove_revmap_chain(struct kvm *kvm, long pte_index,
+ 

[PATCH v3 02/14] KVM: PPC: Move kvm_vcpu_ioctl_[gs]et_one_reg down to platform-specific code

2011-12-12 Thread Paul Mackerras
This moves the get/set_one_reg implementation down from powerpc.c into
booke.c, book3s_pr.c and book3s_hv.c.  This avoids #ifdefs in C code,
but more importantly, it fixes a bug on Book3s HV where we were
accessing beyond the end of the kvm_vcpu struct (via the to_book3s()
macro) and corrupting memory, causing random crashes and file corruption.

On Book3s HV we only accept setting the HIOR to zero, since the guest
runs in supervisor mode and its vectors are never offset from zero.

Signed-off-by: Paul Mackerras pau...@samba.org
---
 arch/powerpc/include/asm/kvm_ppc.h |3 ++
 arch/powerpc/kvm/book3s_hv.c   |   33 ++
 arch/powerpc/kvm/book3s_pr.c   |   33 ++
 arch/powerpc/kvm/booke.c   |   10 +
 arch/powerpc/kvm/powerpc.c |   39 
 5 files changed, 79 insertions(+), 39 deletions(-)

diff --git a/arch/powerpc/include/asm/kvm_ppc.h 
b/arch/powerpc/include/asm/kvm_ppc.h
index 5192c2e..fc2d696 100644
--- a/arch/powerpc/include/asm/kvm_ppc.h
+++ b/arch/powerpc/include/asm/kvm_ppc.h
@@ -176,6 +176,9 @@ int kvmppc_core_set_sregs(struct kvm_vcpu *vcpu, struct 
kvm_sregs *sregs);
 void kvmppc_get_sregs_ivor(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs);
 int kvmppc_set_sregs_ivor(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs);
 
+int kvm_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg);
+int kvm_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg);
+
 void kvmppc_set_pid(struct kvm_vcpu *vcpu, u32 pid);
 
 #ifdef CONFIG_KVM_BOOK3S_64_HV
diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index b1e3b9c..da7db14 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -392,6 +392,39 @@ int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
return 0;
 }
 
+int kvm_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg)
+{
+   int r = -EINVAL;
+
+   switch (reg->id) {
+   case KVM_ONE_REG_PPC_HIOR:
+   reg->u.reg64 = 0;
+   r = 0;
+   break;
+   default:
+   break;
+   }
+
+   return r;
+}
+
+int kvm_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg)
+{
+   int r = -EINVAL;
+
+   switch (reg->id) {
+   case KVM_ONE_REG_PPC_HIOR:
+   /* Only allow this to be set to zero */
+   if (reg->u.reg64 == 0)
+   r = 0;
+   break;
+   default:
+   break;
+   }
+
+   return r;
+}
+
 int kvmppc_core_check_processor_compat(void)
 {
if (cpu_has_feature(CPU_FTR_HVMODE))
diff --git a/arch/powerpc/kvm/book3s_pr.c b/arch/powerpc/kvm/book3s_pr.c
index ae6a034..ddd92a5 100644
--- a/arch/powerpc/kvm/book3s_pr.c
+++ b/arch/powerpc/kvm/book3s_pr.c
@@ -863,6 +863,39 @@ int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
return 0;
 }
 
+int kvm_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg)
+{
+   int r = -EINVAL;
+
+   switch (reg->id) {
+   case KVM_ONE_REG_PPC_HIOR:
+   reg->u.reg64 = to_book3s(vcpu)->hior;
+   r = 0;
+   break;
+   default:
+   break;
+   }
+
+   return r;
+}
+
+int kvm_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg)
+{
+   int r = -EINVAL;
+
+   switch (reg->id) {
+   case KVM_ONE_REG_PPC_HIOR:
+   to_book3s(vcpu)->hior = reg->u.reg64;
+   to_book3s(vcpu)->hior_explicit = true;
+   r = 0;
+   break;
+   default:
+   break;
+   }
+
+   return r;
+}
+
 int kvmppc_core_check_processor_compat(void)
 {
return 0;
diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c
index 9e41f45..ee9e1ee 100644
--- a/arch/powerpc/kvm/booke.c
+++ b/arch/powerpc/kvm/booke.c
@@ -887,6 +887,16 @@ int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
return kvmppc_core_set_sregs(vcpu, sregs);
 }
 
+int kvm_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg)
+{
+   return -EINVAL;
+}
+
+int kvm_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg)
+{
+   return -EINVAL;
+}
+
 int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
 {
return -ENOTSUPP;
diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c
index b939b8a..69367ac 100644
--- a/arch/powerpc/kvm/powerpc.c
+++ b/arch/powerpc/kvm/powerpc.c
@@ -624,45 +624,6 @@ static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu,
return r;
 }
 
-static int kvm_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu,
- struct kvm_one_reg *reg)
-{
-   int r = -EINVAL;
-
-   switch (reg-id) {
-#ifdef CONFIG_PPC_BOOK3S
-   case KVM_ONE_REG_PPC_HIOR:
-   reg-u.reg64 = to_book3s(vcpu)-hior;
-   r = 0;
- 

[PATCH v3 08/14] KVM: PPC: Allow use of small pages to back Book3S HV guests

2011-12-12 Thread Paul Mackerras
This relaxes the requirement that the guest memory be provided as
16MB huge pages, allowing it to be provided as normal memory, i.e.
in pages of PAGE_SIZE bytes (4k or 64k).  To allow this, we index
the kvm->arch.slot_phys[] arrays with a small page index, even if
huge pages are being used, and use the low-order 5 bits of each
entry to store the order of the enclosing page with respect to
normal pages, i.e. log_2(enclosing_page_size / PAGE_SIZE).
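As a sketch of the encoding this describes (illustrative only, not part
of the patch), a slot_phys[] entry carries the enclosing page order in
its low bits:

	entry  = (pfn << PAGE_SHIFT) | order;		/* order in the low 5 bits */
	order  = entry & KVMPPC_PAGE_ORDER_MASK;	/* 0x1f */
	pgsize = PAGE_SIZE << order;			/* enclosing page size */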

Signed-off-by: Paul Mackerras pau...@samba.org
---
 arch/powerpc/include/asm/kvm_book3s_64.h |   10 +++
 arch/powerpc/include/asm/kvm_host.h  |3 +-
 arch/powerpc/include/asm/kvm_ppc.h   |2 +-
 arch/powerpc/include/asm/reg.h   |1 +
 arch/powerpc/kvm/book3s_64_mmu_hv.c  |  122 --
 arch/powerpc/kvm/book3s_hv.c |   57 --
 arch/powerpc/kvm/book3s_hv_rm_mmu.c  |6 +-
 7 files changed, 132 insertions(+), 69 deletions(-)

diff --git a/arch/powerpc/include/asm/kvm_book3s_64.h 
b/arch/powerpc/include/asm/kvm_book3s_64.h
index 7e6f2ed..10920f7 100644
--- a/arch/powerpc/include/asm/kvm_book3s_64.h
+++ b/arch/powerpc/include/asm/kvm_book3s_64.h
@@ -113,4 +113,14 @@ static inline unsigned long hpte_page_size(unsigned long 
h, unsigned long l)
return 0;   /* error */
 }
 
+static inline bool slot_is_aligned(struct kvm_memory_slot *memslot,
+  unsigned long pagesize)
+{
+   unsigned long mask = (pagesize >> PAGE_SHIFT) - 1;
+
+   if (pagesize <= PAGE_SIZE)
+   return 1;
+   return !(memslot->base_gfn & mask) && !(memslot->npages & mask);
+}
+
 #endif /* __ASM_KVM_BOOK3S_64_H__ */
diff --git a/arch/powerpc/include/asm/kvm_host.h 
b/arch/powerpc/include/asm/kvm_host.h
index beb22ba..9252d5e 100644
--- a/arch/powerpc/include/asm/kvm_host.h
+++ b/arch/powerpc/include/asm/kvm_host.h
@@ -177,14 +177,13 @@ struct revmap_entry {
 };
 
 /* Low-order bits in kvm-arch.slot_phys[][] */
+#define KVMPPC_PAGE_ORDER_MASK 0x1f
 #define KVMPPC_GOT_PAGE0x80
 
 struct kvm_arch {
 #ifdef CONFIG_KVM_BOOK3S_64_HV
unsigned long hpt_virt;
struct revmap_entry *revmap;
-   unsigned long ram_psize;
-   unsigned long ram_porder;
unsigned int lpid;
unsigned int host_lpid;
unsigned long host_lpcr;
diff --git a/arch/powerpc/include/asm/kvm_ppc.h 
b/arch/powerpc/include/asm/kvm_ppc.h
index 111e1b4..a61b5b5 100644
--- a/arch/powerpc/include/asm/kvm_ppc.h
+++ b/arch/powerpc/include/asm/kvm_ppc.h
@@ -122,7 +122,7 @@ extern void kvmppc_free_hpt(struct kvm *kvm);
 extern long kvmppc_prepare_vrma(struct kvm *kvm,
struct kvm_userspace_memory_region *mem);
 extern void kvmppc_map_vrma(struct kvm_vcpu *vcpu,
-   struct kvm_memory_slot *memslot);
+   struct kvm_memory_slot *memslot, unsigned long porder);
 extern int kvmppc_pseries_do_hcall(struct kvm_vcpu *vcpu);
 extern long kvm_vm_ioctl_create_spapr_tce(struct kvm *kvm,
struct kvm_create_spapr_tce *args);
diff --git a/arch/powerpc/include/asm/reg.h b/arch/powerpc/include/asm/reg.h
index 559da19..4599d12 100644
--- a/arch/powerpc/include/asm/reg.h
+++ b/arch/powerpc/include/asm/reg.h
@@ -237,6 +237,7 @@
 #define   LPCR_ISL (1ul << (63-2))
 #define   LPCR_VC_SH   (63-2)
 #define   LPCR_DPFD_SH (63-11)
+#define   LPCR_VRMASD  (0x1ful << (63-16))
 #define   LPCR_VRMA_L  (1ul << (63-12))
 #define   LPCR_VRMA_LP0(1ul << (63-15))
 #define   LPCR_VRMA_LP1(1ul << (63-16))
diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c 
b/arch/powerpc/kvm/book3s_64_mmu_hv.c
index 87016cc..cc18f3d 100644
--- a/arch/powerpc/kvm/book3s_64_mmu_hv.c
+++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c
@@ -34,8 +34,6 @@
 #include asm/ppc-opcode.h
 #include asm/cputable.h
 
-/* Pages in the VRMA are 16MB pages */
-#define VRMA_PAGE_ORDER24
 #define VRMA_VSID  0x1ffUL /* 1TB VSID reserved for VRMA */
 
 /* POWER7 has 10-bit LPIDs, PPC970 has 6-bit LPIDs */
@@ -95,17 +93,31 @@ void kvmppc_free_hpt(struct kvm *kvm)
free_pages(kvm-arch.hpt_virt, HPT_ORDER - PAGE_SHIFT);
 }
 
-void kvmppc_map_vrma(struct kvm_vcpu *vcpu, struct kvm_memory_slot *memslot)
+/* Bits in first HPTE dword for pagesize 4k, 64k or 16M */
+static inline unsigned long hpte0_pgsize_encoding(unsigned long pgsize)
+{
+   return (pgsize > 0x1000) ? HPTE_V_LARGE : 0;
+}
+
+/* Bits in second HPTE dword for pagesize 4k, 64k or 16M */
+static inline unsigned long hpte1_pgsize_encoding(unsigned long pgsize)
+{
+   return (pgsize == 0x10000) ? 0x1000 : 0;
+}
+
+void kvmppc_map_vrma(struct kvm_vcpu *vcpu, struct kvm_memory_slot *memslot,
+unsigned long porder)
 {
-   struct kvm *kvm = vcpu-kvm;
unsigned long i;
unsigned long npages;
unsigned long hp_v, hp_r;
unsigned long addr, hash;
-   unsigned long porder = 

[PATCH v3 01/14] KVM: PPC: Make wakeups work again for Book3S HV guests

2011-12-12 Thread Paul Mackerras
When commit f43fdc15fa (KVM: PPC: booke: Improve timer register
emulation) factored out some code in arch/powerpc/kvm/powerpc.c
into a new helper function, kvm_vcpu_kick(), an error crept in
which causes Book3s HV guest vcpus to stall.  This fixes it.
On POWER7 machines, guest vcpus are grouped together into virtual
CPU cores that share a single waitqueue, so it's important to use
vcpu->arch.wqp rather than vcpu->wq.

Signed-off-by: Paul Mackerras pau...@samba.org
---
 arch/powerpc/kvm/powerpc.c |2 +-
 1 files changed, 1 insertions(+), 1 deletions(-)

diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c
index ef8c990..b939b8a 100644
--- a/arch/powerpc/kvm/powerpc.c
+++ b/arch/powerpc/kvm/powerpc.c
@@ -561,7 +561,7 @@ void kvm_vcpu_kick(struct kvm_vcpu *vcpu)
 int cpu = vcpu->cpu;
 
 me = get_cpu();
-   if (waitqueue_active(&vcpu->wq)) {
+   if (waitqueue_active(vcpu->arch.wqp)) {
 wake_up_interruptible(vcpu->arch.wqp);
 vcpu->stat.halt_wakeup++;
 } else if (cpu != me && cpu != -1) {
-- 
1.7.7.3

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev


[PATCH v3 13/14] KVM: PPC: Implement MMU notifiers for Book3S HV guests

2011-12-12 Thread Paul Mackerras
This adds the infrastructure to enable us to page out pages underneath
a Book3S HV guest, on processors that support virtualized partition
memory, that is, POWER7.  Instead of pinning all the guest's pages,
we now look in the host userspace Linux page tables to find the
mapping for a given guest page.  Then, if the userspace Linux PTE
gets invalidated, kvm_unmap_hva() gets called for that address, and
we replace all the guest HPTEs that refer to that page with absent
HPTEs, i.e. ones with the valid bit clear and the HPTE_V_ABSENT bit
set, which will cause an HDSI when the guest tries to access them.
Finally, the page fault handler is extended to reinstantiate the
guest HPTE when the guest tries to access a page which has been paged
out.

Since we can't intercept the guest DSI and ISI interrupts on PPC970,
we still have to pin all the guest pages on PPC970.  We have a new flag,
kvm->arch.using_mmu_notifiers, that indicates whether we can page
guest pages out.  If it is not set, the MMU notifier callbacks do
nothing and everything operates as before.
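In outline (a sketch, not the patch itself), the guard described above
looks roughly like this in each notifier callback:

	int kvm_unmap_hva(struct kvm *kvm, unsigned long hva)
	{
		if (!kvm->arch.using_mmu_notifiers)
			return 0;	/* PPC970: pages stay pinned, nothing to do */
		/* POWER7: replace HPTEs mapping this hva with absent HPTEs */
		return 0;
	}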

Signed-off-by: Paul Mackerras pau...@samba.org
---
 arch/powerpc/include/asm/kvm_book3s.h|4 +
 arch/powerpc/include/asm/kvm_book3s_64.h |   31 
 arch/powerpc/include/asm/kvm_host.h  |   16 ++
 arch/powerpc/include/asm/reg.h   |3 +
 arch/powerpc/kvm/Kconfig |1 +
 arch/powerpc/kvm/book3s_64_mmu_hv.c  |  290 +++---
 arch/powerpc/kvm/book3s_hv.c |   25 ++--
 arch/powerpc/kvm/book3s_hv_rm_mmu.c  |  140 +++---
 arch/powerpc/kvm/book3s_hv_rmhandlers.S  |   49 +
 arch/powerpc/kvm/powerpc.c   |3 +
 arch/powerpc/mm/hugetlbpage.c|2 +
 11 files changed, 499 insertions(+), 65 deletions(-)

diff --git a/arch/powerpc/include/asm/kvm_book3s.h 
b/arch/powerpc/include/asm/kvm_book3s.h
index f6329bb..ea9539c 100644
--- a/arch/powerpc/include/asm/kvm_book3s.h
+++ b/arch/powerpc/include/asm/kvm_book3s.h
@@ -145,6 +145,10 @@ extern void kvmppc_set_bat(struct kvm_vcpu *vcpu, struct 
kvmppc_bat *bat,
 extern void kvmppc_giveup_ext(struct kvm_vcpu *vcpu, ulong msr);
 extern int kvmppc_emulate_paired_single(struct kvm_run *run, struct kvm_vcpu 
*vcpu);
 extern pfn_t kvmppc_gfn_to_pfn(struct kvm_vcpu *vcpu, gfn_t gfn);
+extern void kvmppc_add_revmap_chain(struct kvm *kvm, struct revmap_entry *rev,
+   unsigned long *rmap, long pte_index, int realmode);
+extern void kvmppc_invalidate_hpte(struct kvm *kvm, unsigned long *hptep,
+   unsigned long pte_index);
 extern void *kvmppc_pin_guest_page(struct kvm *kvm, unsigned long addr,
unsigned long *nb_ret);
 extern void kvmppc_unpin_guest_page(struct kvm *kvm, void *addr);
diff --git a/arch/powerpc/include/asm/kvm_book3s_64.h 
b/arch/powerpc/include/asm/kvm_book3s_64.h
index 79dc37f..c21e46d 100644
--- a/arch/powerpc/include/asm/kvm_book3s_64.h
+++ b/arch/powerpc/include/asm/kvm_book3s_64.h
@@ -136,6 +136,37 @@ static inline int hpte_cache_flags_ok(unsigned long ptel, 
unsigned long io_type)
return (wimg  (HPTE_R_W | HPTE_R_I)) == io_type;
 }
 
+/*
+ * Lock and read a linux PTE.  If it's present and writable, atomically
+ * set dirty and referenced bits and return the PTE, otherwise return 0.
+ */
+static inline pte_t kvmppc_read_update_linux_pte(pte_t *p)
+{
+   pte_t pte, tmp;
+
+   /* wait until _PAGE_BUSY is clear then set it atomically */
+   __asm__ __volatile__ (
+   "1: ldarx   %0,0,%3\n"
+   "   andi.   %1,%0,%4\n"
+   "   bne-    1b\n"
+   "   ori     %1,%0,%4\n"
+   "   stdcx.  %1,0,%3\n"
+   "   bne-    1b"
+   : "=&r" (pte), "=&r" (tmp), "=m" (*p)
+   : "r" (p), "i" (_PAGE_BUSY)
+   : "cc");
+
+   if (pte_present(pte)) {
+   pte = pte_mkyoung(pte);
+   if (pte_write(pte))
+   pte = pte_mkdirty(pte);
+   }
+
+   *p = pte;   /* clears _PAGE_BUSY */
+
+   return pte;
+}
+
 /* Return HPTE cache control bits corresponding to Linux pte bits */
 static inline unsigned long hpte_cache_bits(unsigned long pte_val)
 {
diff --git a/arch/powerpc/include/asm/kvm_host.h 
b/arch/powerpc/include/asm/kvm_host.h
index 937caca..968f3aa 100644
--- a/arch/powerpc/include/asm/kvm_host.h
+++ b/arch/powerpc/include/asm/kvm_host.h
@@ -32,6 +32,7 @@
 #include linux/atomic.h
 #include asm/kvm_asm.h
 #include asm/processor.h
+#include asm/page.h
 
 #define KVM_MAX_VCPUS  NR_CPUS
 #define KVM_MAX_VCORES NR_CPUS
@@ -44,6 +45,19 @@
 #define KVM_COALESCED_MMIO_PAGE_OFFSET 1
 #endif
 
+#ifdef CONFIG_KVM_BOOK3S_64_HV
+#include linux/mmu_notifier.h
+
+#define KVM_ARCH_WANT_MMU_NOTIFIER
+
+struct kvm;
+extern int kvm_unmap_hva(struct kvm *kvm, unsigned long hva);
+extern int kvm_age_hva(struct kvm *kvm, unsigned long hva);
+extern int kvm_test_age_hva(struct kvm *kvm, 

[PATCH v3 03/14] KVM: PPC: Keep a record of HV guest view of hashed page table entries

2011-12-12 Thread Paul Mackerras
This adds an array that parallels the guest hashed page table (HPT),
that is, it has one entry per HPTE, used to store the guest's view
of the second doubleword of the corresponding HPTE.  The first
doubleword in the HPTE is the same as the guest's idea of it, so we
don't need to store a copy, but the second doubleword in the HPTE has
the real page number rather than the guest's logical page number.
This allows us to remove the back_translate() and reverse_xlate()
functions.

This reverse mapping array is vmalloc'd, meaning that to access it
in real mode we have to walk the kernel's page tables explicitly.
That is done by the new real_vmalloc_addr() function.  (In fact this
returns an address in the linear mapping, so the result is usable
both in real mode and in virtual mode.)

There are also some minor cleanups here: moving the definitions of
HPT_ORDER etc. to a header file and defining HPT_NPTE for HPT_NPTEG << 3.
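The real_vmalloc_addr() helper mentioned above works roughly along
these lines (a sketch for illustration only; it walks the kernel page
tables and returns the linear-mapping alias of a vmalloc address):

	static void *real_vmalloc_addr(void *x)
	{
		unsigned long addr = (unsigned long) x;
		pte_t *p;

		p = find_linux_pte(swapper_pg_dir, addr);
		if (!p || !pte_present(*p))
			return NULL;
		/* assume no huge pages in vmalloc space */
		addr = (pte_pfn(*p) << PAGE_SHIFT) | (addr & ~PAGE_MASK);
		return __va(addr);
	}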

Signed-off-by: Paul Mackerras pau...@samba.org
---
 arch/powerpc/include/asm/kvm_book3s_64.h |8 +++
 arch/powerpc/include/asm/kvm_host.h  |   10 
 arch/powerpc/kvm/book3s_64_mmu_hv.c  |   44 +++
 arch/powerpc/kvm/book3s_hv_rm_mmu.c  |   87 ++
 4 files changed, 103 insertions(+), 46 deletions(-)

diff --git a/arch/powerpc/include/asm/kvm_book3s_64.h 
b/arch/powerpc/include/asm/kvm_book3s_64.h
index 2054e47..fa3dc79 100644
--- a/arch/powerpc/include/asm/kvm_book3s_64.h
+++ b/arch/powerpc/include/asm/kvm_book3s_64.h
@@ -35,6 +35,14 @@ static inline void svcpu_put(struct 
kvmppc_book3s_shadow_vcpu *svcpu)
 
 #define SPAPR_TCE_SHIFT12
 
+#ifdef CONFIG_KVM_BOOK3S_64_HV
+/* For now use fixed-size 16MB page table */
+#define HPT_ORDER  24
+#define HPT_NPTEG  (1ul << (HPT_ORDER - 7))/* 128B per pteg */
+#define HPT_NPTE   (HPT_NPTEG << 3)/* 8 PTEs per PTEG */
+#define HPT_HASH_MASK  (HPT_NPTEG - 1)
+#endif
+
 static inline unsigned long compute_tlbie_rb(unsigned long v, unsigned long r,
 unsigned long pte_index)
 {
diff --git a/arch/powerpc/include/asm/kvm_host.h 
b/arch/powerpc/include/asm/kvm_host.h
index 66c75cd..629df2e 100644
--- a/arch/powerpc/include/asm/kvm_host.h
+++ b/arch/powerpc/include/asm/kvm_host.h
@@ -166,9 +166,19 @@ struct kvmppc_rma_info {
atomic_t use_count;
 };
 
+/*
+ * The reverse mapping array has one entry for each HPTE,
+ * which stores the guest's view of the second word of the HPTE
+ * (including the guest physical address of the mapping).
+ */
+struct revmap_entry {
+   unsigned long guest_rpte;
+};
+
 struct kvm_arch {
 #ifdef CONFIG_KVM_BOOK3S_64_HV
unsigned long hpt_virt;
+   struct revmap_entry *revmap;
unsigned long ram_npages;
unsigned long ram_psize;
unsigned long ram_porder;
diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c 
b/arch/powerpc/kvm/book3s_64_mmu_hv.c
index bc3a2ea..80ece8d 100644
--- a/arch/powerpc/kvm/book3s_64_mmu_hv.c
+++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c
@@ -23,6 +23,7 @@
 #include linux/gfp.h
 #include linux/slab.h
 #include linux/hugetlb.h
+#include linux/vmalloc.h
 
 #include asm/tlbflush.h
 #include asm/kvm_ppc.h
@@ -33,11 +34,6 @@
 #include asm/ppc-opcode.h
 #include asm/cputable.h
 
-/* For now use fixed-size 16MB page table */
-#define HPT_ORDER  24
-#define HPT_NPTEG  (1ul << (HPT_ORDER - 7))/* 128B per pteg */
-#define HPT_HASH_MASK  (HPT_NPTEG - 1)
-
 /* Pages in the VRMA are 16MB pages */
 #define VRMA_PAGE_ORDER24
 #define VRMA_VSID  0x1ffUL /* 1TB VSID reserved for VRMA */
@@ -51,7 +47,9 @@ long kvmppc_alloc_hpt(struct kvm *kvm)
 {
unsigned long hpt;
unsigned long lpid;
+   struct revmap_entry *rev;
 
+   /* Allocate guest's hashed page table */
hpt = __get_free_pages(GFP_KERNEL|__GFP_ZERO|__GFP_REPEAT|__GFP_NOWARN,
   HPT_ORDER - PAGE_SHIFT);
if (!hpt) {
@@ -60,12 +58,20 @@ long kvmppc_alloc_hpt(struct kvm *kvm)
}
 kvm->arch.hpt_virt = hpt;
 
+   /* Allocate reverse map array */
+   rev = vmalloc(sizeof(struct revmap_entry) * HPT_NPTE);
+   if (!rev) {
+   pr_err("kvmppc_alloc_hpt: Couldn't alloc reverse map array\n");
+   goto out_freehpt;
+   }
+   kvm->arch.revmap = rev;
+
+   /* Allocate the guest's logical partition ID */
do {
lpid = find_first_zero_bit(lpid_inuse, NR_LPIDS);
 if (lpid >= NR_LPIDS) {
 pr_err("kvm_alloc_hpt: No LPIDs free\n");
-   free_pages(hpt, HPT_ORDER - PAGE_SHIFT);
-   return -ENOMEM;
+   goto out_freeboth;
}
} while (test_and_set_bit(lpid, lpid_inuse));
 
@@ -74,11 +80,18 @@ long kvmppc_alloc_hpt(struct kvm *kvm)
 
 pr_info("KVM guest htab at %lx, LPID %lx\n", hpt, lpid);
return 0;
+
+ 

[PATCH v3 00/14] KVM: PPC: Update Book3S HV memory handling

2011-12-12 Thread Paul Mackerras
This series of patches updates the Book3S-HV KVM code that manages the
guest hashed page table (HPT) to enable several things:

* MMIO emulation and MMIO pass-through

* Use of small pages (4kB or 64kB, depending on config) to back the
  guest memory

* Pageable guest memory - i.e. backing pages can be removed from the
  guest and reinstated on demand, using the MMU notifier mechanism

* Guests can be given read-only access to pages even though they think
  they have mapped them read/write.  When they try to write to them
  their access is upgraded to read/write.  This allows KSM to share
  pages between guests.

On PPC970 we have no way to get DSIs and ISIs to come to the
hypervisor, so we can't do MMIO emulation or pageable guest memory.
On POWER7 we set the VPM1 bit in the LPCR to make all DSIs and ISIs
come to the hypervisor (host) as HDSIs or HISIs.

This code is working well in my tests.  The sporadic crashes that I
was seeing earlier are fixed by the second patch in the series.
Somewhat to my surprise, when I implemented the last patch in the
series I started to see KSM coalescing pages without any further
effort on my part -- my tests were on a machine with Fedora 16
installed, and it has ksmtuned running by default.

This series is on top of Alex Graf's kvm-ppc-next branch.  The first
patch in my series fixes a bug in one of the patches in that branch
(KVM: PPC: booke: Improve timer register emulation).

These patches only touch arch/powerpc except for patch 12, which adds
a couple of barriers to allow mmu_notifier_retry() to be used outside
of the kvm->mmu_lock.

Paul.
___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev


[PATCH v3 09/14] KVM: PPC: Allow I/O mappings in memory slots

2011-12-12 Thread Paul Mackerras
This provides for the case where userspace maps an I/O device into the
address range of a memory slot using a VM_PFNMAP mapping.  In that
case, we work out the pfn from vma->vm_pgoff, and record the cache
enable bits from vma->vm_page_prot in two low-order bits in the
slot_phys array entries.  Then, in kvmppc_h_enter() we check that the
cache bits in the HPTE that the guest wants to insert match the cache
bits in the slot_phys array entry.  However, we do allow the guest to
create what it thinks is a non-cacheable or write-through mapping to
memory that is actually cacheable, so that we can use normal system
memory as part of an emulated device later on.  In that case the actual
HPTE we insert is a cacheable HPTE.
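For illustration, the pfn and cache-attribute extraction described
above boils down to roughly the following (names as used in the diff
below; sketch only):

	/* VM_PFNMAP mapping: compute the pfn and cache bits from the vma */
	pfn   = vma->vm_pgoff + ((start - vma->vm_start) >> PAGE_SHIFT);
	is_io = hpte_cache_bits(pgprot_val(vma->vm_page_prot));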

Signed-off-by: Paul Mackerras pau...@samba.org
---
 arch/powerpc/include/asm/kvm_book3s_64.h |   26 
 arch/powerpc/include/asm/kvm_host.h  |2 +
 arch/powerpc/kvm/book3s_64_mmu_hv.c  |   65 --
 arch/powerpc/kvm/book3s_hv_rm_mmu.c  |   15 +-
 4 files changed, 84 insertions(+), 24 deletions(-)

diff --git a/arch/powerpc/include/asm/kvm_book3s_64.h 
b/arch/powerpc/include/asm/kvm_book3s_64.h
index 10920f7..18b590d 100644
--- a/arch/powerpc/include/asm/kvm_book3s_64.h
+++ b/arch/powerpc/include/asm/kvm_book3s_64.h
@@ -113,6 +113,32 @@ static inline unsigned long hpte_page_size(unsigned long 
h, unsigned long l)
return 0;   /* error */
 }
 
+static inline int hpte_cache_flags_ok(unsigned long ptel, unsigned long 
io_type)
+{
+   unsigned int wimg = ptel & HPTE_R_WIMG;
+
+   /* Handle SAO */
+   if (wimg == (HPTE_R_W | HPTE_R_I | HPTE_R_M) &&
+   cpu_has_feature(CPU_FTR_ARCH_206))
+   wimg = HPTE_R_M;
+
+   if (!io_type)
+   return wimg == HPTE_R_M;
+
+   return (wimg & (HPTE_R_W | HPTE_R_I)) == io_type;
+}
+
+/* Return HPTE cache control bits corresponding to Linux pte bits */
+static inline unsigned long hpte_cache_bits(unsigned long pte_val)
+{
+#if _PAGE_NO_CACHE == HPTE_R_I && _PAGE_WRITETHRU == HPTE_R_W
+   return pte_val & (HPTE_R_W | HPTE_R_I);
+#else
+   return ((pte_val & _PAGE_NO_CACHE) ? HPTE_R_I : 0) +
+   ((pte_val & _PAGE_WRITETHRU) ? HPTE_R_W : 0);
+#endif
+}
+
 static inline bool slot_is_aligned(struct kvm_memory_slot *memslot,
   unsigned long pagesize)
 {
diff --git a/arch/powerpc/include/asm/kvm_host.h 
b/arch/powerpc/include/asm/kvm_host.h
index 9252d5e..243bc80 100644
--- a/arch/powerpc/include/asm/kvm_host.h
+++ b/arch/powerpc/include/asm/kvm_host.h
@@ -178,6 +178,8 @@ struct revmap_entry {
 
 /* Low-order bits in kvm-arch.slot_phys[][] */
 #define KVMPPC_PAGE_ORDER_MASK 0x1f
+#define KVMPPC_PAGE_NO_CACHE   HPTE_R_I/* 0x20 */
+#define KVMPPC_PAGE_WRITETHRU  HPTE_R_W/* 0x40 */
 #define KVMPPC_GOT_PAGE0x80
 
 struct kvm_arch {
diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c 
b/arch/powerpc/kvm/book3s_64_mmu_hv.c
index cc18f3d..b904c40 100644
--- a/arch/powerpc/kvm/book3s_64_mmu_hv.c
+++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c
@@ -199,7 +199,8 @@ static long kvmppc_get_guest_page(struct kvm *kvm, unsigned 
long gfn,
struct page *page, *hpage, *pages[1];
unsigned long s, pgsize;
unsigned long *physp;
-   unsigned int got, pgorder;
+   unsigned int is_io, got, pgorder;
+   struct vm_area_struct *vma;
unsigned long pfn, i, npages;
 
physp = kvm-arch.slot_phys[memslot-id];
@@ -208,34 +209,51 @@ static long kvmppc_get_guest_page(struct kvm *kvm, 
unsigned long gfn,
if (physp[gfn - memslot-base_gfn])
return 0;
 
+   is_io = 0;
+   got = 0;
page = NULL;
pgsize = psize;
+   err = -EINVAL;
start = gfn_to_hva_memslot(memslot, gfn);
 
/* Instantiate and get the page we want access to */
np = get_user_pages_fast(start, 1, 1, pages);
-   if (np != 1)
-   return -EINVAL;
-   page = pages[0];
-   got = KVMPPC_GOT_PAGE;
+   if (np != 1) {
+   /* Look up the vma for the page */
+   down_read(&current->mm->mmap_sem);
+   vma = find_vma(current->mm, start);
+   if (!vma || vma->vm_start > start ||
+   start + psize > vma->vm_end ||
+   !(vma->vm_flags & VM_PFNMAP))
+   goto up_err;
+   is_io = hpte_cache_bits(pgprot_val(vma->vm_page_prot));
+   pfn = vma->vm_pgoff + ((start - vma->vm_start) >> PAGE_SHIFT);
+   /* check alignment of pfn vs. requested page size */
+   if (psize > PAGE_SIZE && (pfn & ((psize >> PAGE_SHIFT) - 1)))
+   goto up_err;
+   up_read(&current->mm->mmap_sem);
 
-   /* See if this is a large page */
-   s = PAGE_SIZE;
-   if (PageHuge(page)) {
-   hpage = compound_head(page);
-   s = compound_order(hpage);
-   

[PATCH v3 12/14] KVM: Add barriers to allow mmu_notifier_retry to be used locklessly

2011-12-12 Thread Paul Mackerras
This adds an smp_wmb in kvm_mmu_notifier_invalidate_range_end() and an
smp_rmb in mmu_notifier_retry() so that mmu_notifier_retry() will give
the correct answer when called without kvm->mmu_lock being held.
PowerPC Book3S HV KVM wants to use a bitlock per guest page rather than
a single global spinlock in order to improve the scalability of updates
to the guest MMU hashed page table, and so needs this.
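The pairing being relied on is, in outline (illustration only):

	/* invalidate_range_end():          mmu_notifier_retry():          */
	/*   kvm->mmu_notifier_seq++;         count = mmu_notifier_count;  */
	/*   smp_wmb();                       smp_rmb();                   */
	/*   kvm->mmu_notifier_count--;       seq = mmu_notifier_seq;      */
	/* so the reader sees either the old non-zero count or the new seq */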

Signed-off-by: Paul Mackerras pau...@samba.org
---
 include/linux/kvm_host.h |   14 +-
 virt/kvm/kvm_main.c  |6 +++---
 2 files changed, 12 insertions(+), 8 deletions(-)

diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 8c5c303..ec79a45 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -700,12 +700,16 @@ static inline int mmu_notifier_retry(struct kvm_vcpu 
*vcpu, unsigned long mmu_se
 if (unlikely(vcpu->kvm->mmu_notifier_count))
return 1;
/*
-* Both reads happen under the mmu_lock and both values are
-* modified under mmu_lock, so there's no need of smb_rmb()
-* here in between, otherwise mmu_notifier_count should be
-* read before mmu_notifier_seq, see
-* mmu_notifier_invalidate_range_end write side.
+* Ensure the read of mmu_notifier_count happens before the read
+* of mmu_notifier_seq.  This interacts with the smp_wmb() in
+* mmu_notifier_invalidate_range_end to make sure that the caller
+* either sees the old (non-zero) value of mmu_notifier_count or
+* the new (incremented) value of mmu_notifier_seq.
+* PowerPC Book3s HV KVM calls this under a per-page lock
+* rather than under kvm->mmu_lock, for scalability, so
+* can't rely on kvm->mmu_lock to keep things ordered.
 */
+   smp_rmb();
 if (vcpu->kvm->mmu_notifier_seq != mmu_seq)
return 1;
return 0;
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index e289486..c144132 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -357,11 +357,11 @@ static void kvm_mmu_notifier_invalidate_range_end(struct 
mmu_notifier *mn,
 * been freed.
 */
 kvm->mmu_notifier_seq++;
+   smp_wmb();
/*
 * The above sequence increase must be visible before the
-* below count decrease but both values are read by the kvm
-* page fault under mmu_lock spinlock so we don't need to add
-* a smb_wmb() here in between the two.
+* below count decrease, which is ensured by the smp_wmb above
+* in conjunction with the smp_rmb in mmu_notifier_retry().
 */
 kvm->mmu_notifier_count--;
 spin_unlock(&kvm->mmu_lock);
-- 
1.7.7.3

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev


[PATCH v3 05/14] KVM: PPC: Add an interface for pinning guest pages in Book3s HV guests

2011-12-12 Thread Paul Mackerras
This adds two new functions, kvmppc_pin_guest_page() and
kvmppc_unpin_guest_page(), and uses them to pin the guest pages where
the guest has registered areas of memory for the hypervisor to update,
(i.e. the per-cpu virtual processor areas, SLB shadow buffers and
dispatch trace logs) and then unpin them when they are no longer
required.

Although it is not strictly necessary to pin the pages at this point,
since all guest pages are already pinned, later commits in this series
will mean that guest pages aren't all pinned.
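Typical use of the new interface, in sketch form (do_h_register_vpa()
in the diff below does essentially this):

	va = kvmppc_pin_guest_page(kvm, vpa, &nb);
	if (va == NULL)
		return H_PARAMETER;
	/* ... the hypervisor may now update up to nb bytes at va ... */
	kvmppc_unpin_guest_page(kvm, va);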

Signed-off-by: Paul Mackerras pau...@samba.org
---
 arch/powerpc/include/asm/kvm_book3s.h |3 +
 arch/powerpc/kvm/book3s_64_mmu_hv.c   |   38 ++
 arch/powerpc/kvm/book3s_hv.c  |   67 ++---
 3 files changed, 78 insertions(+), 30 deletions(-)

diff --git a/arch/powerpc/include/asm/kvm_book3s.h 
b/arch/powerpc/include/asm/kvm_book3s.h
index e8c78ac..a2a89c6 100644
--- a/arch/powerpc/include/asm/kvm_book3s.h
+++ b/arch/powerpc/include/asm/kvm_book3s.h
@@ -140,6 +140,9 @@ extern void kvmppc_set_bat(struct kvm_vcpu *vcpu, struct 
kvmppc_bat *bat,
 extern void kvmppc_giveup_ext(struct kvm_vcpu *vcpu, ulong msr);
 extern int kvmppc_emulate_paired_single(struct kvm_run *run, struct kvm_vcpu 
*vcpu);
 extern pfn_t kvmppc_gfn_to_pfn(struct kvm_vcpu *vcpu, gfn_t gfn);
+extern void *kvmppc_pin_guest_page(struct kvm *kvm, unsigned long addr,
+   unsigned long *nb_ret);
+extern void kvmppc_unpin_guest_page(struct kvm *kvm, void *addr);
 
 extern void kvmppc_entry_trampoline(void);
 extern void kvmppc_hv_entry_trampoline(void);
diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c 
b/arch/powerpc/kvm/book3s_64_mmu_hv.c
index e4c6069..dcd39dc 100644
--- a/arch/powerpc/kvm/book3s_64_mmu_hv.c
+++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c
@@ -184,6 +184,44 @@ static int kvmppc_mmu_book3s_64_hv_xlate(struct kvm_vcpu 
*vcpu, gva_t eaddr,
return -ENOENT;
 }
 
+void *kvmppc_pin_guest_page(struct kvm *kvm, unsigned long gpa,
+   unsigned long *nb_ret)
+{
+   struct kvm_memory_slot *memslot;
+   unsigned long gfn = gpa >> PAGE_SHIFT;
+   struct page *page;
+   unsigned long offset;
+   unsigned long pfn, pa;
+   unsigned long *physp;
+
+   memslot = gfn_to_memslot(kvm, gfn);
+   if (!memslot || (memslot->flags & KVM_MEMSLOT_INVALID))
+   return NULL;
+   physp = kvm->arch.slot_phys[memslot->id];
+   if (!physp)
+   return NULL;
+   physp += (gfn - memslot->base_gfn) >>
+   (kvm->arch.ram_porder - PAGE_SHIFT);
+   pa = *physp;
+   if (!pa)
+   return NULL;
+   pfn = pa >> PAGE_SHIFT;
+   page = pfn_to_page(pfn);
+   get_page(page);
+   offset = gpa & (kvm->arch.ram_psize - 1);
+   if (nb_ret)
+   *nb_ret = kvm->arch.ram_psize - offset;
+   return page_address(page) + offset;
+}
+
+void kvmppc_unpin_guest_page(struct kvm *kvm, void *va)
+{
+   struct page *page = virt_to_page(va);
+
+   page = compound_head(page);
+   put_page(page);
+}
+
 void kvmppc_mmu_book3s_hv_init(struct kvm_vcpu *vcpu)
 {
struct kvmppc_mmu *mmu = vcpu-arch.mmu;
diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index 86d3e4b..bd82789 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -139,12 +139,10 @@ static unsigned long do_h_register_vpa(struct kvm_vcpu 
*vcpu,
   unsigned long vcpuid, unsigned long vpa)
 {
struct kvm *kvm = vcpu-kvm;
-   unsigned long gfn, pg_index, ra, len;
-   unsigned long pg_offset;
+   unsigned long len, nb;
void *va;
struct kvm_vcpu *tvcpu;
-   struct kvm_memory_slot *memslot;
-   unsigned long *physp;
+   int err = H_PARAMETER;
 
tvcpu = kvmppc_find_vcpu(kvm, vcpuid);
if (!tvcpu)
@@ -157,51 +155,41 @@ static unsigned long do_h_register_vpa(struct kvm_vcpu 
*vcpu,
 if (flags < 4) {
 if (vpa & 0x7f)
 return H_PARAMETER;
+   if (flags >= 2 && !tvcpu->arch.vpa)
+   return H_RESOURCE;
/* registering new area; convert logical addr to real */
-   gfn = vpa  PAGE_SHIFT;
-   memslot = gfn_to_memslot(kvm, gfn);
-   if (!memslot || !(memslot-flags  KVM_MEMSLOT_INVALID))
-   return H_PARAMETER;
-   physp = kvm-arch.slot_phys[memslot-id];
-   if (!physp)
-   return H_PARAMETER;
-   pg_index = (gfn - memslot-base_gfn) 
-   (kvm-arch.ram_porder - PAGE_SHIFT);
-   pg_offset = vpa  (kvm-arch.ram_psize - 1);
-   ra = physp[pg_index];
-   if (!ra)
+   va = kvmppc_pin_guest_page(kvm, vpa, &nb);
+   if (va == NULL)
return H_PARAMETER;
-   ra = 

[PATCH v3 07/14] KVM: PPC: Only get pages when actually needed, not in prepare_memory_region()

2011-12-12 Thread Paul Mackerras
This removes the code from kvmppc_core_prepare_memory_region() that
looked up the VMA for the region being added and called hva_to_page
to get the pfns for the memory.  We have no guarantee that there will
be anything mapped there at the time of the KVM_SET_USER_MEMORY_REGION
ioctl call; userspace can do that ioctl and then map memory into the
region later.

Instead we defer looking up the pfn for each memory page until it is
needed, which generally means when the guest does an H_ENTER hcall on
the page.  Since we can't call get_user_pages in real mode, if we don't
already have the pfn for the page, kvmppc_h_enter() will return
H_TOO_HARD and we then call kvmppc_virtmode_h_enter() once we get back
to kernel context.  That calls kvmppc_get_guest_page() to get the pfn
for the page, and then calls back to kvmppc_h_enter() to redo the HPTE
insertion.

When the first vcpu starts executing, we need to have the RMO or VRMA
region mapped so that the guest's real mode accesses will work.  Thus
we now have a check in kvmppc_vcpu_run() to see if the RMO/VRMA is set
up and if not, call kvmppc_hv_setup_rma().  It checks if the memslot
starting at guest physical 0 now has RMO memory mapped there; if so it
sets it up for the guest, otherwise on POWER7 it sets up the VRMA.
The function that does that, kvmppc_map_vrma, is now a bit simpler,
as it calls kvmppc_virtmode_h_enter instead of creating the HPTE itself.

Since we are now potentially updating entries in the slot_phys[]
arrays from multiple vcpu threads, we now have a spinlock protecting
those updates to ensure that we don't lose track of any references
to pages.
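The deferred-lookup flow described above is, in outline (a simplified
sketch, not the actual code; fill_slot_phys() is a stand-in name for
the step that resolves the pfn into kvm->arch.slot_phys[]):

	ret = kvmppc_h_enter(vcpu, flags, pte_index, pteh, ptel);
	if (ret == H_TOO_HARD) {
		/* real mode couldn't find the pfn; resolve it and retry */
		if (fill_slot_phys(vcpu->kvm, ptel) == 0)
			ret = kvmppc_h_enter(vcpu, flags, pte_index, pteh, ptel);
	}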

Signed-off-by: Paul Mackerras pau...@samba.org
---
 arch/powerpc/include/asm/kvm_book3s.h|4 +
 arch/powerpc/include/asm/kvm_book3s_64.h |   12 ++
 arch/powerpc/include/asm/kvm_host.h  |2 +
 arch/powerpc/include/asm/kvm_ppc.h   |4 +-
 arch/powerpc/kvm/book3s_64_mmu_hv.c  |  130 +---
 arch/powerpc/kvm/book3s_hv.c |  244 +-
 arch/powerpc/kvm/book3s_hv_rm_mmu.c  |   54 
 7 files changed, 290 insertions(+), 160 deletions(-)

diff --git a/arch/powerpc/include/asm/kvm_book3s.h 
b/arch/powerpc/include/asm/kvm_book3s.h
index a2a89c6..5329c21 100644
--- a/arch/powerpc/include/asm/kvm_book3s.h
+++ b/arch/powerpc/include/asm/kvm_book3s.h
@@ -143,6 +143,10 @@ extern pfn_t kvmppc_gfn_to_pfn(struct kvm_vcpu *vcpu, 
gfn_t gfn);
 extern void *kvmppc_pin_guest_page(struct kvm *kvm, unsigned long addr,
unsigned long *nb_ret);
 extern void kvmppc_unpin_guest_page(struct kvm *kvm, void *addr);
+extern long kvmppc_virtmode_h_enter(struct kvm_vcpu *vcpu, unsigned long flags,
+   long pte_index, unsigned long pteh, unsigned long ptel);
+extern long kvmppc_h_enter(struct kvm_vcpu *vcpu, unsigned long flags,
+   long pte_index, unsigned long pteh, unsigned long ptel);
 
 extern void kvmppc_entry_trampoline(void);
 extern void kvmppc_hv_entry_trampoline(void);
diff --git a/arch/powerpc/include/asm/kvm_book3s_64.h 
b/arch/powerpc/include/asm/kvm_book3s_64.h
index 300ec04..7e6f2ed 100644
--- a/arch/powerpc/include/asm/kvm_book3s_64.h
+++ b/arch/powerpc/include/asm/kvm_book3s_64.h
@@ -101,4 +101,16 @@ static inline unsigned long compute_tlbie_rb(unsigned long 
v, unsigned long r,
return rb;
 }
 
+static inline unsigned long hpte_page_size(unsigned long h, unsigned long l)
+{
+   /* only handle 4k, 64k and 16M pages for now */
+   if (!(h & HPTE_V_LARGE))
+   return 1ul << 12;   /* 4k page */
+   if ((l & 0xf000) == 0x1000 && cpu_has_feature(CPU_FTR_ARCH_206))
+   return 1ul << 16;   /* 64k page */
+   if ((l & 0xff000) == 0)
+   return 1ul << 24;   /* 16M page */
+   return 0;   /* error */
+}
+
 #endif /* __ASM_KVM_BOOK3S_64_H__ */
diff --git a/arch/powerpc/include/asm/kvm_host.h 
b/arch/powerpc/include/asm/kvm_host.h
index 7a17ab5..beb22ba 100644
--- a/arch/powerpc/include/asm/kvm_host.h
+++ b/arch/powerpc/include/asm/kvm_host.h
@@ -194,7 +194,9 @@ struct kvm_arch {
unsigned long lpcr;
unsigned long rmor;
struct kvmppc_rma_info *rma;
+   int rma_setup_done;
struct list_head spapr_tce_tables;
+   spinlock_t slot_phys_lock;
unsigned long *slot_phys[KVM_MEM_SLOTS_NUM];
int slot_npages[KVM_MEM_SLOTS_NUM];
unsigned short last_vcpu[NR_CPUS];
diff --git a/arch/powerpc/include/asm/kvm_ppc.h 
b/arch/powerpc/include/asm/kvm_ppc.h
index fc2d696..111e1b4 100644
--- a/arch/powerpc/include/asm/kvm_ppc.h
+++ b/arch/powerpc/include/asm/kvm_ppc.h
@@ -121,8 +121,8 @@ extern long kvmppc_alloc_hpt(struct kvm *kvm);
 extern void kvmppc_free_hpt(struct kvm *kvm);
 extern long kvmppc_prepare_vrma(struct kvm *kvm,
struct kvm_userspace_memory_region *mem);
-extern void kvmppc_map_vrma(struct kvm 

[PATCH v3 11/14] KVM: PPC: Implement MMIO emulation support for Book3S HV guests

2011-12-12 Thread Paul Mackerras
This provides the low-level support for MMIO emulation in Book3S HV
guests.  When the guest tries to map a page which is not covered by
any memslot, that page is taken to be an MMIO emulation page.  Instead
of inserting a valid HPTE, we insert an HPTE that has the valid bit
clear but another hypervisor software-use bit set, which we call
HPTE_V_ABSENT, to indicate that this is an absent page.  An
absent page is treated much like a valid page as far as guest hcalls
(H_ENTER, H_REMOVE, H_READ etc.) are concerned, except of course that
an absent HPTE doesn't need to be invalidated with tlbie since it
was never valid as far as the hardware is concerned.

When the guest accesses a page for which there is an absent HPTE, it
will take a hypervisor data storage interrupt (HDSI) since we now set
the VPM1 bit in the LPCR.  Our HDSI handler for HPTE-not-present faults
looks up the hash table and if it finds an absent HPTE mapping the
requested virtual address, will switch to kernel mode and handle the
fault in kvmppc_book3s_hv_page_fault(), which at present just calls
kvmppc_hv_emulate_mmio() to set up the MMIO emulation.

This is based on an earlier patch by Benjamin Herrenschmidt, but since
heavily reworked.
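For illustration, marking an HPTE absent as described above amounts to
(sketch only):

	hpte_v &= ~HPTE_V_VALID;	/* invisible to hardware translation */
	hpte_v |= HPTE_V_ABSENT;	/* hypervisor software bit: emulated or paged out */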

Signed-off-by: Paul Mackerras pau...@samba.org
---
 arch/powerpc/include/asm/kvm_book3s.h|5 +
 arch/powerpc/include/asm/kvm_book3s_64.h |   26 +++
 arch/powerpc/include/asm/kvm_host.h  |5 +
 arch/powerpc/include/asm/mmu-hash64.h|2 +-
 arch/powerpc/include/asm/ppc-opcode.h|4 +-
 arch/powerpc/include/asm/reg.h   |1 +
 arch/powerpc/kernel/asm-offsets.c|1 +
 arch/powerpc/kernel/exceptions-64s.S |8 +-
 arch/powerpc/kvm/book3s_64_mmu_hv.c  |  228 +--
 arch/powerpc/kvm/book3s_hv.c |   21 ++-
 arch/powerpc/kvm/book3s_hv_rm_mmu.c  |  262 ++
 arch/powerpc/kvm/book3s_hv_rmhandlers.S  |  127 ---
 12 files changed, 607 insertions(+), 83 deletions(-)

diff --git a/arch/powerpc/include/asm/kvm_book3s.h 
b/arch/powerpc/include/asm/kvm_book3s.h
index 5329c21..f6329bb 100644
--- a/arch/powerpc/include/asm/kvm_book3s.h
+++ b/arch/powerpc/include/asm/kvm_book3s.h
@@ -121,6 +121,11 @@ extern void kvmppc_mmu_book3s_hv_init(struct kvm_vcpu 
*vcpu);
 extern int kvmppc_mmu_map_page(struct kvm_vcpu *vcpu, struct kvmppc_pte *pte);
 extern int kvmppc_mmu_map_segment(struct kvm_vcpu *vcpu, ulong eaddr);
 extern void kvmppc_mmu_flush_segments(struct kvm_vcpu *vcpu);
+extern int kvmppc_book3s_hv_page_fault(struct kvm_run *run,
+   struct kvm_vcpu *vcpu, unsigned long addr,
+   unsigned long status);
+extern long kvmppc_hv_find_lock_hpte(struct kvm *kvm, gva_t eaddr,
+   unsigned long slb_v, unsigned long valid);
 
 extern void kvmppc_mmu_hpte_cache_map(struct kvm_vcpu *vcpu, struct hpte_cache 
*pte);
 extern struct hpte_cache *kvmppc_mmu_hpte_cache_next(struct kvm_vcpu *vcpu);
diff --git a/arch/powerpc/include/asm/kvm_book3s_64.h 
b/arch/powerpc/include/asm/kvm_book3s_64.h
index 9508c03..79dc37f 100644
--- a/arch/powerpc/include/asm/kvm_book3s_64.h
+++ b/arch/powerpc/include/asm/kvm_book3s_64.h
@@ -43,12 +43,15 @@ static inline void svcpu_put(struct 
kvmppc_book3s_shadow_vcpu *svcpu)
 #define HPT_HASH_MASK  (HPT_NPTEG - 1)
 #endif
 
+#define VRMA_VSID  0x1ffUL /* 1TB VSID reserved for VRMA */
+
 /*
  * We use a lock bit in HPTE dword 0 to synchronize updates and
  * accesses to each HPTE, and another bit to indicate non-present
  * HPTEs.
  */
 #define HPTE_V_HVLOCK  0x40UL
+#define HPTE_V_ABSENT  0x20UL
 
 static inline long try_lock_hpte(unsigned long *hpte, unsigned long bits)
 {
@@ -144,6 +147,29 @@ static inline unsigned long hpte_cache_bits(unsigned long 
pte_val)
 #endif
 }
 
+static inline bool hpte_read_permission(unsigned long pp, unsigned long key)
+{
+   if (key)
+   return PP_RWRX <= pp && pp <= PP_RXRX;
+   return 1;
+}
+
+static inline bool hpte_write_permission(unsigned long pp, unsigned long key)
+{
+   if (key)
+   return pp == PP_RWRW;
+   return pp <= PP_RWRW;
+}
+
+static inline int hpte_get_skey_perm(unsigned long hpte_r, unsigned long amr)
+{
+   unsigned long skey;
+
+   skey = ((hpte_r & HPTE_R_KEY_HI) >> 57) |
+   ((hpte_r & HPTE_R_KEY_LO) >> 9);
+   return (amr >> (62 - 2 * skey)) & 3;
+}
+
 static inline void lock_rmap(unsigned long *rmap)
 {
do {
diff --git a/arch/powerpc/include/asm/kvm_host.h 
b/arch/powerpc/include/asm/kvm_host.h
index 97cb2d7..937caca 100644
--- a/arch/powerpc/include/asm/kvm_host.h
+++ b/arch/powerpc/include/asm/kvm_host.h
@@ -210,6 +210,7 @@ struct kvm_arch {
unsigned long lpcr;
unsigned long rmor;
struct kvmppc_rma_info *rma;
+   unsigned long vrma_slb_v;
int rma_setup_done;
struct list_head spapr_tce_tables;
spinlock_t slot_phys_lock;
@@ -452,6 +453,10 @@ struct 

[PATCH v3 04/14] KVM: PPC: Keep page physical addresses in per-slot arrays

2011-12-12 Thread Paul Mackerras
This allocates an array for each memory slot that is added to store
the physical addresses of the pages in the slot.  This array is
vmalloc'd and accessed in kvmppc_h_enter using real_vmalloc_addr().
This allows us to remove the ram_pginfo field from the kvm_arch
struct, and removes the 64GB guest RAM limit that we had.

We use the low-order bits of the array entries to store a flag
indicating that we have done get_page on the corresponding page,
and therefore need to call put_page when we are finished with the
page.  Currently this is set for all pages except those in our
special RMO regions.
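A sketch of the encoding this describes (illustrative only):

	/* store: remember that get_page() was done on this pfn */
	physp[i] = (pfn << PAGE_SHIFT) | KVMPPC_GOT_PAGE;

	/* release: only put_page() what we actually got */
	if (physp[i] & KVMPPC_GOT_PAGE)
		put_page(pfn_to_page(physp[i] >> PAGE_SHIFT));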

Signed-off-by: Paul Mackerras pau...@samba.org
---
 arch/powerpc/include/asm/kvm_host.h |9 ++-
 arch/powerpc/kvm/book3s_64_mmu_hv.c |   18 +++---
 arch/powerpc/kvm/book3s_hv.c|  114 +--
 arch/powerpc/kvm/book3s_hv_rm_mmu.c |   41 +++-
 4 files changed, 107 insertions(+), 75 deletions(-)

diff --git a/arch/powerpc/include/asm/kvm_host.h 
b/arch/powerpc/include/asm/kvm_host.h
index 629df2e..7a17ab5 100644
--- a/arch/powerpc/include/asm/kvm_host.h
+++ b/arch/powerpc/include/asm/kvm_host.h
@@ -38,6 +38,7 @@
 #define KVM_MEMORY_SLOTS 32
 /* memory slots that does not exposed to userspace */
 #define KVM_PRIVATE_MEM_SLOTS 4
+#define KVM_MEM_SLOTS_NUM (KVM_MEMORY_SLOTS + KVM_PRIVATE_MEM_SLOTS)
 
 #ifdef CONFIG_KVM_MMIO
 #define KVM_COALESCED_MMIO_PAGE_OFFSET 1
@@ -175,25 +176,27 @@ struct revmap_entry {
unsigned long guest_rpte;
 };
 
+/* Low-order bits in kvm-arch.slot_phys[][] */
+#define KVMPPC_GOT_PAGE0x80
+
 struct kvm_arch {
 #ifdef CONFIG_KVM_BOOK3S_64_HV
unsigned long hpt_virt;
struct revmap_entry *revmap;
-   unsigned long ram_npages;
unsigned long ram_psize;
unsigned long ram_porder;
-   struct kvmppc_pginfo *ram_pginfo;
unsigned int lpid;
unsigned int host_lpid;
unsigned long host_lpcr;
unsigned long sdr1;
unsigned long host_sdr1;
int tlbie_lock;
-   int n_rma_pages;
unsigned long lpcr;
unsigned long rmor;
struct kvmppc_rma_info *rma;
struct list_head spapr_tce_tables;
+   unsigned long *slot_phys[KVM_MEM_SLOTS_NUM];
+   int slot_npages[KVM_MEM_SLOTS_NUM];
unsigned short last_vcpu[NR_CPUS];
struct kvmppc_vcore *vcores[KVM_MAX_VCORES];
 #endif /* CONFIG_KVM_BOOK3S_64_HV */
diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c 
b/arch/powerpc/kvm/book3s_64_mmu_hv.c
index 80ece8d..e4c6069 100644
--- a/arch/powerpc/kvm/book3s_64_mmu_hv.c
+++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c
@@ -98,16 +98,16 @@ void kvmppc_free_hpt(struct kvm *kvm)
 void kvmppc_map_vrma(struct kvm *kvm, struct kvm_userspace_memory_region *mem)
 {
unsigned long i;
-   unsigned long npages = kvm-arch.ram_npages;
-   unsigned long pfn;
+   unsigned long npages;
+   unsigned long pa;
unsigned long *hpte;
unsigned long hash;
unsigned long porder = kvm-arch.ram_porder;
struct revmap_entry *rev;
-   struct kvmppc_pginfo *pginfo = kvm-arch.ram_pginfo;
+   unsigned long *physp;
 
-   if (!pginfo)
-   return;
+   physp = kvm-arch.slot_phys[mem-slot];
+   npages = kvm-arch.slot_npages[mem-slot];
 
/* VRMA can't be  1TB */
if (npages  1ul  (40 - porder))
@@ -117,9 +117,10 @@ void kvmppc_map_vrma(struct kvm *kvm, struct 
kvm_userspace_memory_region *mem)
npages = HPT_NPTEG;
 
for (i = 0; i  npages; ++i) {
-   pfn = pginfo[i].pfn;
-   if (!pfn)
+   pa = physp[i];
+   if (!pa)
break;
+   pa = PAGE_MASK;
/* can't use hpt_hash since va  64 bits */
hash = (i ^ (VRMA_VSID ^ (VRMA_VSID  25)))  HPT_HASH_MASK;
/*
@@ -131,8 +132,7 @@ void kvmppc_map_vrma(struct kvm *kvm, struct 
kvm_userspace_memory_region *mem)
hash = (hash  3) + 7;
hpte = (unsigned long *) (kvm-arch.hpt_virt + (hash  4));
/* HPTE low word - RPN, protection, etc. */
-   hpte[1] = (pfn  PAGE_SHIFT) | HPTE_R_R | HPTE_R_C |
-   HPTE_R_M | PP_RWXX;
+   hpte[1] = pa | HPTE_R_R | HPTE_R_C | HPTE_R_M | PP_RWXX;
smp_wmb();
hpte[0] = HPTE_V_1TB_SEG | (VRMA_VSID  (40 - 16)) |
(i  (VRMA_PAGE_ORDER - 16)) | HPTE_V_BOLTED |
diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index da7db14..86d3e4b 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -50,14 +50,6 @@
 #include linux/vmalloc.h
 #include linux/highmem.h
 
-/*
- * For now, limit memory to 64GB and require it to be large pages.
- * This value is chosen because it makes the ram_pginfo array be
- * 64kB in size, which is about as large as we want to be trying
- * to allocate with kmalloc.
- 

[PATCH v3 14/14] KVM: PPC: Allow for read-only pages backing a Book3S HV guest

2011-12-12 Thread Paul Mackerras
With this, if a guest does an H_ENTER with a read/write HPTE on a page
which is currently read-only, we make the actual HPTE inserted be a
read-only version of the HPTE.  We now intercept protection faults as
well as HPTE not found faults, and for a protection fault we work out
whether it should be reflected to the guest (e.g. because the guest HPTE
didn't allow write access to usermode) or handled by switching to
kernel context and calling kvmppc_book3s_hv_page_fault, which will then
request write access to the page and update the actual HPTE.
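In outline (a sketch of the idea, not the exact code), the H_ENTER
path described above uses the new helpers like this:

	if (hpte_is_writable(ptel) && !write_ok)
		ptel = hpte_make_readonly(ptel);	/* demote to read-only */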

Signed-off-by: Paul Mackerras pau...@samba.org
---
 arch/powerpc/include/asm/kvm_book3s_64.h |   20 +-
 arch/powerpc/kvm/book3s_64_mmu_hv.c  |   39 +++--
 arch/powerpc/kvm/book3s_hv_rm_mmu.c  |   32 +---
 arch/powerpc/kvm/book3s_hv_rmhandlers.S  |4 +-
 4 files changed, 78 insertions(+), 17 deletions(-)

diff --git a/arch/powerpc/include/asm/kvm_book3s_64.h 
b/arch/powerpc/include/asm/kvm_book3s_64.h
index c21e46d..b0c08b1 100644
--- a/arch/powerpc/include/asm/kvm_book3s_64.h
+++ b/arch/powerpc/include/asm/kvm_book3s_64.h
@@ -121,6 +121,22 @@ static inline unsigned long hpte_rpn(unsigned long ptel, 
unsigned long psize)
return ((ptel  HPTE_R_RPN)  ~(psize - 1))  PAGE_SHIFT;
 }
 
+static inline int hpte_is_writable(unsigned long ptel)
+{
+   unsigned long pp = ptel & (HPTE_R_PP0 | HPTE_R_PP);
+
+   return pp != PP_RXRX && pp != PP_RXXX;
+}
+
+static inline unsigned long hpte_make_readonly(unsigned long ptel)
+{
+   if ((ptel & HPTE_R_PP0) || (ptel & HPTE_R_PP) == PP_RWXX)
+   ptel = (ptel & ~HPTE_R_PP) | PP_RXXX;
+   else
+   ptel |= PP_RXRX;
+   return ptel;
+}
+
 static inline int hpte_cache_flags_ok(unsigned long ptel, unsigned long 
io_type)
 {
unsigned int wimg = ptel  HPTE_R_WIMG;
@@ -140,7 +156,7 @@ static inline int hpte_cache_flags_ok(unsigned long ptel, 
unsigned long io_type)
  * Lock and read a linux PTE.  If it's present and writable, atomically
  * set dirty and referenced bits and return the PTE, otherwise return 0.
  */
-static inline pte_t kvmppc_read_update_linux_pte(pte_t *p)
+static inline pte_t kvmppc_read_update_linux_pte(pte_t *p, int writing)
 {
pte_t pte, tmp;
 
@@ -158,7 +174,7 @@ static inline pte_t kvmppc_read_update_linux_pte(pte_t *p)
 
if (pte_present(pte)) {
pte = pte_mkyoung(pte);
-   if (pte_write(pte))
+   if (writing && pte_write(pte))
pte = pte_mkdirty(pte);
}
 
diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c 
b/arch/powerpc/kvm/book3s_64_mmu_hv.c
index 83761dd..66d6452 100644
--- a/arch/powerpc/kvm/book3s_64_mmu_hv.c
+++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c
@@ -503,6 +503,7 @@ int kvmppc_book3s_hv_page_fault(struct kvm_run *run, struct 
kvm_vcpu *vcpu,
struct page *page, *pages[1];
long index, ret, npages;
unsigned long is_io;
+   unsigned int writing, write_ok;
struct vm_area_struct *vma;
 
/*
@@ -553,8 +554,11 @@ int kvmppc_book3s_hv_page_fault(struct kvm_run *run, 
struct kvm_vcpu *vcpu,
pfn = 0;
page = NULL;
pte_size = PAGE_SIZE;
+   writing = (dsisr & DSISR_ISSTORE) != 0;
+   /* If writing != 0, then the HPTE must allow writing, if we get here */
+   write_ok = writing;
hva = gfn_to_hva_memslot(memslot, gfn);
-   npages = get_user_pages_fast(hva, 1, 1, pages);
+   npages = get_user_pages_fast(hva, 1, writing, pages);
 if (npages < 1) {
 /* Check if it's an I/O mapping */
 down_read(&current->mm->mmap_sem);
@@ -565,6 +569,7 @@ int kvmppc_book3s_hv_page_fault(struct kvm_run *run, struct 
kvm_vcpu *vcpu,
 ((hva - vma->vm_start) >> PAGE_SHIFT);
 pte_size = psize;
 is_io = hpte_cache_bits(pgprot_val(vma->vm_page_prot));
+   write_ok = vma->vm_flags & VM_WRITE;
 }
 up_read(&current->mm->mmap_sem);
if (!pfn)
@@ -575,6 +580,24 @@ int kvmppc_book3s_hv_page_fault(struct kvm_run *run, 
struct kvm_vcpu *vcpu,
page = compound_head(page);
pte_size = compound_order(page);
}
+   /* if the guest wants write access, see if that is OK */
+   if (!writing && hpte_is_writable(r)) {
+   pte_t *ptep, pte;
+
+   /*
+* We need to protect against page table destruction
+* while looking up and updating the pte.
+*/
+   rcu_read_lock_sched();
+   ptep = find_linux_pte_or_hugepte(current->mm->pgd,
+hva, NULL);
+   if (ptep && pte_present(*ptep)) {
+   pte = 

[PATCH v3 06/14] KVM: PPC: Make the H_ENTER hcall more reliable

2011-12-12 Thread Paul Mackerras
At present, our implementation of H_ENTER only makes one try at locking
each slot that it looks at, and doesn't even retry the ldarx/stdcx.
atomic update sequence that it uses to attempt to lock the slot.  Thus
it can return the H_PTEG_FULL error unnecessarily, particularly when
the H_EXACT flag is set, meaning that the caller wants a specific PTEG
slot.

This improves the situation by making a second pass when no free HPTE
slot is found, where we spin until we succeed in locking each slot in
turn and then check whether it is full while we hold the lock.  If the
second pass fails, then we return H_PTEG_FULL.

This also moves lock_hpte to a header file (since later commits in this
series will need to use it from other source files) and renames it to
try_lock_hpte, which is a somewhat less misleading name.
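The second pass described above is, in outline (illustration only):

	for (i = 0; i < 8; ++i) {
		while (!try_lock_hpte(hpte, HPTE_V_HVLOCK))
			cpu_relax();		/* spin until we own this slot */
		if ((*hpte & HPTE_V_VALID) == 0)
			break;			/* found a genuinely free slot */
		*hpte &= ~HPTE_V_HVLOCK;	/* slot full: unlock, try the next */
		hpte += 2;
	}
	if (i == 8)
		return H_PTEG_FULL;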

Signed-off-by: Paul Mackerras pau...@samba.org
---
 arch/powerpc/include/asm/kvm_book3s_64.h |   25 
 arch/powerpc/kvm/book3s_hv_rm_mmu.c  |   63 --
 2 files changed, 59 insertions(+), 29 deletions(-)

diff --git a/arch/powerpc/include/asm/kvm_book3s_64.h 
b/arch/powerpc/include/asm/kvm_book3s_64.h
index fa3dc79..300ec04 100644
--- a/arch/powerpc/include/asm/kvm_book3s_64.h
+++ b/arch/powerpc/include/asm/kvm_book3s_64.h
@@ -43,6 +43,31 @@ static inline void svcpu_put(struct 
kvmppc_book3s_shadow_vcpu *svcpu)
 #define HPT_HASH_MASK  (HPT_NPTEG - 1)
 #endif
 
+/*
+ * We use a lock bit in HPTE dword 0 to synchronize updates and
+ * accesses to each HPTE, and another bit to indicate non-present
+ * HPTEs.
+ */
+#define HPTE_V_HVLOCK  0x40UL
+
+static inline long try_lock_hpte(unsigned long *hpte, unsigned long bits)
+{
+   unsigned long tmp, old;
+
+   asm volatile("  ldarx   %0,0,%2\n"
+  "and.    %1,%0,%3\n"
+  "bne     2f\n"
+  "ori     %0,%0,%4\n"
+  "stdcx.  %0,0,%2\n"
+  "beq+    2f\n"
+  "li      %1,%3\n"
+  "2:     isync"
+: "=&r" (tmp), "=&r" (old)
+: "r" (hpte), "r" (bits), "i" (HPTE_V_HVLOCK)
+: "cc", "memory");
+   return old == 0;
+}
+
 static inline unsigned long compute_tlbie_rb(unsigned long v, unsigned long r,
 unsigned long pte_index)
 {
diff --git a/arch/powerpc/kvm/book3s_hv_rm_mmu.c 
b/arch/powerpc/kvm/book3s_hv_rm_mmu.c
index 84dae82..a28a603 100644
--- a/arch/powerpc/kvm/book3s_hv_rm_mmu.c
+++ b/arch/powerpc/kvm/book3s_hv_rm_mmu.c
@@ -53,26 +53,6 @@ static void *real_vmalloc_addr(void *x)
return __va(addr);
 }
 
-#define HPTE_V_HVLOCK  0x40UL
-
-static inline long lock_hpte(unsigned long *hpte, unsigned long bits)
-{
-   unsigned long tmp, old;
-
-   asm volatile(  ldarx   %0,0,%2\n
-  and.%1,%0,%3\n
-  bne 2f\n
-  ori %0,%0,%4\n
-  stdcx.  %0,0,%2\n
-  beq+2f\n
-  li  %1,%3\n
-2:isync
-: =r (tmp), =r (old)
-: r (hpte), r (bits), i (HPTE_V_HVLOCK)
-: cc, memory);
-   return old == 0;
-}
-
 long kvmppc_h_enter(struct kvm_vcpu *vcpu, unsigned long flags,
long pte_index, unsigned long pteh, unsigned long ptel)
 {
@@ -126,24 +106,49 @@ long kvmppc_h_enter(struct kvm_vcpu *vcpu, unsigned long 
flags,
 pteh &= ~0x60UL;
 ptel &= ~(HPTE_R_PP0 - kvm->arch.ram_psize);
 ptel |= pa;
+
 if (pte_index >= HPT_NPTE)
 return H_PARAMETER;
 if (likely((flags & H_EXACT) == 0)) {
 pte_index &= ~7UL;
 hpte = (unsigned long *)(kvm->arch.hpt_virt + (pte_index << 4));
-   for (i = 0; ; ++i) {
-   if (i == 8)
-   return H_PTEG_FULL;
+   for (i = 0; i < 8; ++i) {
 if ((*hpte & HPTE_V_VALID) == 0 &&
-   lock_hpte(hpte, HPTE_V_HVLOCK | HPTE_V_VALID))
+   try_lock_hpte(hpte, HPTE_V_HVLOCK | HPTE_V_VALID))
break;
hpte += 2;
}
+   if (i == 8) {
+   /*
+* Since try_lock_hpte doesn't retry (not even stdcx.
+* failures), it could be that there is a free slot
+* but we transiently failed to lock it.  Try again,
+* actually locking each slot and checking it.
+*/
+   hpte -= 16;
+   for (i = 0; i < 8; ++i) {
+   while (!try_lock_hpte(hpte, HPTE_V_HVLOCK))
+   cpu_relax();
+   if ((*hpte & HPTE_V_VALID) == 0)
+   

Re: [PATCH 1/4] powerpc/kprobe: introduce a new thread flag

2011-12-12 Thread Benjamin Herrenschmidt
On Mon, 2011-12-12 at 16:50 +0800, Tiejun Chen wrote:
 We need to add a new thread flag, TIF_KPROBE/_TIF_DELAYED_KPROBE,
 for handling kprobe operation while exiting exception.

The basic idea is sane, however the instruction emulation isn't per-se
kprobe specific. It could be used by xmon too for example. I'd rather
use a different name, something like TIF_EMULATE_STACK_STORE or
something like that.

Cheers,
Ben.

 Signed-off-by: Tiejun Chen tiejun.c...@windriver.com
 ---
  arch/powerpc/include/asm/thread_info.h |2 ++
  1 files changed, 2 insertions(+), 0 deletions(-)
 
 diff --git a/arch/powerpc/include/asm/thread_info.h 
 b/arch/powerpc/include/asm/thread_info.h
 index 836f231..3378734 100644
 --- a/arch/powerpc/include/asm/thread_info.h
 +++ b/arch/powerpc/include/asm/thread_info.h
 @@ -112,6 +112,7 @@ static inline struct thread_info 
 *current_thread_info(void)
  #define TIF_FREEZE   14  /* Freezing for suspend */
  #define TIF_SYSCALL_TRACEPOINT   15  /* syscall tracepoint 
 instrumentation */
  #define TIF_RUNLATCH 16  /* Is the runlatch enabled? */
 +#define TIF_KPROBE   17  /* Is the delayed kprobe operation? */
  
  /* as above, but as bit values */
  #define _TIF_SYSCALL_TRACE   (1TIF_SYSCALL_TRACE)
 @@ -130,6 +131,7 @@ static inline struct thread_info 
 *current_thread_info(void)
  #define _TIF_FREEZE  (1TIF_FREEZE)
  #define _TIF_SYSCALL_TRACEPOINT  (1TIF_SYSCALL_TRACEPOINT)
  #define _TIF_RUNLATCH(1TIF_RUNLATCH)
 +#define _TIF_DELAYED_KPROBE  (1TIF_KPROBE)
  #define _TIF_SYSCALL_T_OR_A  (_TIF_SYSCALL_TRACE | _TIF_SYSCALL_AUDIT | \
_TIF_SECCOMP | _TIF_SYSCALL_TRACEPOINT)
  


___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev


Re: [PATCH 2/4] ppc32/kprobe: introduce copy_exc_stack

2011-12-12 Thread Benjamin Herrenschmidt
On Mon, 2011-12-12 at 16:50 +0800, Tiejun Chen wrote:
 We need a copy mechanism to migrate exception stack. But looks copy_page()
 already implement this well so we can complete copy_exc_stack() based on
 that directly.

I'd rather you don't hijack copy_page which is quite sensitive. The
emulation isn't performance critical so a dumber routine would work
fine.

Why not use memcpy ? You can call it from assembly.

Cheers,
Ben.
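A minimal sketch of such a memcpy-based copy, keeping the same arguments the posted
copy_exc_stack() call uses (illustrative only, not from the thread):

	lwz	r3,GPR1(r1)		/* the stwu target, i.e. the new stack pointer */
	subi	r3,r3,INT_FRAME_SIZE	/* trampoline frame sits just below it */
	mr	r4,r1			/* source: the current exception frame */
	li	r5,INT_FRAME_SIZE
	bl	memcpy			/* clobbers lr, as bl copy_exc_stack does too;
					 * the exception exit path reloads lr from the
					 * frame before returning */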

 Signed-off-by: Tiejun Chen tiejun.c...@windriver.com
 ---
  arch/powerpc/include/asm/page_32.h |1 +
  arch/powerpc/kernel/misc_32.S  |   16 +++-
  arch/powerpc/kernel/ppc_ksyms.c|1 +
  3 files changed, 17 insertions(+), 1 deletions(-)
 
 diff --git a/arch/powerpc/include/asm/page_32.h 
 b/arch/powerpc/include/asm/page_32.h
 index 68d73b2..2c1fd84 100644
 --- a/arch/powerpc/include/asm/page_32.h
 +++ b/arch/powerpc/include/asm/page_32.h
 @@ -40,6 +40,7 @@ struct page;
  extern void clear_pages(void *page, int order);
  static inline void clear_page(void *page) { clear_pages(page, 0); }
  extern void copy_page(void *to, void *from);
 +extern void copy_exc_stack(void *to, void *from);
  
  #include asm-generic/getorder.h
  
 diff --git a/arch/powerpc/kernel/misc_32.S b/arch/powerpc/kernel/misc_32.S
 index 998a100..aa02545 100644
 --- a/arch/powerpc/kernel/misc_32.S
 +++ b/arch/powerpc/kernel/misc_32.S
 @@ -527,7 +527,7 @@ _GLOBAL(clear_pages)
   stw r8,12(r3);  \
  stwu	r9,16(r3)
  
 -_GLOBAL(copy_page)
 +ready_copy:
  addi	r3,r3,-4
  addi	r4,r4,-4
  
 @@ -544,7 +544,21 @@ _GLOBAL(copy_page)
  dcbt	r5,r4
   li  r11,L1_CACHE_BYTES+4
  #endif /* MAX_COPY_PREFETCH */
 + blr
 +
 +_GLOBAL(copy_exc_stack)
 +	mflr	r12
 + bl  ready_copy
 +	mtlr	r12
 + li  r0,INT_FRAME_SIZE/L1_CACHE_BYTES - MAX_COPY_PREFETCH
 + b   go_copy
 +
 +_GLOBAL(copy_page)
 +	mflr	r12
 + bl  ready_copy
 +	mtlr	r12
   li  r0,PAGE_SIZE/L1_CACHE_BYTES - MAX_COPY_PREFETCH
 +go_copy:
   crclr   4*cr0+eq
  2:
   mtctr   r0
 diff --git a/arch/powerpc/kernel/ppc_ksyms.c b/arch/powerpc/kernel/ppc_ksyms.c
 index f5ae872..2223daf 100644
 --- a/arch/powerpc/kernel/ppc_ksyms.c
 +++ b/arch/powerpc/kernel/ppc_ksyms.c
 @@ -88,6 +88,7 @@ EXPORT_SYMBOL(__clear_user);
  EXPORT_SYMBOL(__strncpy_from_user);
  EXPORT_SYMBOL(__strnlen_user);
  EXPORT_SYMBOL(copy_page);
 +EXPORT_SYMBOL(copy_exc_stack);
  
  #if defined(CONFIG_PCI)  defined(CONFIG_PPC32)
  EXPORT_SYMBOL(isa_io_base);


___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev


Re: [PATCH 3/4] ppc32/kprobe: complete kprobe and migrate exception frame

2011-12-12 Thread Benjamin Herrenschmidt
On Mon, 2011-12-12 at 16:50 +0800, Tiejun Chen wrote:
 We can't emulate stwu since that may corrupt current exception stack.
 So we will have to do real store operation in the exception return code.
 
 Firstly we'll allocate a trampoline exception frame below the kprobed
 function stack and copy the current exception frame to the trampoline.
 Then we can do this real store operation to implement 'stwu', and reroute
 the trampoline frame to r1 to complete this exception migration.
 
 Signed-off-by: Tiejun Chen tiejun.c...@windriver.com
 ---
  arch/powerpc/kernel/entry_32.S |   26 ++
  1 files changed, 26 insertions(+), 0 deletions(-)
 
 diff --git a/arch/powerpc/kernel/entry_32.S b/arch/powerpc/kernel/entry_32.S
 index 56212bc..d56e311 100644
 --- a/arch/powerpc/kernel/entry_32.S
 +++ b/arch/powerpc/kernel/entry_32.S
 @@ -1185,6 +1185,8 @@ recheck:
   bne-do_resched
   andi.   r0,r9,_TIF_USER_WORK_MASK
   beq restore_user
 + andis.  r0,r9,_TIF_DELAYED_KPROBE@h
 + bne-restore_kprobe

Same comment as earlier about name. Note that you're not hooking in the
right place. recheck is only reached if you -already- went out of the
normal exit path and only when going back to user space unless I'm
missing something (which is really the case you don't care about).

You need to hook into resume_kernel instead.

Also, we may want to simplify the whole thing, instead of checking user
vs. kernel first etc... we could instead have a single _TIF_WORK_MASK
which includes both the bits for user work and the new bit for kernel
work. With preempt, the kernel work bits would also include
_TIF_NEED_RESCHED.

Then you have in the common exit path, a single test for that, with a
fast path that skips everything and just goes to restore for both
kernel and user.

The only possible issue is the setting of dbcr0 for BookE and 44x and we
can keep that as a special case keyed off MSR_PR in the resume path under
ifdef BOOKE (we'll probably sanitize that later with some different
rework anyway). 

So the exit path becomes something like:

ret_from_except:
	.. hard disable interrupts (unchanged) ...
	read TIF flags
	andi with _TIF_WORK_MASK
	nothing set -> restore
	check PR
	set -> do_work_user
	not set -> do_work_kernel (kprobes & preempt)
	(both loop until relevant _TIF flags are all clear)
restore:
	#ifdef BOOKE && 44x: test PR & do dbcr0 stuff if needed
	... normal restore ...
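Rendered as rough entry_32.S-style assembly, that flow would be something like
this (an untested, illustrative sketch; _TIF_WORK_MASK and the label names simply
follow the description above):

	ret_from_except:
		/* ... hard-disable interrupts as before ... */
		rlwinm	r9,r1,0,0,(31-THREAD_SHIFT)	/* current_thread_info() */
		lwz	r9,TI_FLAGS(r9)
		lis	r0,_TIF_WORK_MASK@h		/* user + kernel work bits */
		ori	r0,r0,_TIF_WORK_MASK@l
		and.	r0,r9,r0
		beq	restore				/* fast path: no work at all */
		lwz	r3,_MSR(r1)
		andi.	r0,r3,MSR_PR
		bne	do_work_user			/* signals, single step, ... */
		b	do_work_kernel			/* preempt + delayed stack store */
	restore:
		/* BookE/44x only: test MSR_PR and do the dbcr0 setup here,
		 * then fall into the normal restore */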

  do_user_signal:  /* r10 contains MSR_KERNEL here */
   ori r10,r10,MSR_EE
   SYNC
 @@ -1202,6 +1204,30 @@ do_user_signal:/* r10 contains 
 MSR_KERNEL here */
   REST_NVGPRS(r1)
   b   recheck
  
 +restore_kprobe:
 + lwz r3,GPR1(r1)
 + subir3,r3,INT_FRAME_SIZE; /* Allocate a trampoline exception frame 
 */
 + mr  r4,r1
 + bl  copy_exc_stack  /* Copy from the original to the trampoline */
 +
 + /* Do real stw operation to complete stwu */
 + mr  r4,r1
 + addir4,r4,INT_FRAME_SIZE/* Get kprobed entry */
 + lwz r5,GPR1(r1) /* Backup r1 */
 + stw r4,GPR1(r1) /* Now store that safely */

The above confuses me. Shouldn't you do instead something like

	lwz	r4,GPR1(r1)
	subi	r3,r4,INT_FRAME_SIZE
	li	r5,INT_FRAME_SIZE
	bl	memcpy

To start with, then you need to know the old r1 value which may or may
not be related to your current r1. The emulation code should stash it
into the int frame in an unused slot such as orig_gpr3 (since that
only pertains to restarting syscalls which we aren't doing here).

Then you adjust your r1 and do something like

lwz r3,GPR1(r1)
lwz r0,ORIG_GPR3(r1)
stw r0,0(r3)

To perform the store, before doing the rest:
 
 + /* Reroute the trampoline frame to r1 */
 + subir5,r5,INT_FRAME_SIZE
 + mr  r1,r5
 +
 + /* Clear _TIF_DELAYED_KPROBE flag */
 + rlwinm  r9,r1,0,0,(31-THREAD_SHIFT)
 + lwz r0,TI_FLAGS(r9)
 + rlwinm  r0,r0,0,_TIF_DELAYED_KPROBE
 + stw r0,TI_FLAGS(r9)
 +
 + b   restore
 +
  /*
   * We come here when we are at the end of handling an exception
   * that occurred at a place where taking an exception will lose

Cheers,
Ben.


___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev


[PATCH] powerpc/85xx: re-enable the ePAPR byte channel driver in corenet32_smp_defconfig

2011-12-12 Thread Timur Tabi
Commit 7c4b2f09 (powerpc: Update mpc85xx/corenet 32-bit defconfigs) accidentally
disabled the ePAPR byte channel driver in the defconfig for Freescale CoreNet
platforms.

Signed-off-by: Timur Tabi ti...@freescale.com
---
 arch/powerpc/configs/corenet32_smp_defconfig |1 +
 1 files changed, 1 insertions(+), 0 deletions(-)

diff --git a/arch/powerpc/configs/corenet32_smp_defconfig 
b/arch/powerpc/configs/corenet32_smp_defconfig
index 8ff6302..ebf42ae 100644
--- a/arch/powerpc/configs/corenet32_smp_defconfig
+++ b/arch/powerpc/configs/corenet32_smp_defconfig
@@ -105,6 +105,7 @@ CONFIG_FSL_PQ_MDIO=y
 # CONFIG_INPUT_MOUSE is not set
 CONFIG_SERIO_LIBPS2=y
 # CONFIG_LEGACY_PTYS is not set
+CONFIG_PPC_EPAPR_HV_BYTECHAN=y
 CONFIG_SERIAL_8250=y
 CONFIG_SERIAL_8250_CONSOLE=y
 CONFIG_SERIAL_8250_EXTENDED=y
-- 
1.7.3.4


___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev


[PATCH] powerpc/fsl: add MSI support for the Freescale hypervisor

2011-12-12 Thread Timur Tabi
Add support for MSIs under the Freescale hypervisor.  This involves updating
the fsl_pci driver to support vmpic-msi nodes, and updating the fsl_pci
driver to create an ATMU for the rerouted MSIIR register.

Signed-off-by: Timur Tabi ti...@freescale.com
---
 arch/powerpc/sysdev/fsl_msi.c |   68 +
 arch/powerpc/sysdev/fsl_msi.h |7 ++--
 arch/powerpc/sysdev/fsl_pci.c |   25 +++
 3 files changed, 77 insertions(+), 23 deletions(-)

diff --git a/arch/powerpc/sysdev/fsl_msi.c b/arch/powerpc/sysdev/fsl_msi.c
index 89548e0..7dc473f 100644
--- a/arch/powerpc/sysdev/fsl_msi.c
+++ b/arch/powerpc/sysdev/fsl_msi.c
@@ -23,6 +23,8 @@
 #include <asm/hw_irq.h>
 #include <asm/ppc-pci.h>
 #include <asm/mpic.h>
+#include <asm/fsl_hcalls.h>
+
 #include "fsl_msi.h"
 #include "fsl_pci.h"
 
@@ -163,11 +165,13 @@ static int fsl_setup_msi_irqs(struct pci_dev *pdev, int 
nvec, int type)
 */
	np = of_parse_phandle(hose->dn, "fsl,msi", 0);
	if (np) {
-		if (of_device_is_compatible(np, "fsl,mpic-msi"))
+		if (of_device_is_compatible(np, "fsl,mpic-msi") ||
+		    of_device_is_compatible(np, "fsl,vmpic-msi"))
			phandle = np->phandle;
		else {
-			dev_err(&pdev->dev, "node %s has an invalid fsl,msi"
-				" phandle\n", hose->dn->full_name);
+			dev_err(&pdev->dev,
+				"node %s has an invalid fsl,msi phandle %u\n",
+				hose->dn->full_name, np->phandle);
			return -EINVAL;
		}
	}
@@ -196,16 +200,14 @@ static int fsl_setup_msi_irqs(struct pci_dev *pdev, int 
nvec, int type)
 
		if (hwirq < 0) {
			rc = hwirq;
-			pr_debug("%s: fail allocating msi interrupt\n",
-					__func__);
+			dev_err(&pdev->dev, "could not allocate MSI interrupt\n");
			goto out_free;
		}
 
		virq = irq_create_mapping(msi_data->irqhost, hwirq);
 
		if (virq == NO_IRQ) {
-			pr_debug("%s: fail mapping hwirq 0x%x\n",
-					__func__, hwirq);
+			dev_err(&pdev->dev, "fail mapping hwirq %i\n", hwirq);
			msi_bitmap_free_hwirqs(&msi_data->bitmap, hwirq, 1);
rc = -ENOSPC;
goto out_free;
@@ -234,6 +236,7 @@ static void fsl_msi_cascade(unsigned int irq, struct 
irq_desc *desc)
u32 intr_index;
u32 have_shift = 0;
struct fsl_msi_cascade_data *cascade_data;
+   unsigned int ret;
 
cascade_data = irq_get_handler_data(irq);
	msi_data = cascade_data->msi_data;
@@ -265,6 +268,14 @@ static void fsl_msi_cascade(unsigned int irq, struct 
irq_desc *desc)
case FSL_PIC_IP_IPIC:
		msir_value = fsl_msi_read(msi_data->msi_regs, msir_index * 0x4);
break;
+   case FSL_PIC_IP_VMPIC:
+		ret = fh_vmpic_get_msir(virq_to_hw(irq), &msir_value);
+		if (ret) {
+			pr_err("fsl-msi: fh_vmpic_get_msir() failed for "
+			       "irq %u (ret=%u)\n", irq, ret);
+   msir_value = 0;
+   }
+   break;
}
 
while (msir_value) {
@@ -282,6 +293,7 @@ static void fsl_msi_cascade(unsigned int irq, struct 
irq_desc *desc)
 
	switch (msi_data->feature & FSL_PIC_IP_MASK) {
	case FSL_PIC_IP_MPIC:
+	case FSL_PIC_IP_VMPIC:
		chip->irq_eoi(idata);
break;
case FSL_PIC_IP_IPIC:
@@ -311,7 +323,8 @@ static int fsl_of_msi_remove(struct platform_device *ofdev)
}
	if (msi->bitmap.bitmap)
		msi_bitmap_free(&msi->bitmap);
-	iounmap(msi->msi_regs);
+	if ((msi->feature & FSL_PIC_IP_MASK) != FSL_PIC_IP_VMPIC)
+		iounmap(msi->msi_regs);
kfree(msi);
 
return 0;
@@ -383,26 +396,32 @@ static int __devinit fsl_of_msi_probe(struct 
platform_device *dev)
goto error_out;
}
 
-	/* Get the MSI reg base */
-	err = of_address_to_resource(dev->dev.of_node, 0, &res);
-	if (err) {
-		dev_err(&dev->dev, "%s resource error!\n",
+	/*
+	 * Under the Freescale hypervisor, the msi nodes don't have a 'reg'
+	 * property.  Instead, we use hypercalls to access the MSI.
+	 */
+	if ((features->fsl_pic_ip & FSL_PIC_IP_MASK) != FSL_PIC_IP_VMPIC) {
+		err = of_address_to_resource(dev->dev.of_node, 0, &res);
+		if (err) {
+			dev_err(&dev->dev, "invalid resource for node %s\n",
				dev->dev.of_node->full_name);
-		goto error_out;
-	}
+			goto error_out;
+		}
 
-   msi-msi_regs = 

Re: [PATCH] powerpc/fsl: add MSI support for the Freescale hypervisor

2011-12-12 Thread Scott Wood
On 12/12/2011 05:37 PM, Timur Tabi wrote:
 @@ -205,6 +207,29 @@ static void __init setup_pci_atmu(struct pci_controller 
 *hose,
  
   /* Setup inbound mem window */
   mem = memblock_end_of_DRAM();
 +
 + /*
 +  * The msi-address-64 property, if it exists, indicates the physical
 +  * address of the MSIIR register.  Normally, this register is located
 +  * inside CCSR, so the ATMU that covers all of CCSR is used for MSIs.
 +  * But if this property exists, then we'll normally need to create a
 +  * new ATMU for it.  For now, however, we cheat.  The only entity that
 +  * creates this property is the Freescale hypervisor, and it
 +  * always locates MSIIR in the page immediately after the end of DDR.
 +  * So we can avoid allocating a new ATMU by just extending the DDR
 +  * ATMU by one page.
 +  */

Technically, it's up to the hv config file where MSIIR gets mapped.
"After main memory" is just a common way of configuring it, but won't work
if we're limiting the partition's memory to end at an unusual address.

Might also want to comment that the reason for this weird remapping is
hardware limitations in the IOMMU.

-Scott
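In code terms, the cheat the quoted comment describes is essentially a one-page
bump of the inbound DDR window, something like the sketch below (where the
property is looked up is an assumption here, and this is not the posted patch):

	/* Sketch: if the hypervisor exposes MSIIR via msi-address-64 and has
	 * placed it in the page just past the end of DDR, cover it by growing
	 * the inbound DDR ATMU by one page instead of programming a new one. */
	if (of_get_property(hose->dn, "msi-address-64", NULL))
		mem += PAGE_SIZE;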

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev


Re: [PATCH] powerpc/fsl: add MSI support for the Freescale hypervisor

2011-12-12 Thread Tabi Timur-B04825
Scott Wood wrote:
 Technically, it's up to the hv config file where MSIIR gets mapped.
 After main memory is just a common way of configuring it, but won't work
 if we're limiting the partition's memory to end at an unusual address.

I'll change the comment to reflect this.

Why can't we have the hypervisor always put MSIIR at the end of DDR, and 
not make it configurable?

-- 
Timur Tabi
Linux kernel developer at Freescale
___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev


Re: [PATCH] powerpc/fsl: add MSI support for the Freescale hypervisor

2011-12-12 Thread Scott Wood
On 12/12/2011 06:27 PM, Tabi Timur-B04825 wrote:
 Scott Wood wrote:
 Technically, it's up to the hv config file where MSIIR gets mapped.
 After main memory is just a common way of configuring it, but won't work
 if we're limiting the partition's memory to end at an unusual address.
 
 I'll change the comment to reflect this.
 
 Why can't we have the hypervisor always put MSIIR at the end of DDR, and 
 not make it configurable?

"...but won't work if we're limiting the partition's memory to end at an
unusual address."  We have to live with PAMU's iova limitations.  PAMU
setup is user-controlled in general under Topaz.

How's the hypervisor even going to know if the mem= kernel command line
argument is used to change the end of main memory (assuming that's been
taken into account by this point in the boot sequence)?

What if the user put a shared memory region immediately after the main
partition memory?

-Scott

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev


Re: [PATCH] powerpc/fsl: add MSI support for the Freescale hypervisor

2011-12-12 Thread Tabi Timur-B04825
Scott Wood wrote:
 How's the hypervisor even going to know if the mem= kernel command line
 argument is used to change the end of main memory (assuming that's been
 taken into account by this point in the boot sequence)?

 What if the user put a shared memory region immediately after the main
 partition memory?

Alright, I'll need to add support for detached MSIIR addresses, but for 
now I think this patch is okay.  It's the same level of functionality that 
we provide on the SDK.

-- 
Timur Tabi
Linux kernel developer at Freescale
___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev


Re: [PATCH 01/16 v3] pmac_zilog: fix unexpected irq

2011-12-12 Thread Finn Thain

On Tue, 13 Dec 2011, Benjamin Herrenschmidt wrote:

 On Tue, 2011-12-13 at 00:34 +1100, Finn Thain wrote:
  On Mon, 12 Dec 2011, Benjamin Herrenschmidt wrote:
  
   Any chance you can test this patch ? I would not be surprised if it 
   broke m68k since I had to do some of the changes in there blind, 
   so let me know... with this, I can again suspend/resume properly on 
   a Pismo while using the internal modem among other things.
  
  The patch works on a PowerBook 520 given a few changes (below). This 
  PowerBook only has one serial port that I can test (the internal modem 
  is not supported on 68k Macs).
 
 Interesting. The modem is a soft-modem geoport or a hw serial modem ? 

It's the latter.

 In the later case it's probably just a matter of finding the right GPIO 
 bit in Apple ASIC to turn the power on :-)

Surely feasible, but not high on the list of missing hardware support.

 
   Can you test a machine with two ports? The rest of my Mac hardware is 
  in storage since I moved house last week.
 
 I tried on 2 port powermacs, but I only have one adapter, so I've 
 basically been running with one serial port open and shooting irda frame 
 on the other (with nothing to check wether I got the frames on the other 
 hand), oh well ...
 
 I'll apply your patch and commit via my tree.

I forgot to include this fix for your logging change.

Finn

Index: linux-git/drivers/tty/serial/pmac_zilog.c
===
--- linux-git.orig/drivers/tty/serial/pmac_zilog.c  2011-12-13 
12:12:05.0 +1100
+++ linux-git/drivers/tty/serial/pmac_zilog.c   2011-12-13 12:13:29.0 
+1100
@@ -99,6 +99,10 @@ MODULE_LICENSE("GPL");
 #define PMACZILOG_NAME		"ttyPZ"
 #endif
 
+#define pmz_debug(fmt, arg...)	pr_debug(PMACZILOG_NAME "%d: " fmt, uap->port.line, ## arg)
+#define pmz_error(fmt, arg...)	pr_err(PMACZILOG_NAME "%d: " fmt, uap->port.line, ## arg)
+#define pmz_info(fmt, arg...)	pr_info(PMACZILOG_NAME "%d: " fmt, uap->port.line, ## arg)
+
 
 /*
  * For the sake of early serial console, we can do a pre-probe
Index: linux-git/drivers/tty/serial/pmac_zilog.h
===
--- linux-git.orig/drivers/tty/serial/pmac_zilog.h  2011-12-13 
12:12:05.0 +1100
+++ linux-git/drivers/tty/serial/pmac_zilog.h   2011-12-13 12:12:28.0 
+1100
@@ -1,10 +1,6 @@
 #ifndef __PMAC_ZILOG_H__
 #define __PMAC_ZILOG_H__
 
-#define pmz_debug(fmt, arg...)	pr_debug("ttyPZ%d: " fmt, uap->port.line, ## arg)
-#define pmz_error(fmt, arg...)	pr_err("ttyPZ%d: " fmt, uap->port.line, ## arg)
-#define pmz_info(fmt, arg...)	pr_info("ttyPZ%d: " fmt, uap->port.line, ## arg)
-
 /*
  * At most 2 ESCCs with 2 ports each
  */
___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev


Re: [UPDATED] [PATCH v4 3/7] [ppc] Process dynamic relocations for kernel

2011-12-12 Thread Suzuki Poulose

On 12/11/11 01:32, Segher Boessenkool wrote:

Hi Suzuki,

Looks quite good, a few comments...


+get_type:
+ /* r4 holds the relocation type */
+ extrwi r4, r4, 8, 24 /* r4 = ((char*)r4)[3] */


This comment is confusing (only makes sense together with the
lwz a long way up).


Agree, will fix them.
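A more self-contained version of that comment might read (sketch):

	/* r4 still holds the r_info word loaded earlier; keep only its
	 * least-significant byte, which is the ELF32 relocation type */
	extrwi	r4, r4, 8, 24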



+nxtrela:
+ /*
+ * We have to flush the modified instructions to the
+ * main storage from the d-cache. And also, invalidate the
+ * cached instructions in i-cache which has been modified.
+ *
+ * We delay the msync / isync operation till the end, since
+ * we won't be executing the modified instructions until
+ * we return from here.
+ */
+ dcbst r4,r7
+ icbi r4,r7


You still need a sync between these two. Without it, the icbi can
complete before the dcbst for the same address does, which leaves
room for an instruction fetch from that address to get old data.


Ok.
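For reference, with the missing barrier added the sequence would look something
like this (a sketch, not the actual patch):

	dcbst	r4,r7		/* push the modified instruction out of the d-cache */
	sync			/* order the dcbst before the icbi */
	icbi	r4,r7		/* toss any stale copy from the i-cache */
	...
done:
	sync			/* wait for all flushes to complete */
	isync			/* resynchronize instruction fetch */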

+ cmpwi r8, 0 /* relasz <= 0 ? */
+ ble done
+ add r9, r9, r6 /* move to next entry in the .rela table */
+ subf r8, r6, r8 /* relasz -= relaent */
+ b applyrela
+
+done:
+ msync /* Wait for the flush to finish */


The instruction is called sync. msync is a BookE thing.


next if (/R_PPC64_RELATIVE/ or /R_PPC64_NONE/ or
/R_PPC64_ADDR64\s+mach_/);
+ next if (/R_PPC_ADDR16_LO/ or /R_PPC_ADDR16_HI/ or
+ /R_PPC_ADDR16_HA/ or /R_PPC_RELATIVE/);


Nothing new, but these should probably have \b or \s or just
a space on each side.

Will fix this too. Also will include the R_PPC_NONE to the list
of valid relocations.

Thanks
Suzuki





Segher

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev



___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev


Re: [PATCH 3/3] mtd/nand : workaround for Freescale FCM to support large-page Nand chip

2011-12-12 Thread LiuShuo

On 2011-12-13 05:30, Scott Wood wrote:

On 12/12/2011 03:19 PM, Artem Bityutskiy wrote:

On Mon, 2011-12-12 at 15:15 -0600, Scott Wood wrote:

NAND chips come from the factory with bad blocks marked at a certain
offset into each page.  This offset is normally in the OOB area, but
since we change the layout from 4k data, 128 byte oob to 2k data, 64
byte oob, 2k data, 64 byte oob the marker is no longer in the oob.  On
first use we need to migrate the markers so that they are still in the oob.

Ah, I see, thanks. Are you planning to implement in-kernel migration or
use a user-space tool?

That's the kind of answer I was hoping to get from Shuo. :-)

OK, I'll try to do this. Give me a couple of days.

-LiuShuo

Most likely is a firmware-based tool, but I'd like there to be some way
for the tool to mark that this has happened, so that the Linux driver
can refuse to do non-raw accesses to a chip that isn't marked as having
been migrated (or at least yell loudly in the log).

Speaking of raw accesses, these are currently broken in the eLBC
driver... we need some way for the generic layer to tell us what kind of
access it is before the transaction starts, not once it wants to read
out the buffer (unless we add more hacks to delay the start of a read
transaction until first buffer access...).  We'd be better off with a
high-level read page/write page function that does the whole thing
(not just buffer access, but command issuance as well).

-Scott
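To make the layout change concrete, here is roughly what happens to a 4k + 128 byte
page under the FCM workaround (an illustrative sketch of the explanation quoted
above, offsets approximate):

	/*
	 * factory view:   | 4096 B data                         | 128 B OOB |
	 *                                                          ^ bad-block marker
	 *
	 * FCM 2k view:    | 2048 B data | 64 B OOB | 2048 B data | 64 B OOB |
	 *                                            ^ the factory marker's offset
	 *                                              now falls inside the second
	 *                                              data region, not in an OOB area
	 *
	 * Hence the need to migrate the markers into the new OOB areas (and to
	 * record that this has been done) before the chip is first used.
	 */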



___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

Re: [PATCH] powerpc: Fix swiotlb ops for ppc64

2011-12-12 Thread Becky Bruce

On Dec 7, 2011, at 11:46 PM, Kumar Gala wrote:

 
 On Dec 7, 2011, at 9:23 PM, Benjamin Herrenschmidt wrote:
 
 On Wed, 2011-12-07 at 11:19 -0600, Kumar Gala wrote:
 
 struct dma_map_ops swiotlb_dma_ops = {
 +#ifdef CONFIG_PPC64
 +   .alloc_coherent = swiotlb_alloc_coherent,
 +   .free_coherent = swiotlb_free_coherent,
 +#else
 .alloc_coherent = dma_direct_alloc_coherent,
 .free_coherent = dma_direct_free_coherent,
 +#endif
 .map_sg = swiotlb_map_sg_attrs,
 .unmap_sg = swiotlb_unmap_sg_attrs,
 .dma_supported = swiotlb_dma_supported,
 
 Do we really need the ifdef ? What happens if we use
 swiotlb_alloc_coherent() on ppc32 ? Won't it allocate lowmem, realize
 that it doesn't need bouncing and be happy ?
 
 Cheers,
 Ben.
 
 Becky any comment?
 
 I know its been a while, but wondering if you had any reason to not do what 
 Ben's suggesting ?

Well, as you say, it's been a while, and but I think:

1) dma_direct_alloc_coherent strips GFP_HIGHMEM out of the flags field when 
calling the actual allocator and the iotlb version does not.  I don't know how 
much this matters - I did a quick grep and I don't see any users that specify 
that, but somebody went through the trouble of putting it in there in the first 
place and without knowing why I wasn't willing to get rid of it.  Now, since my 
patch it looks like someone added a VM_BUG_ON into __get_free_pages() if 
GFP_HIGHMEM so this would get caught.  However, I don't know if we really want 
to throw a bug there.

2)  The iotlb code doesn't deal with the !coherent parts like 8xx.  We can work 
around that by setting up the dma_ops differently for that case instead.

-Becky

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev


Re: [PATCH] powerpc: Fix swiotlb ops for ppc64

2011-12-12 Thread Benjamin Herrenschmidt
On Mon, 2011-12-12 at 21:55 -0600, Becky Bruce wrote:
 1) dma_direct_alloc_coherent strips GFP_HIGHMEM out of the flags field
 when calling the actual allocator and the iotlb version does not.  I
 don't know how much this matters - I did a quick grep and I don't see
 any users that specify that, but somebody went through the trouble of
 putting it in there in the first place and without knowing why I
 wasn't willing to get rid of it.  Now, since my patch it looks like
 someone added a VM_BUG_ON into __get_free_pages() if GFP_HIGHMEM so
 this would get caught.  However, I don't know if we really want to
 throw a bug there.
 
 2)  The iotlb code doesn't deal with the !coherent parts like 8xx.  We
 can work around that by setting up the dma_ops differently for that
 case instead.

Does the rest of it handle them ? I mean swiotlb_map_sg_attrs etc...

If not then it's broken anyway so may as well not care for now.

Cheers,
Ben.

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev


Re: [PATCH 3/4] ppc32/kprobe: complete kprobe and migrate exception frame

2011-12-12 Thread tiejun.chen
Benjamin Herrenschmidt wrote:
 On Mon, 2011-12-12 at 16:50 +0800, Tiejun Chen wrote:
 We can't emulate stwu since that may corrupt current exception stack.
 So we will have to do real store operation in the exception return code.

 Firstly we'll allocate a trampoline exception frame below the kprobed
 function stack and copy the current exception frame to the trampoline.
 Then we can do this real store operation to implement 'stwu', and reroute
 the trampoline frame to r1 to complete this exception migration.

 Signed-off-by: Tiejun Chen tiejun.c...@windriver.com
 ---
  arch/powerpc/kernel/entry_32.S |   26 ++
  1 files changed, 26 insertions(+), 0 deletions(-)

 diff --git a/arch/powerpc/kernel/entry_32.S b/arch/powerpc/kernel/entry_32.S
 index 56212bc..d56e311 100644
 --- a/arch/powerpc/kernel/entry_32.S
 +++ b/arch/powerpc/kernel/entry_32.S
 @@ -1185,6 +1185,8 @@ recheck:
  bne-do_resched
  andi.   r0,r9,_TIF_USER_WORK_MASK
  beq restore_user
 +andis.  r0,r9,_TIF_DELAYED_KPROBE@h
 +bne-restore_kprobe
 
 Same comment as earlier about name. Note that you're not hooking in the
 right place. recheck is only reached if you -already- went out of the
 normal exit path and only when going back to user space unless I'm
 missing something (which is really the case you don't care about).
 
 You need to hook into resume_kernel instead.

Maybe I'm misunderstanding what you mean since, as I recall, you suggested we
should do this at the end of do_work.

 
 Also, we may want to simplify the whole thing, instead of checking user
 vs. kernel first etc... we could instead have a single _TIF_WORK_MASK
 which includes both the bits for user work and the new bit for kernel
 work. With preempt, the kernel work bits would also include
 _TIF_NEED_RESCHED.
 
 Then you have in the common exit path, a single test for that, with a
 fast path that skips everything and just goes to restore for both
 kernel and user.
 
 The only possible issue is the setting of dbcr0 for BookE and 44x and we
 can keep that as a special case keyed of MSR_PR in the resume path under
 ifdef BOOKE (we'll probably sanitize that later with some different
 rework anyway). 
 
  So the exit path becomes something like:
  
  ret_from_except:
    .. hard disable interrupts (unchanged) ...
    read TIF flags
    andi with _TIF_WORK_MASK
    nothing set -> restore
    check PR
    set -> do_work_user
    not set -> do_work_kernel (kprobes & preempt)
    (both loop until relevant _TIF flags are all clear)
  restore:
    #ifdef BOOKE && 44x: test PR & do dbcr0 stuff if needed
    ... normal restore ...

Do you mean we should reorganize current ret_from_except for ppc32 as well?

 
  do_user_signal: /* r10 contains MSR_KERNEL here */
  ori r10,r10,MSR_EE
  SYNC
 @@ -1202,6 +1204,30 @@ do_user_signal:   /* r10 contains 
 MSR_KERNEL here */
  REST_NVGPRS(r1)
  b   recheck
  
 +restore_kprobe:
 +lwz r3,GPR1(r1)
 +subir3,r3,INT_FRAME_SIZE; /* Allocate a trampoline exception frame 
 */
 +mr  r4,r1
 +bl  copy_exc_stack  /* Copy from the original to the trampoline */
 +
 +/* Do real stw operation to complete stwu */
 +mr  r4,r1
 +addir4,r4,INT_FRAME_SIZE/* Get kprobed entry */
 +lwz r5,GPR1(r1) /* Backup r1 */
 +stw r4,GPR1(r1) /* Now store that safely */
 
 The above confuses me. Shouldn't you do instead something like
 
   lwz r4,GPR1(r1)
   subi	r3,r4,INT_FRAME_SIZE
   li  r5,INT_FRAME_SIZE
   bl  memcpy
 

Anyway I'll try this if you think memcpy is fine/safe in exception return codes.

 To start with, then you need to know the old r1 value which may or may
 not be related to your current r1. The emulation code should stash it

If the old r1 is not related to our current r1, it shouldn't be possible to go to
restore_kprobe since we set that new flag only for the current task.

If I'm wrong please correct me :)

Thanks
Tiejun

 into the int frame in an unused slot such as orig_gpr3 (since that
 only pertains to restarting syscalls which we aren't doing here).
 
 Then you adjust your r1 and do something like
 
   lwz r3,GPR1(r1)
   lwz r0,ORIG_GPR3(r1)
   stw r0,0(r3)
 
 To perform the store, before doing the rest:
  
 +/* Reroute the trampoline frame to r1 */
 +subir5,r5,INT_FRAME_SIZE
 +mr  r1,r5
 +
 +/* Clear _TIF_DELAYED_KPROBE flag */
 +rlwinm  r9,r1,0,0,(31-THREAD_SHIFT)
 +lwz r0,TI_FLAGS(r9)
 +rlwinm  r0,r0,0,_TIF_DELAYED_KPROBE
 +stw r0,TI_FLAGS(r9)
 +
 +b   restore
 +
  /*
   * We come here when we are at the end of handling an exception
   * that occurred at a place where taking an exception will lose
___
Linuxppc-dev mailing list

Re: [PATCH 1/4] powerpc/kprobe: introduce a new thread flag

2011-12-12 Thread tiejun.chen
Benjamin Herrenschmidt wrote:
 On Mon, 2011-12-12 at 16:50 +0800, Tiejun Chen wrote:
 We need to add a new thread flag, TIF_KPROBE/_TIF_DELAYED_KPROBE,
 for handling kprobe operation while exiting exception.
 
 The basic idea is sane, however the instruction emulation isn't per-se
 kprobe specific. It could be used by xmon too for example. I'd rather
 use a different name, something like TIF_EMULATE_STACK_STORE or

It's a good term so I'll use it directly :)

Thanks
Tiejun

 something like that.
 
 Cheers,
 Ben.
 
 Signed-off-by: Tiejun Chen tiejun.c...@windriver.com
 ---
  arch/powerpc/include/asm/thread_info.h |2 ++
  1 files changed, 2 insertions(+), 0 deletions(-)

 diff --git a/arch/powerpc/include/asm/thread_info.h 
 b/arch/powerpc/include/asm/thread_info.h
 index 836f231..3378734 100644
 --- a/arch/powerpc/include/asm/thread_info.h
 +++ b/arch/powerpc/include/asm/thread_info.h
 @@ -112,6 +112,7 @@ static inline struct thread_info 
 *current_thread_info(void)
  #define TIF_FREEZE  14  /* Freezing for suspend */
  #define TIF_SYSCALL_TRACEPOINT  15  /* syscall tracepoint 
 instrumentation */
  #define TIF_RUNLATCH16  /* Is the runlatch enabled? */
 +#define TIF_KPROBE  17  /* Is the delayed kprobe operation? */
  
  /* as above, but as bit values */
  #define _TIF_SYSCALL_TRACE  (1TIF_SYSCALL_TRACE)
 @@ -130,6 +131,7 @@ static inline struct thread_info 
 *current_thread_info(void)
  #define _TIF_FREEZE (1TIF_FREEZE)
  #define _TIF_SYSCALL_TRACEPOINT (1TIF_SYSCALL_TRACEPOINT)
  #define _TIF_RUNLATCH   (1TIF_RUNLATCH)
 +#define _TIF_DELAYED_KPROBE (1TIF_KPROBE)
  #define _TIF_SYSCALL_T_OR_A (_TIF_SYSCALL_TRACE | _TIF_SYSCALL_AUDIT | \
   _TIF_SECCOMP | _TIF_SYSCALL_TRACEPOINT)
  
 
 
 

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev


Re: [PATCH 2/4] ppc32/kprobe: introduce copy_exc_stack

2011-12-12 Thread tiejun.chen
Benjamin Herrenschmidt wrote:
 On Mon, 2011-12-12 at 16:50 +0800, Tiejun Chen wrote:
 We need a copy mechanism to migrate exception stack. But looks copy_page()
 already implement this well so we can complete copy_exc_stack() based on
 that directly.
 
 I'd rather you don't hijack copy_page which is quite sensitive. The
 emulation isn't performance critical so a dumber routine would work

Yes, I just thought we should keep good performance, so I 'stole' the original
copy_page().

 fine.
 
 Why not use memcpy ? You can call it from assembly.

I'd like to switch to memcpy.

Thanks
Tiejun
___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev


Re: [PATCH 1/1] ppc64: fix missing to check all bits of _TIF_USER_WORK_MASK in preempt

2011-12-12 Thread tiejun.chen
Tiejun Chen wrote:
 In entry_64.S version of ret_from_except_lite, you'll notice that
 in the !preempt case, after we've checked MSR_PR we test for any
 TIF flag in _TIF_USER_WORK_MASK to decide whether to go to do_work
 or not. However, in the preempt case, we do a convoluted trick to
 test SIGPENDING only if PR was set and always test NEED_RESCHED ...
 but we forget to test any other bit of _TIF_USER_WORK_MASK !!! So
 that means that with preempt, we completely fail to test for things
 like single step, syscall tracing, etc...
 
 This should be fixed as the following path:
 
  - Test PR. If set, go to test_work_user, else continue.
 
  - In test_work_user, always test for _TIF_USER_WORK_MASK to decide to
 go to do_work, maybe call it do_user_work
 
  - In test_work_kernel, test for _TIF_KERNEL_WORK_MASK which is set to
 our new flag along with NEED_RESCHED if preempt is enabled and branch to
 do_kernel_work.

Ben,

Any comment for this?

Tiejun

 
 Signed-off-by: Benjamin Herrenschmidt b...@kernel.crashing.org
 Signed-off-by: Tiejun Chen tiejun.c...@windriver.com
 ---
  arch/powerpc/kernel/entry_64.S |   33 +++--
  1 files changed, 15 insertions(+), 18 deletions(-)
 
 diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S
 index d834425..9e70b9a 100644
 --- a/arch/powerpc/kernel/entry_64.S
 +++ b/arch/powerpc/kernel/entry_64.S
 @@ -571,27 +571,26 @@ _GLOBAL(ret_from_except_lite)
   mtmsrd  r9,1/* Update machine state */
  #endif /* CONFIG_PPC_BOOK3E */
  
 -#ifdef CONFIG_PREEMPT
 - clrrdi  r9,r1,THREAD_SHIFT  /* current_thread_info() */
 - li  r0,_TIF_NEED_RESCHED/* bits to check */
 - ld  r3,_MSR(r1)
 - ld  r4,TI_FLAGS(r9)
 - /* Move MSR_PR bit in r3 to _TIF_SIGPENDING position in r0 */
 - rlwimi  r0,r3,32+TIF_SIGPENDING-MSR_PR_LG,_TIF_SIGPENDING
 -	and.	r0,r4,r0	/* check NEED_RESCHED and maybe SIGPENDING */
 - bne do_work
 -
 -#else /* !CONFIG_PREEMPT */
   ld  r3,_MSR(r1) /* Returning to user mode? */
   andi.   r3,r3,MSR_PR
 - beq restore /* if not, just restore regs and return */
 + bne test_work_user
  
 + clrrdi  r9,r1,THREAD_SHIFT  /* current_thread_info() */
 + li  r0,_TIF_USER_WORK_MASK
 +#ifdef CONFIG_PREEMPT
 + ori r0,r0,_TIF_NEED_RESCHED
 +#endif
 + ld  r4,TI_FLAGS(r9)
 +	and.	r0,r4,r0	/* check NEED_RESCHED and maybe _TIF_USER_WORK_MASK */
 + bne do_kernel_work
 + b   restore /* if so, just restore regs and return */
 +
 +test_work_user:
   /* Check current_thread_info()-flags */
   clrrdi  r9,r1,THREAD_SHIFT
   ld  r4,TI_FLAGS(r9)
   andi.   r0,r4,_TIF_USER_WORK_MASK
 - bne do_work
 -#endif
 + bne do_user_work
  
  restore:
  BEGIN_FW_FTR_SECTION
 @@ -693,10 +692,8 @@ ALT_FTR_SECTION_END_IFCLR(CPU_FTR_STCX_CHECKS_ADDRESS)
   b   .ret_from_except_lite   /* loop back and handle more */
  #endif
  
 -do_work:
 +do_kernel_work:
  #ifdef CONFIG_PREEMPT
 - andi.   r0,r3,MSR_PR/* Returning to user mode? */
 - bne user_work
   /* Check that preempt_count() == 0 and interrupts are enabled */
   lwz r8,TI_PREEMPT(r9)
   cmpwi   cr1,r8,0
 @@ -738,9 +735,9 @@ do_work:
   bne 1b
   b   restore
  
 -user_work:
  #endif /* CONFIG_PREEMPT */
  
 +do_user_work:
   /* Enable interrupts */
  #ifdef CONFIG_PPC_BOOK3E
   wrteei  1

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev


Re: [PATCH 1/1] ppc64: fix missing to check all bits of _TIF_USER_WORK_MASK in preempt

2011-12-12 Thread Benjamin Herrenschmidt
On Tue, 2011-12-13 at 13:01 +0800, tiejun.chen wrote:
 Tiejun Chen wrote:
  In entry_64.S version of ret_from_except_lite, you'll notice that
  in the !preempt case, after we've checked MSR_PR we test for any
  TIF flag in _TIF_USER_WORK_MASK to decide whether to go to do_work
  or not. However, in the preempt case, we do a convoluted trick to
  test SIGPENDING only if PR was set and always test NEED_RESCHED ...
  but we forget to test any other bit of _TIF_USER_WORK_MASK !!! So
  that means that with preempt, we completely fail to test for things
  like single step, syscall tracing, etc...
  
  This should be fixed as the following path:
  
   - Test PR. If set, go to test_work_user, else continue.
  
   - In test_work_user, always test for _TIF_USER_WORK_MASK to decide to
  go to do_work, maybe call it do_user_work
  
   - In test_work_kernel, test for _TIF_KERNEL_WORK_MASK which is set to
  our new flag along with NEED_RESCHED if preempt is enabled and branch to
  do_kernel_work.
 
 Ben,
 
 Any comment for this?

Sorry, I didn't get to review that one yet (nor reply to your newer
responses), I have very sore eyes and basically had to get off the
computer. Hopefully I'll be better tomorrow.

Cheers,
Ben.


___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev


Re: [PATCH 1/1] ppc64: fix missing to check all bits of _TIF_USER_WORK_MASK in preempt

2011-12-12 Thread tiejun.chen
Benjamin Herrenschmidt wrote:
 On Tue, 2011-12-13 at 13:01 +0800, tiejun.chen wrote:
 Tiejun Chen wrote:
 In entry_64.S version of ret_from_except_lite, you'll notice that
 in the !preempt case, after we've checked MSR_PR we test for any
 TIF flag in _TIF_USER_WORK_MASK to decide whether to go to do_work
 or not. However, in the preempt case, we do a convoluted trick to
 test SIGPENDING only if PR was set and always test NEED_RESCHED ...
 but we forget to test any other bit of _TIF_USER_WORK_MASK !!! So
 that means that with preempt, we completely fail to test for things
 like single step, syscall tracing, etc...

 This should be fixed as the following path:

  - Test PR. If set, go to test_work_user, else continue.

  - In test_work_user, always test for _TIF_USER_WORK_MASK to decide to
 go to do_work, maybe call it do_user_work

  - In test_work_kernel, test for _TIF_KERNEL_WORK_MASK which is set to
 our new flag along with NEED_RESCHED if preempt is enabled and branch to
 do_kernel_work.
 Ben,

 Any comment for this?
 
 Sorry, I didn't get to review that one yet (nor reply to your newer

No rush at all, please get to this once you're feeling completely better.

Thanks
Tiejun

 responses), I have very sore eyes and basically had to get off the
 computer. Hopefully I'll be better tomorrow.
 
 Cheers,
 Ben.
___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev


[PATCH] powerpc: Fix comment explaining our VSID layout

2011-12-12 Thread Anton Blanchard

We support 16TB of user address space and half a million contexts
so update the comment to reflect this.

Signed-off-by: Anton Blanchard an...@samba.org
---

Index: linux-powerpc/arch/powerpc/include/asm/mmu-hash64.h
===
--- linux-powerpc.orig/arch/powerpc/include/asm/mmu-hash64.h2011-12-13 
14:47:14.498301148 +1100
+++ linux-powerpc/arch/powerpc/include/asm/mmu-hash64.h 2011-12-13 
14:58:01.085510915 +1100
@@ -312,10 +312,9 @@ extern void slb_set_size(u16 size);
  * (i.e. everything above 0xC000), except the very top
  * segment, which simplifies several things.
  *
- * - We allow for 15 significant bits of ESID and 20 bits of
- * context for user addresses.  i.e. 8T (43 bits) of address space for
- * up to 1M contexts (although the page table structure and context
- * allocation will need changes to take advantage of this).
+ * - We allow for 16 significant bits of ESID and 19 bits of
+ * context for user addresses.  i.e. 16T (44 bits) of address space for
+ * up to half a million contexts.
  *
  * - The scramble function gives robust scattering in the hash
  * table (at least based on some initial results).  The previous
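For reference, the arithmetic behind the updated figures (not part of the patch):
with 16 significant ESID bits and 256MB (2^28 byte) segments, user space spans
2^16 * 2^28 = 2^44 bytes, i.e. 16T, and 19 context bits give 2^19 = 524288,
roughly half a million contexts.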
___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev


[PATCH 01/19] mxc_udc: add workaround for ENGcm09152 for i.MX25

2011-12-12 Thread Eric Bénard
this patch gives the possibility to workaround bug ENGcm09152
on i.MX25 when the hardware workaround is also implemented on
the board.
It covers the workaround described on page 42 of the following Errata :
http://cache.freescale.com/files/dsp/doc/errata/IMX25CE.pdf

Signed-off-by: Eric Bénard e...@eukrea.com
Cc: Sascha Hauer ker...@pengutronix.de
Cc: Greg Kroah-Hartman gre...@suse.de
Cc: Li Yang le...@freescale.com
---
 drivers/usb/gadget/fsl_mxc_udc.c |   22 +-
 1 files changed, 13 insertions(+), 9 deletions(-)

diff --git a/drivers/usb/gadget/fsl_mxc_udc.c b/drivers/usb/gadget/fsl_mxc_udc.c
index dcbc0a2..4aff05d 100644
--- a/drivers/usb/gadget/fsl_mxc_udc.c
+++ b/drivers/usb/gadget/fsl_mxc_udc.c
@@ -23,7 +23,7 @@
 static struct clk *mxc_ahb_clk;
 static struct clk *mxc_usb_clk;
 
-/* workaround ENGcm09152 for i.MX35 */
+/* workaround ENGcm09152 for i.MX25/35 */
 #define USBPHYCTRL_OTGBASE_OFFSET  0x608
 #define USBPHYCTRL_EVDO		(1 << 23)
 
@@ -89,16 +89,20 @@ eenahb:
 void fsl_udc_clk_finalize(struct platform_device *pdev)
 {
	struct fsl_usb2_platform_data *pdata = pdev->dev.platform_data;
-   if (cpu_is_mx35()) {
+   if (cpu_is_mx25() || cpu_is_mx35()) {
unsigned int v;
-
-   /* workaround ENGcm09152 for i.MX35 */
+   void __iomem *otgbase;
+   if (cpu_is_mx25())
+   otgbase = MX25_IO_ADDRESS(MX25_USB_BASE_ADDR +
+   USBPHYCTRL_OTGBASE_OFFSET);
+   else if (cpu_is_mx35())
+   otgbase = MX35_IO_ADDRESS(MX35_USB_BASE_ADDR +
+   USBPHYCTRL_OTGBASE_OFFSET);
+
+   /* workaround ENGcm09152 for i.MX25/35 */
		if (pdata->workaround & FLS_USB2_WORKAROUND_ENGCM09152) {
-   v = readl(MX35_IO_ADDRESS(MX35_USB_BASE_ADDR +
-   USBPHYCTRL_OTGBASE_OFFSET));
-   writel(v | USBPHYCTRL_EVDO,
-   MX35_IO_ADDRESS(MX35_USB_BASE_ADDR +
-   USBPHYCTRL_OTGBASE_OFFSET));
+   v = readl(otgbase);
+   writel(v | USBPHYCTRL_EVDO, otgbase);
}
}
 
-- 
1.7.6.4

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

Linux port availability for p5010 processor

2011-12-12 Thread Vineeth
Do we have a Linux port available for the Freescale P5010 processor (with a
single e5500 core)?
(found arch/powerpc/platforms/pseries, and some details in
kernel/cputable.c)

Is there any reference board which uses this processor? Any reference to a
DTS file would also be helpful.

Thanks
Vineeth
___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev