[PATCH 3/4] ppc32/kprobe: complete kprobe and migrate exception frame
We can't emulate stwu since that may corrupt current exception stack. So we will have to do real store operation in the exception return code. Firstly we'll allocate a trampoline exception frame below the kprobed function stack and copy the current exception frame to the trampoline. Then we can do this real store operation to implement 'stwu', and reroute the trampoline frame to r1 to complete this exception migration. Signed-off-by: Tiejun Chen tiejun.c...@windriver.com --- arch/powerpc/kernel/entry_32.S | 26 ++ 1 files changed, 26 insertions(+), 0 deletions(-) diff --git a/arch/powerpc/kernel/entry_32.S b/arch/powerpc/kernel/entry_32.S index 56212bc..d56e311 100644 --- a/arch/powerpc/kernel/entry_32.S +++ b/arch/powerpc/kernel/entry_32.S @@ -1185,6 +1185,8 @@ recheck: bne-do_resched andi. r0,r9,_TIF_USER_WORK_MASK beq restore_user + andis. r0,r9,_TIF_DELAYED_KPROBE@h + bne-restore_kprobe do_user_signal:/* r10 contains MSR_KERNEL here */ ori r10,r10,MSR_EE SYNC @@ -1202,6 +1204,30 @@ do_user_signal: /* r10 contains MSR_KERNEL here */ REST_NVGPRS(r1) b recheck +restore_kprobe: + lwz r3,GPR1(r1) + subir3,r3,INT_FRAME_SIZE; /* Allocate a trampoline exception frame */ + mr r4,r1 + bl copy_exc_stack /* Copy from the original to the trampoline */ + + /* Do real stw operation to complete stwu */ + mr r4,r1 + addir4,r4,INT_FRAME_SIZE/* Get kprobed entry */ + lwz r5,GPR1(r1) /* Backup r1 */ + stw r4,GPR1(r1) /* Now store that safely */ + + /* Reroute the trampoline frame to r1 */ + subir5,r5,INT_FRAME_SIZE + mr r1,r5 + + /* Clear _TIF_DELAYED_KPROBE flag */ + rlwinm r9,r1,0,0,(31-THREAD_SHIFT) + lwz r0,TI_FLAGS(r9) + rlwinm r0,r0,0,_TIF_DELAYED_KPROBE + stw r0,TI_FLAGS(r9) + + b restore + /* * We come here when we are at the end of handling an exception * that occurred at a place where taking an exception will lose -- 1.5.6 ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
[PATCH 1/4] powerpc/kprobe: introduce a new thread flag
We need to add a new thread flag, TIF_KPROBE/_TIF_DELAYED_KPROBE, for handling kprobe operation while exiting exception. Signed-off-by: Tiejun Chen tiejun.c...@windriver.com --- arch/powerpc/include/asm/thread_info.h |2 ++ 1 files changed, 2 insertions(+), 0 deletions(-) diff --git a/arch/powerpc/include/asm/thread_info.h b/arch/powerpc/include/asm/thread_info.h index 836f231..3378734 100644 --- a/arch/powerpc/include/asm/thread_info.h +++ b/arch/powerpc/include/asm/thread_info.h @@ -112,6 +112,7 @@ static inline struct thread_info *current_thread_info(void) #define TIF_FREEZE 14 /* Freezing for suspend */ #define TIF_SYSCALL_TRACEPOINT 15 /* syscall tracepoint instrumentation */ #define TIF_RUNLATCH 16 /* Is the runlatch enabled? */ +#define TIF_KPROBE 17 /* Is the delayed kprobe operation? */ /* as above, but as bit values */ #define _TIF_SYSCALL_TRACE (1<<TIF_SYSCALL_TRACE) @@ -130,6 +131,7 @@ static inline struct thread_info *current_thread_info(void) #define _TIF_FREEZE(1<<TIF_FREEZE) #define _TIF_SYSCALL_TRACEPOINT(1<<TIF_SYSCALL_TRACEPOINT) #define _TIF_RUNLATCH (1<<TIF_RUNLATCH) +#define _TIF_DELAYED_KPROBE(1<<TIF_KPROBE) #define _TIF_SYSCALL_T_OR_A(_TIF_SYSCALL_TRACE | _TIF_SYSCALL_AUDIT | \ _TIF_SECCOMP | _TIF_SYSCALL_TRACEPOINT) -- 1.5.6 ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
[PATCH 2/4] ppc32/kprobe: introduce copy_exc_stack
We need a copy mechanism to migrate exception stack. But looks copy_page() already implement this well so we can complete copy_exc_stack() based on that directly. Signed-off-by: Tiejun Chen tiejun.c...@windriver.com --- arch/powerpc/include/asm/page_32.h |1 + arch/powerpc/kernel/misc_32.S | 16 +++- arch/powerpc/kernel/ppc_ksyms.c|1 + 3 files changed, 17 insertions(+), 1 deletions(-) diff --git a/arch/powerpc/include/asm/page_32.h b/arch/powerpc/include/asm/page_32.h index 68d73b2..2c1fd84 100644 --- a/arch/powerpc/include/asm/page_32.h +++ b/arch/powerpc/include/asm/page_32.h @@ -40,6 +40,7 @@ struct page; extern void clear_pages(void *page, int order); static inline void clear_page(void *page) { clear_pages(page, 0); } extern void copy_page(void *to, void *from); +extern void copy_exc_stack(void *to, void *from); #include asm-generic/getorder.h diff --git a/arch/powerpc/kernel/misc_32.S b/arch/powerpc/kernel/misc_32.S index 998a100..aa02545 100644 --- a/arch/powerpc/kernel/misc_32.S +++ b/arch/powerpc/kernel/misc_32.S @@ -527,7 +527,7 @@ _GLOBAL(clear_pages) stw r8,12(r3); \ stwur9,16(r3) -_GLOBAL(copy_page) +ready_copy: addir3,r3,-4 addir4,r4,-4 @@ -544,7 +544,21 @@ _GLOBAL(copy_page) dcbtr5,r4 li r11,L1_CACHE_BYTES+4 #endif /* MAX_COPY_PREFETCH */ + blr + +_GLOBAL(copy_exc_stack) + mflrr12 + bl ready_copy + mtlrr12 + li r0,INT_FRAME_SIZE/L1_CACHE_BYTES - MAX_COPY_PREFETCH + b go_copy + +_GLOBAL(copy_page) + mflrr12 + bl ready_copy + mtlrr12 li r0,PAGE_SIZE/L1_CACHE_BYTES - MAX_COPY_PREFETCH +go_copy: crclr 4*cr0+eq 2: mtctr r0 diff --git a/arch/powerpc/kernel/ppc_ksyms.c b/arch/powerpc/kernel/ppc_ksyms.c index f5ae872..2223daf 100644 --- a/arch/powerpc/kernel/ppc_ksyms.c +++ b/arch/powerpc/kernel/ppc_ksyms.c @@ -88,6 +88,7 @@ EXPORT_SYMBOL(__clear_user); EXPORT_SYMBOL(__strncpy_from_user); EXPORT_SYMBOL(__strnlen_user); EXPORT_SYMBOL(copy_page); +EXPORT_SYMBOL(copy_exc_stack); #if defined(CONFIG_PCI) defined(CONFIG_PPC32) EXPORT_SYMBOL(isa_io_base); -- 1.5.6 ___ 
Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
ppc32/kprobe: Fix a bug for kprobe stwu r1
ppc32/kprobe: Fix a bug for kprobe stwu r1 These patches are used to fix that known kprobe bug, [BUG?]3.0-rc4+ftrace+kprobe: set kprobe at instruction 'stwu' lead to system crash/freeze https://lkml.org/lkml/2011/7/3/156 We withdraw the original way to provide a dedicated exception stack. Now we implement this based on Ben's suggestion: https://lkml.org/lkml/2011/11/30/327 Here I fix this bug only for ppc32 since Ben addressed another problem in ppc64 exception return codes. So I think I'd better send another patch to fix this bug issued from ppc64 firstly. Then it's convenient to merge this fix into ppc64. Tiejun Chen (4): powerpc/kprobe: introduce a new thread flag ppc32/kprobe: introduce copy_exc_stack ppc32/kprobe: complete kprobe and migrate exception frame ppc32/kprobe: don't emulate store when kprobe stwu r1 arch/powerpc/include/asm/page_32.h |1 + arch/powerpc/include/asm/thread_info.h |2 ++ arch/powerpc/kernel/entry_32.S | 26 ++ arch/powerpc/kernel/misc_32.S | 16 +++- arch/powerpc/kernel/ppc_ksyms.c|1 + arch/powerpc/lib/sstep.c | 19 +-- 6 files changed, 62 insertions(+), 3 deletions(-) Tiejun ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
[PATCH 4/4] ppc32/kprobe: don't emulate store when kprobe stwu r1
We don't do the real store operation for kprobing 'stwu Rx,(y)R1' since this may corrupt the exception frame, now we will do this operation safely in exception return code after migrate current exception frame below the kprobed function stack. So we only update gpr[1] here and trigger a thread flag to mask this. Signed-off-by: Tiejun Chen tiejun.c...@windriver.com --- arch/powerpc/lib/sstep.c | 19 +-- 1 files changed, 17 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/lib/sstep.c b/arch/powerpc/lib/sstep.c index 9a52349..78b7168 100644 --- a/arch/powerpc/lib/sstep.c +++ b/arch/powerpc/lib/sstep.c @@ -566,7 +566,7 @@ int __kprobes emulate_step(struct pt_regs *regs, unsigned int instr) unsigned long int ea; unsigned int cr, mb, me, sh; int err; - unsigned long old_ra; + unsigned long old_ra, val3; long ival; opcode = instr 26; @@ -1486,10 +1486,25 @@ int __kprobes emulate_step(struct pt_regs *regs, unsigned int instr) goto ldst_done; case 36:/* stw */ - case 37:/* stwu */ val = regs-gpr[rd]; err = write_mem(val, dform_ea(instr, regs), 4, regs); goto ldst_done; + case 37:/* stwu */ + val = regs-gpr[rd]; + val3 = dform_ea(instr, regs); + /* For PPC32 we always use stwu to change stack point with r1. So +* this emulated store may corrupt the exception frame, now we +* have to provide the exception frame trampoline, which is pushed +* below the kprobed function stack. So we only update gpr[1] but +* don't emulate the real store operation. We will do real store +* operation safely in exception return code by checking this flag. +*/ + if (ra == 1) { + set_thread_flag(TIF_KPROBE); + err = 0; + } else + err = write_mem(val, val3, 4, regs); + goto ldst_done; case 38:/* stb */ case 39:/* stbu */ -- 1.5.6 ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
[PATCH 1/1] ppc64: fix missing check of all bits of _TIF_USER_WORK_MASK in preempt
In entry_64.S version of ret_from_except_lite, you'll notice that in the !preempt case, after we've checked MSR_PR we test for any TIF flag in _TIF_USER_WORK_MASK to decide whether to go to do_work or not. However, in the preempt case, we do a convoluted trick to test SIGPENDING only if PR was set and always test NEED_RESCHED ... but we forget to test any other bit of _TIF_USER_WORK_MASK !!! So that means that with preempt, we completely fail to test for things like single step, syscall tracing, etc... This should be fixed as the following path: - Test PR. If set, go to test_work_user, else continue. - In test_work_user, always test for _TIF_USER_WORK_MASK to decide to go to do_work, maybe call it do_user_work - In test_work_kernel, test for _TIF_KERNEL_WORK_MASK which is set to our new flag along with NEED_RESCHED if preempt is enabled and branch to do_kernel_work. Signed-off-by: Benjamin Herrenschmidt b...@kernel.crashing.org Signed-off-by: Tiejun Chen tiejun.c...@windriver.com --- arch/powerpc/kernel/entry_64.S | 33 +++-- 1 files changed, 15 insertions(+), 18 deletions(-) diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S index d834425..9e70b9a 100644 --- a/arch/powerpc/kernel/entry_64.S +++ b/arch/powerpc/kernel/entry_64.S @@ -571,27 +571,26 @@ _GLOBAL(ret_from_except_lite) mtmsrd r9,1/* Update machine state */ #endif /* CONFIG_PPC_BOOK3E */ -#ifdef CONFIG_PREEMPT - clrrdi r9,r1,THREAD_SHIFT /* current_thread_info() */ - li r0,_TIF_NEED_RESCHED/* bits to check */ - ld r3,_MSR(r1) - ld r4,TI_FLAGS(r9) - /* Move MSR_PR bit in r3 to _TIF_SIGPENDING position in r0 */ - rlwimi r0,r3,32+TIF_SIGPENDING-MSR_PR_LG,_TIF_SIGPENDING - and.r0,r4,r0/* check NEED_RESCHED and maybe SIGPENDING */ - bne do_work - -#else /* !CONFIG_PREEMPT */ ld r3,_MSR(r1) /* Returning to user mode? */ andi. 
r3,r3,MSR_PR - beq restore /* if not, just restore regs and return */ + bne test_work_user + clrrdi r9,r1,THREAD_SHIFT /* current_thread_info() */ + li r0,_TIF_USER_WORK_MASK +#ifdef CONFIG_PREEMPT + ori r0,r0,_TIF_NEED_RESCHED +#endif + ld r4,TI_FLAGS(r9) + and.r0,r4,r0/* check NEED_RESCHED and maybe _TIF_USER_WORK_MASK */ + bne do_kernel_work + b restore /* if so, just restore regs and return */ + +test_work_user: /* Check current_thread_info()-flags */ clrrdi r9,r1,THREAD_SHIFT ld r4,TI_FLAGS(r9) andi. r0,r4,_TIF_USER_WORK_MASK - bne do_work -#endif + bne do_user_work restore: BEGIN_FW_FTR_SECTION @@ -693,10 +692,8 @@ ALT_FTR_SECTION_END_IFCLR(CPU_FTR_STCX_CHECKS_ADDRESS) b .ret_from_except_lite /* loop back and handle more */ #endif -do_work: +do_kernel_work: #ifdef CONFIG_PREEMPT - andi. r0,r3,MSR_PR/* Returning to user mode? */ - bne user_work /* Check that preempt_count() == 0 and interrupts are enabled */ lwz r8,TI_PREEMPT(r9) cmpwi cr1,r8,0 @@ -738,9 +735,9 @@ do_work: bne 1b b restore -user_work: #endif /* CONFIG_PREEMPT */ +do_user_work: /* Enable interrupts */ #ifdef CONFIG_PPC_BOOK3E wrteei 1 -- 1.5.6 ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
[PATCH 1/2] mtd/nand: fixup for fmr initialization of Freescale NAND controller
There was a bug for fmr initialization, which lead to fmr was always 0x100 in fsl_elbc_chip_init() and caused FCM command timeout before calling fsl_elbc_chip_init_tail(), now we initialize CWTO to maximum timeout value and not relying on the setting of bootloader. Signed-off-by: Shengzhou Liu shengzhou@freescale.com --- v3: add more descriptions. v2: make fmr not relying on the setting of bootloader. drivers/mtd/nand/fsl_elbc_nand.c | 10 +- 1 files changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/mtd/nand/fsl_elbc_nand.c b/drivers/mtd/nand/fsl_elbc_nand.c index eedd8ee..4f405a0 100644 --- a/drivers/mtd/nand/fsl_elbc_nand.c +++ b/drivers/mtd/nand/fsl_elbc_nand.c @@ -659,9 +659,7 @@ static int fsl_elbc_chip_init_tail(struct mtd_info *mtd) if (chip-pagemask 0xff00) al++; - /* add to ECCM mode set in fsl_elbc_init */ - priv-fmr |= (12 FMR_CWTO_SHIFT) | /* Timeout 12 ms */ -(al FMR_AL_SHIFT); + priv-fmr |= al FMR_AL_SHIFT; dev_dbg(priv-dev, fsl_elbc_init: nand-numchips = %d\n, chip-numchips); @@ -764,8 +762,10 @@ static int fsl_elbc_chip_init(struct fsl_elbc_mtd *priv) priv-mtd.priv = chip; priv-mtd.owner = THIS_MODULE; - /* Set the ECCM according to the settings in bootloader.*/ - priv-fmr = in_be32(lbc-fmr) FMR_ECCM; + /* set timeout to maximum */ + priv-fmr = 15 FMR_CWTO_SHIFT; + if (in_be32(lbc-bank[priv-bank].or) OR_FCM_PGS) + priv-fmr |= FMR_ECCM; /* fill in nand_chip structure */ /* set up function call table */ -- 1.6.4 ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
[PATCH 2/2] mtd/nand: Add ONFI support for FSL NAND controller
- fix NAND_CMD_READID command for ONFI detect. - add NAND_CMD_PARAM command to read the ONFI parameter page. Signed-off-by: Shengzhou Liu shengzhou@freescale.com --- v3: unify the bytes of fbcr to 256. v2: no changes drivers/mtd/nand/fsl_elbc_nand.c | 18 ++ 1 files changed, 10 insertions(+), 8 deletions(-) diff --git a/drivers/mtd/nand/fsl_elbc_nand.c b/drivers/mtd/nand/fsl_elbc_nand.c index 4f405a0..320584a 100644 --- a/drivers/mtd/nand/fsl_elbc_nand.c +++ b/drivers/mtd/nand/fsl_elbc_nand.c @@ -349,20 +349,22 @@ static void fsl_elbc_cmdfunc(struct mtd_info *mtd, unsigned int command, fsl_elbc_run_command(mtd); return; - /* READID must read all 5 possible bytes while CEB is active */ case NAND_CMD_READID: - dev_vdbg(priv-dev, fsl_elbc_cmdfunc: NAND_CMD_READID.\n); + case NAND_CMD_PARAM: + dev_vdbg(priv-dev, fsl_elbc_cmdfunc: NAND_CMD %x\n, command); out_be32(lbc-fir, (FIR_OP_CM0 FIR_OP0_SHIFT) | (FIR_OP_UA FIR_OP1_SHIFT) | (FIR_OP_RBW FIR_OP2_SHIFT)); - out_be32(lbc-fcr, NAND_CMD_READID FCR_CMD0_SHIFT); - /* nand_get_flash_type() reads 8 bytes of entire ID string */ - out_be32(lbc-fbcr, 8); - elbc_fcm_ctrl-read_bytes = 8; + out_be32(lbc-fcr, command FCR_CMD0_SHIFT); + /* +* although currently it's 8 bytes for READID, we always read +* the maximum 256 bytes(for PARAM) +*/ + out_be32(lbc-fbcr, 256); + elbc_fcm_ctrl-read_bytes = 256; elbc_fcm_ctrl-use_mdr = 1; - elbc_fcm_ctrl-mdr = 0; - + elbc_fcm_ctrl-mdr = column; set_addr(mtd, 0, 0, 0); fsl_elbc_run_command(mtd); return; -- 1.6.4 ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
Re: [PATCH] block/swim3: Locking fixes
On 2011-12-12 05:57, Benjamin Herrenschmidt wrote: The old PowerMac swim3 driver has some interesting locking issues, using a private lock and failing to lock the queue before completing requests, which triggered WARN_ONs among others. This rips out the private lock, makes everything operate under the block queue lock, and generally makes things simpler. We used to also share a queue between the two possible instances which was problematic since we might pick the wrong controller in some cases, so make the queue and the current request per-instance and use queuedata to point to our private data which is a lot cleaner. We still share the queue lock but then, it's nearly impossible to actually use 2 swim3's simultaneously: one would need to have a Wallstreet PowerBook, the only machine afaik with two of these on the motherboard, and populate both hotswap bays with a floppy drive (the machine ships only with one), so nobody cares... While at it, add a little fix to clear up stale interrupts when loading the driver or plugging a floppy drive in a bay. Applied for current for-linus branch. -- Jens Axboe ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
Re: [PATCH 01/16 v3] pmac_zilog: fix unexpected irq
On Mon, 12 Dec 2011, Benjamin Herrenschmidt wrote: Any chance you can test this patch ? I would not be surprised if it broke m68k since I had to do some of the changes in there blind, so let me know... with this, I can again suspend/resume properly on a Pismo while using the internal modem among other things. The patch works on a PowerBook 520 given a few changes (below). This PowerBook only has one serial port that I can test (the internal modem is not supported on 68k Macs). Can you test a machine with two ports? The rest of my Mac hardware is in storage since I moved house last week. Finn Index: linux-git/drivers/tty/serial/pmac_zilog.c === --- linux-git.orig/drivers/tty/serial/pmac_zilog.c 2011-12-13 00:18:02.0 +1100 +++ linux-git/drivers/tty/serial/pmac_zilog.c 2011-12-13 00:23:55.0 +1100 @@ -1705,8 +1705,8 @@ static int __init pmz_init_port(struct u struct resource *r_ports; int irq; - r_ports = platform_get_resource(uap-node, IORESOURCE_MEM, 0); - irq = platform_get_irq(uap-node, 0); + r_ports = platform_get_resource(uap-pdev, IORESOURCE_MEM, 0); + irq = platform_get_irq(uap-pdev, 0); if (!r_ports || !irq) return -ENODEV; @@ -1763,8 +1763,10 @@ static void pmz_dispose_port(struct uart static int __init pmz_attach(struct platform_device *pdev) { + struct uart_pmac_port *uap; int i; + /* Iterate the pmz_ports array to find a matching entry */ for (i = 0; i pmz_ports_count; i++) if (pmz_ports[i].pdev == pdev) break; @@ -1773,15 +1775,23 @@ static int __init pmz_attach(struct plat uap = pmz_ports[i]; uap-port.dev = pdev-dev; - dev_set_drvdata(mdev-ofdev.dev, uap); + platform_set_drvdata(pdev, uap); - return uart_add_one_port(pmz_uart_reg, -pmz_ports[i]-port); + return uart_add_one_port(pmz_uart_reg, uap-port); } static int __exit pmz_detach(struct platform_device *pdev) { + struct uart_pmac_port *uap = platform_get_drvdata(pdev); + + if (!uap) + return -ENODEV; + uart_remove_one_port(pmz_uart_reg, uap-port); + + platform_set_drvdata(pdev, NULL); + uap-port.dev = 
NULL; + return 0; } @@ -1918,8 +1928,13 @@ static void __exit exit_pmz(void) for (i = 0; i pmz_ports_count; i++) { struct uart_pmac_port *uport = pmz_ports[i]; +#ifdef CONFIG_PPC_PMAC if (uport-node != NULL) pmz_dispose_port(uport); +#else + if (uport-pdev != NULL) + pmz_dispose_port(uport); +#endif } /* Unregister UART driver */ uart_unregister_driver(pmz_uart_reg); @@ -1993,6 +2008,9 @@ static int __init pmz_console_setup(stru #ifdef CONFIG_PPC_PMAC if (uap-node == NULL) return -ENODEV; +#else + if (uap-pdev == NULL) + return -ENODEV; #endif port = uap-port; Index: linux-git/drivers/tty/serial/pmac_zilog.h === --- linux-git.orig/drivers/tty/serial/pmac_zilog.h 2011-12-13 00:18:02.0 +1100 +++ linux-git/drivers/tty/serial/pmac_zilog.h 2011-12-13 00:23:55.0 +1100 @@ -1,18 +1,9 @@ #ifndef __PMAC_ZILOG_H__ #define __PMAC_ZILOG_H__ -#ifdef CONFIG_PPC_PMAC -/* We cannot use dev_* because this can be called early, way before - * we are matched with a device (when using it as a kernel console) - */ #define pmz_debug(fmt, arg...) pr_debug(ttyPZ%d: fmt, uap-port.line, ## arg) #define pmz_error(fmt, arg...) pr_err(ttyPZ%d: fmt, uap-port.line, ## arg) #define pmz_info(fmt, arg...) pr_info(ttyPZ%d: fmt, uap-port.line, ## arg) -#else -#define pmz_debug(fmt, arg...) dev_dbg(uap-node-dev, fmt, ## arg) -#define pmz_error(fmt, arg...) dev_err(uap-node-dev, fmt, ## arg) -#define pmz_info(fmt, arg...) dev_info(uap-node-dev, fmt, ## arg) -#endif /* * At most 2 ESCCs with 2 ports each ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
Re: [PATCH v3 2/3] hvc_init(): Enforce one-time initialization.
So on a CONSOLE_PORT_ADD message, we would take the (existing)ports_device::ports_lock, and for other control messages we would justtake the (new) port::port_lock? You are concerned that just takingthe ports_lock for all control messages could be too restrictive? Iwouldn't have expected these messages to be frequent occurrences, butI'll defer to your experience here. The CONSOLE_CONSOLE_PORT message calls hvc_alloc, which also needsserialization. That's in another one of these three patches; are youthinking we could leave that patch be, or that we would we use theport_lock for CONSOLE_CONSOLE_PORT? Using the port_lock wouldprovide the HVC serialization for free but it would be cleaner if weput HVC related synchronization in hvc_console.c. On Thu, Dec 8, 2011 at 4:08 AM, Amit Shah amit.s...@redhat.com wrote: On (Tue) 06 Dec 2011 [09:05:38], Miche Baker-Harvey wrote: Amit, Ah, indeed. I am not using MSI-X, so virtio_pci::vp_try_to_find_vqs() calls vp_request_intx() and sets up an interrupt callback. From there, when an interrupt occurs, the stack looks something like this: virtio_pci::vp_interrupt() virtio_pci::vp_vring_interrupt() virtio_ring::vring_interrupt() vq-vq.callback() -- in this case, that's virtio_console::control_intr() workqueue::schedule_work() workqueue::queue_work() queue_work_on(get_cpu()) -- queues the work on the current CPU. I'm not doing anything to keep multiple control message from being sent concurrently to the guest, and we will take those interrupts on any CPU. I've confirmed that the two instances of handle_control_message() are occurring on different CPUs. So let's have a new helper, port_lock() that takes the port-specific spinlock. There has to be a new helper, since the port lock should depend on the portdev lock being taken too. For the port addition case, just the portdev lock should be taken. For any other operations, the port lock should be taken. 
My assumption was that we would be able to serialise the work items, but that will be too restrictive. Taking port locks sounds like a better idea. We'd definitely need the port lock in the control work handler. We might need it in a few more places (like module removal), but we'll worry about that later. Does this sound fine? Amit ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
Re: [PATCH v3 2/3] hvc_init(): Enforce one-time initialization.
On (Mon) 12 Dec 2011 [11:11:55], Miche Baker-Harvey wrote: So on a CONSOLE_PORT_ADD message, we would take the (existing)ports_device::ports_lock, and for other control messages we would justtake the (new) port::port_lock? You are concerned that just takingthe ports_lock for all control messages could be too restrictive? Iwouldn't have expected these messages to be frequent occurrences, butI'll defer to your experience here. No, I mean we'll have to take the new port_lock() everywhere we currently take the port lock, plus in a few more places. I only suggest using port_lock() helper since we'll need a dependency on the portdev lock as well. The CONSOLE_CONSOLE_PORT message calls hvc_alloc, which also needsserialization. That's in another one of these three patches; are youthinking we could leave that patch be, or that we would we use theport_lock for CONSOLE_CONSOLE_PORT? Using the port_lock wouldprovide the HVC serialization for free but it would be cleaner if weput HVC related synchronization in hvc_console.c. Yes, definitely, since other users of hvc_console may get bitten in similar ways. However, I'm not too familiar with the hvc code, the people at linux-ppc can be of help. On Thu, Dec 8, 2011 at 4:08 AM, Amit Shah amit.s...@redhat.com wrote: On (Tue) 06 Dec 2011 [09:05:38], Miche Baker-Harvey wrote: Amit, Ah, indeed. I am not using MSI-X, so virtio_pci::vp_try_to_find_vqs() calls vp_request_intx() and sets up an interrupt callback. From there, when an interrupt occurs, the stack looks something like this: virtio_pci::vp_interrupt() virtio_pci::vp_vring_interrupt() virtio_ring::vring_interrupt() vq-vq.callback() -- in this case, that's virtio_console::control_intr() workqueue::schedule_work() workqueue::queue_work() queue_work_on(get_cpu()) -- queues the work on the current CPU. I'm not doing anything to keep multiple control message from being sent concurrently to the guest, and we will take those interrupts on any CPU. 
I've confirmed that the two instances of handle_control_message() are occurring on different CPUs. So let's have a new helper, port_lock() that takes the port-specific spinlock. There has to be a new helper, since the port lock should depend on the portdev lock being taken too. For the port addition case, just the portdev lock should be taken. For any other operations, the port lock should be taken. My assumption was that we would be able to serialise the work items, but that will be too restrictive. Taking port locks sounds like a better idea. We'd definitely need the port lock in the control work handler. We might need it in a few more places (like module removal), but we'll worry about that later. Does this sound fine? Amit Amit ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
Re: [PATCH 01/16 v3] pmac_zilog: fix unexpected irq
On Tue, 2011-12-13 at 00:34 +1100, Finn Thain wrote: On Mon, 12 Dec 2011, Benjamin Herrenschmidt wrote: Any chance you can test this patch ? I would not be surprised if it broke m68k since I had to do some of the changes in there blind, so let me know... with this, I can again suspend/resume properly on a Pismo while using the internal modem among other things. The patch works on a PowerBook 520 given a few changes (below). This PowerBook only has one serial port that I can test (the internal modem is not supported on 68k Macs). Interesting. The modem is a soft-modem geoport or a hw serial modem ? In the later case it's probably just a matter of finding the right GPIO bit in Apple ASIC to turn the power on :-) Can you test a machine with two ports? The rest of my Mac hardware is in storage since I moved house last week. I tried on 2 port powermacs, but I only have one adapter, so I've basically been running with one serial port open and shooting irda frame on the other (with nothing to check wether I got the frames on the other hand), oh well ... I'll apply your patch and commit via my tree. Cheers, Ben. 
Finn Index: linux-git/drivers/tty/serial/pmac_zilog.c === --- linux-git.orig/drivers/tty/serial/pmac_zilog.c2011-12-13 00:18:02.0 +1100 +++ linux-git/drivers/tty/serial/pmac_zilog.c 2011-12-13 00:23:55.0 +1100 @@ -1705,8 +1705,8 @@ static int __init pmz_init_port(struct u struct resource *r_ports; int irq; - r_ports = platform_get_resource(uap-node, IORESOURCE_MEM, 0); - irq = platform_get_irq(uap-node, 0); + r_ports = platform_get_resource(uap-pdev, IORESOURCE_MEM, 0); + irq = platform_get_irq(uap-pdev, 0); if (!r_ports || !irq) return -ENODEV; @@ -1763,8 +1763,10 @@ static void pmz_dispose_port(struct uart static int __init pmz_attach(struct platform_device *pdev) { + struct uart_pmac_port *uap; int i; + /* Iterate the pmz_ports array to find a matching entry */ for (i = 0; i pmz_ports_count; i++) if (pmz_ports[i].pdev == pdev) break; @@ -1773,15 +1775,23 @@ static int __init pmz_attach(struct plat uap = pmz_ports[i]; uap-port.dev = pdev-dev; - dev_set_drvdata(mdev-ofdev.dev, uap); + platform_set_drvdata(pdev, uap); - return uart_add_one_port(pmz_uart_reg, - pmz_ports[i]-port); + return uart_add_one_port(pmz_uart_reg, uap-port); } static int __exit pmz_detach(struct platform_device *pdev) { + struct uart_pmac_port *uap = platform_get_drvdata(pdev); + + if (!uap) + return -ENODEV; + uart_remove_one_port(pmz_uart_reg, uap-port); + + platform_set_drvdata(pdev, NULL); + uap-port.dev = NULL; + return 0; } @@ -1918,8 +1928,13 @@ static void __exit exit_pmz(void) for (i = 0; i pmz_ports_count; i++) { struct uart_pmac_port *uport = pmz_ports[i]; +#ifdef CONFIG_PPC_PMAC if (uport-node != NULL) pmz_dispose_port(uport); +#else + if (uport-pdev != NULL) + pmz_dispose_port(uport); +#endif } /* Unregister UART driver */ uart_unregister_driver(pmz_uart_reg); @@ -1993,6 +2008,9 @@ static int __init pmz_console_setup(stru #ifdef CONFIG_PPC_PMAC if (uap-node == NULL) return -ENODEV; +#else + if (uap-pdev == NULL) + return -ENODEV; #endif port = uap-port; Index: 
linux-git/drivers/tty/serial/pmac_zilog.h === --- linux-git.orig/drivers/tty/serial/pmac_zilog.h2011-12-13 00:18:02.0 +1100 +++ linux-git/drivers/tty/serial/pmac_zilog.h 2011-12-13 00:23:55.0 +1100 @@ -1,18 +1,9 @@ #ifndef __PMAC_ZILOG_H__ #define __PMAC_ZILOG_H__ -#ifdef CONFIG_PPC_PMAC -/* We cannot use dev_* because this can be called early, way before - * we are matched with a device (when using it as a kernel console) - */ #define pmz_debug(fmt, arg...) pr_debug(ttyPZ%d: fmt, uap-port.line, ## arg) #define pmz_error(fmt, arg...) pr_err(ttyPZ%d: fmt, uap-port.line, ## arg) #define pmz_info(fmt, arg...)pr_info(ttyPZ%d: fmt, uap-port.line, ## arg) -#else -#define pmz_debug(fmt, arg...) dev_dbg(uap-node-dev, fmt, ## arg) -#define pmz_error(fmt, arg...) dev_err(uap-node-dev, fmt, ## arg) -#define pmz_info(fmt, arg...)dev_info(uap-node-dev, fmt, ## arg) -#endif /* * At most 2 ESCCs with 2 ports each ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
Rework gpio phandle parsing
I originally posted this as part of the DT clock bindings. I'm reposting now since I've fixed up some bugs and I'm planning to put them into linux-next. The DT clock binding patches will be posted separately. Cheers, g. arch/arm/boot/dts/testcases/tests-phandle.dtsi | 37 ++ arch/arm/boot/dts/testcases/tests.dtsi |1 + arch/arm/boot/dts/versatile-pb.dts |2 + arch/microblaze/kernel/reset.c | 43 +--- arch/powerpc/sysdev/qe_lib/gpio.c | 42 ++-- drivers/gpio/gpiolib.c |2 +- drivers/of/Kconfig |9 ++ drivers/of/Makefile|1 + drivers/of/base.c | 146 drivers/of/gpio.c | 43 +++ drivers/of/selftest.c | 139 ++ include/asm-generic/gpio.h |6 +- include/linux/of.h | 11 ++- include/linux/of_gpio.h| 10 +- 14 files changed, 313 insertions(+), 179 deletions(-) ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
[PATCH 2/4] gpio/powerpc: Eliminate duplication of of_get_named_gpio_flags()
A large chunk of qe_pin_request() is unnecessarily cut-and-paste directly from of_get_named_gpio_flags(). This patch cuts out the duplicate code and replaces it with a call to of_get_gpio(). v2: fixed compile error due to missing gpio_to_chip() Signed-off-by: Grant Likely grant.lik...@secretlab.ca Cc: Benjamin Herrenschmidt b...@kernel.crashing.org Cc: Kumar Gala ga...@kernel.crashing.org --- arch/powerpc/sysdev/qe_lib/gpio.c | 42 +++- drivers/gpio/gpiolib.c|2 +- include/asm-generic/gpio.h|1 + 3 files changed, 10 insertions(+), 35 deletions(-) diff --git a/arch/powerpc/sysdev/qe_lib/gpio.c b/arch/powerpc/sysdev/qe_lib/gpio.c index e23f23c..521e67a 100644 --- a/arch/powerpc/sysdev/qe_lib/gpio.c +++ b/arch/powerpc/sysdev/qe_lib/gpio.c @@ -139,14 +139,10 @@ struct qe_pin { struct qe_pin *qe_pin_request(struct device_node *np, int index) { struct qe_pin *qe_pin; - struct device_node *gpio_np; struct gpio_chip *gc; struct of_mm_gpio_chip *mm_gc; struct qe_gpio_chip *qe_gc; int err; - int size; - const void *gpio_spec; - const u32 *gpio_cells; unsigned long flags; qe_pin = kzalloc(sizeof(*qe_pin), GFP_KERNEL); @@ -155,45 +151,25 @@ struct qe_pin *qe_pin_request(struct device_node *np, int index) return ERR_PTR(-ENOMEM); } - err = of_parse_phandles_with_args(np, gpios, #gpio-cells, index, - gpio_np, gpio_spec); - if (err) { - pr_debug(%s: can't parse gpios property\n, __func__); + err = of_get_gpio(np, index); + if (err 0) + goto err0; + gc = gpio_to_chip(err); + if (WARN_ON(!gc)) goto err0; - } - if (!of_device_is_compatible(gpio_np, fsl,mpc8323-qe-pario-bank)) { + if (!of_device_is_compatible(gc-of_node, fsl,mpc8323-qe-pario-bank)) { pr_debug(%s: tried to get a non-qe pin\n, __func__); err = -EINVAL; - goto err1; - } - - gc = of_node_to_gpiochip(gpio_np); - if (!gc) { - pr_debug(%s: gpio controller %s isn't registered\n, -np-full_name, gpio_np-full_name); - err = -ENODEV; - goto err1; - } - - gpio_cells = of_get_property(gpio_np, #gpio-cells, size); - if (!gpio_cells || 
size != sizeof(*gpio_cells) || - *gpio_cells != gc-of_gpio_n_cells) { - pr_debug(%s: wrong #gpio-cells for %s\n, -np-full_name, gpio_np-full_name); - err = -EINVAL; - goto err1; + goto err0; } - err = gc-of_xlate(gc, np, gpio_spec, NULL); - if (err 0) - goto err1; - mm_gc = to_of_mm_gpio_chip(gc); qe_gc = to_qe_gpio_chip(mm_gc); spin_lock_irqsave(qe_gc-lock, flags); + err -= gc-base; if (test_and_set_bit(QE_PIN_REQUESTED, qe_gc-pin_flags[err]) == 0) { qe_pin-controller = qe_gc; qe_pin-num = err; @@ -206,8 +182,6 @@ struct qe_pin *qe_pin_request(struct device_node *np, int index) if (!err) return qe_pin; -err1: - of_node_put(gpio_np); err0: kfree(qe_pin); pr_debug(%s failed with status %d\n, __func__, err); diff --git a/drivers/gpio/gpiolib.c b/drivers/gpio/gpiolib.c index a971e3d..dc315e9 100644 --- a/drivers/gpio/gpiolib.c +++ b/drivers/gpio/gpiolib.c @@ -114,7 +114,7 @@ static int gpio_ensure_requested(struct gpio_desc *desc, unsigned offset) } /* caller holds gpio_lock *OR* gpio is marked as requested */ -static inline struct gpio_chip *gpio_to_chip(unsigned gpio) +struct gpio_chip *gpio_to_chip(unsigned gpio) { return gpio_desc[gpio].chip; } diff --git a/include/asm-generic/gpio.h b/include/asm-generic/gpio.h index 8c86210..6b10bdc 100644 --- a/include/asm-generic/gpio.h +++ b/include/asm-generic/gpio.h @@ -135,6 +135,7 @@ struct gpio_chip { extern const char *gpiochip_is_requested(struct gpio_chip *chip, unsigned offset); +extern struct gpio_chip *gpio_to_chip(unsigned gpio); extern int __must_check gpiochip_reserve(int start, int ngpio); /* add/remove chips */ -- 1.7.5.4 ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
[PATCH 3/4] of: create of_phandle_args to simplify return of phandle parsing data
of_parse_phandle_with_args() needs to return quite a bit of data. Rather than making each datum a separate **out_ argument, this patch creates struct of_phandle_args to contain all the returned data and reworks the user of the function. This patch also enables of_parse_phandle_with_args() to return the device node pointer for the phandle node. This patch also ends up being fairly major surgery to of_parse_handle_with_args(). The existing structure didn't work well when extending to use of_phandle_args, and I discovered bugs during testing. I also took the opportunity to rename the function to be like the existing of_parse_phandle(). v2: - moved declaration of of_phandle_args to fix compile on non-DT builds - fixed incorrect index in example usage - fixed incorrect return code handling for empty entries Reviewed-by: Shawn Guo shawn@freescale.com Signed-off-by: Grant Likely grant.lik...@secretlab.ca --- drivers/of/base.c | 146 ++- drivers/of/gpio.c | 43 ++--- include/asm-generic/gpio.h |5 +- include/linux/of.h | 11 +++- include/linux/of_gpio.h| 10 ++- 5 files changed, 112 insertions(+), 103 deletions(-) diff --git a/drivers/of/base.c b/drivers/of/base.c index 9b6588e..c6db9ab 100644 --- a/drivers/of/base.c +++ b/drivers/of/base.c @@ -824,17 +824,19 @@ of_parse_phandle(struct device_node *np, const char *phandle_name, int index) EXPORT_SYMBOL(of_parse_phandle); /** - * of_parse_phandles_with_args - Find a node pointed by phandle in a list + * of_parse_phandle_with_args() - Find a node pointed by phandle in a list * @np:pointer to a device tree node containing a list * @list_name: property name that contains a list * @cells_name:property name that specifies phandles' arguments count * @index: index of a phandle to parse out - * @out_node: optional pointer to device_node struct pointer (will be filled) - * @out_args: optional pointer to arguments pointer (will be filled) + * @out_args: optional pointer to output arguments structure (will be filled) * * This function is 
useful to parse lists of phandles and their arguments. - * Returns 0 on success and fills out_node and out_args, on error returns - * appropriate errno value. + * Returns 0 on success and fills out_args, on error returns appropriate + * errno value. + * + * Caller is responsible to call of_node_put() on the returned out_args-node + * pointer. * * Example: * @@ -851,94 +853,96 @@ EXPORT_SYMBOL(of_parse_phandle); * } * * To get a device_node of the `node2' node you may call this: - * of_parse_phandles_with_args(node3, list, #list-cells, 2, node2, args); + * of_parse_phandle_with_args(node3, list, #list-cells, 1, args); */ -int of_parse_phandles_with_args(struct device_node *np, const char *list_name, +int of_parse_phandle_with_args(struct device_node *np, const char *list_name, const char *cells_name, int index, - struct device_node **out_node, - const void **out_args) + struct of_phandle_args *out_args) { - int ret = -EINVAL; - const __be32 *list; - const __be32 *list_end; - int size; - int cur_index = 0; + const __be32 *list, *list_end; + int size, cur_index = 0; + uint32_t count = 0; struct device_node *node = NULL; - const void *args = NULL; + phandle phandle; + /* Retrieve the phandle list property */ list = of_get_property(np, list_name, size); - if (!list) { - ret = -ENOENT; - goto err0; - } + if (!list) + return -EINVAL; list_end = list + size / sizeof(*list); + /* Loop over the phandles until all the requested entry is found */ while (list list_end) { - const __be32 *cells; - phandle phandle; + count = 0; + /* +* If phandle is 0, then it is an empty entry with no +* arguments. Skip forward to the next entry. 
+*/ phandle = be32_to_cpup(list++); - args = list; - - /* one cell hole in the list = ; */ - if (!phandle) - goto next; - - node = of_find_node_by_phandle(phandle); - if (!node) { - pr_debug(%s: could not find phandle\n, -np-full_name); - goto err0; - } + if (phandle) { + /* +* Find the provider node and parse the #*-cells +* property to determine the argument length +*/ + node = of_find_node_by_phandle(phandle); + if (!node) {
[PATCH 1/4] gpio/microblaze: Eliminate duplication of of_get_named_gpio_flags()
of_reset_gpio_handle() is largely a cut-and-paste copy of of_get_named_gpio_flags(). There really isn't any reason for the split, so this patch deletes the duplicate function Signed-off-by: Grant Likely grant.lik...@secretlab.ca Cc: Michal Simek mon...@monstr.eu --- arch/microblaze/kernel/reset.c | 43 +-- 1 files changed, 2 insertions(+), 41 deletions(-) diff --git a/arch/microblaze/kernel/reset.c b/arch/microblaze/kernel/reset.c index bd8ccab..88a0163 100644 --- a/arch/microblaze/kernel/reset.c +++ b/arch/microblaze/kernel/reset.c @@ -19,50 +19,11 @@ static int handle; /* reset pin handle */ static unsigned int reset_val; -static int of_reset_gpio_handle(void) -{ - int ret; /* variable which stored handle reset gpio pin */ - struct device_node *root; /* root node */ - struct device_node *gpio; /* gpio node */ - struct gpio_chip *gc; - u32 flags; - const void *gpio_spec; - - /* find out root node */ - root = of_find_node_by_path(/); - - /* give me handle for gpio node to be possible allocate pin */ - ret = of_parse_phandles_with_args(root, hard-reset-gpios, - #gpio-cells, 0, gpio, gpio_spec); - if (ret) { - pr_debug(%s: can't parse gpios property\n, __func__); - goto err0; - } - - gc = of_node_to_gpiochip(gpio); - if (!gc) { - pr_debug(%s: gpio controller %s isn't registered\n, -root-full_name, gpio-full_name); - ret = -ENODEV; - goto err1; - } - - ret = gc-of_xlate(gc, root, gpio_spec, flags); - if (ret 0) - goto err1; - - ret += gc-base; -err1: - of_node_put(gpio); -err0: - pr_debug(%s exited with status %d\n, __func__, ret); - return ret; -} - void of_platform_reset_gpio_probe(void) { int ret; - handle = of_reset_gpio_handle(); + handle = of_get_named_gpio(of_find_node_by_path(/), + hard-reset-gpios, 0); if (!gpio_is_valid(handle)) { printk(KERN_INFO Skipping unavailable RESET gpio %d (%s)\n, -- 1.7.5.4 ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
Re: [PATCH 1/2] mtd/nand : set Nand flash page address to FBAR and FPAR correctly
On Fri, 2011-12-09 at 17:42 +0800, shuo@freescale.com wrote: From: Liu Shuo b35...@freescale.com If we use the Nand flash chip whose number of pages in a block is greater than 64 (for large page), we must treat the low bit of FBAR as being the high bit of the page address due to the limitation of FCM; it simply uses the low 6-bits (for large page) of the combined block/page address as the FPAR component, rather than considering the actual block size. Pushed this one to l2-mtd-2.6.git, thanks! Artem. ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
Re: [PATCH 3/3] mtd/nand : workaround for Freescale FCM to support large-page Nand chip
On Tue, 2011-12-06 at 18:09 -0600, Scott Wood wrote: On 12/03/2011 10:31 PM, shuo@freescale.com wrote: From: Liu Shuo shuo@freescale.com Freescale FCM controller has a 2K size limitation of buffer RAM. In order to support the Nand flash chip whose page size is larger than 2K bytes, we read/write 2k data repeatedly by issuing FIR_OP_RB/FIR_OP_WB and save them to a large buffer. Signed-off-by: Liu Shuo shuo@freescale.com --- v3: -remove page_size of struct fsl_elbc_mtd. -do an oob write by NAND_CMD_RNDIN. drivers/mtd/nand/fsl_elbc_nand.c | 243 ++ 1 files changed, 218 insertions(+), 25 deletions(-) What is the plan for bad block marker migration? Why should it be migrated? I thought that you support 2KiB pages, and this adds 4 and 8 KiB page support, which you never supported before. What is the migration you guys are talking about? Artem. ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
Re: [PATCH 3/3] mtd/nand : workaround for Freescale FCM to support large-page Nand chip
On 12/12/2011 03:09 PM, Artem Bityutskiy wrote: On Tue, 2011-12-06 at 18:09 -0600, Scott Wood wrote: On 12/03/2011 10:31 PM, shuo@freescale.com wrote: From: Liu Shuo shuo@freescale.com Freescale FCM controller has a 2K size limitation of buffer RAM. In order to support the Nand flash chip whose page size is larger than 2K bytes, we read/write 2k data repeatedly by issuing FIR_OP_RB/FIR_OP_WB and save them to a large buffer. Signed-off-by: Liu Shuo shuo@freescale.com --- v3: -remove page_size of struct fsl_elbc_mtd. -do a oob write by NAND_CMD_RNDIN. drivers/mtd/nand/fsl_elbc_nand.c | 243 ++ 1 files changed, 218 insertions(+), 25 deletions(-) What is the plan for bad block marker migration? Why it should be migrated? I thought that you support 2KiB pages, and this adds 4 and 8 KiB pages support, which you never supported before. What is the migration you guys are talking about? NAND chips come from the factory with bad blocks marked at a certain offset into each page. This offset is normally in the OOB area, but since we change the layout from 4k data, 128 byte oob to 2k data, 64 byte oob, 2k data, 64 byte oob the marker is no longer in the oob. On first use we need to migrate the markers so that they are still in the oob. -Scott ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
Re: [PATCH 3/3] mtd/nand : workaround for Freescale FCM to support large-page Nand chip
On Mon, 2011-12-12 at 15:15 -0600, Scott Wood wrote: NAND chips come from the factory with bad blocks marked at a certain offset into each page. This offset is normally in the OOB area, but since we change the layout from 4k data, 128 byte oob to 2k data, 64 byte oob, 2k data, 64 byte oob the marker is no longer in the oob. On first use we need to migrate the markers so that they are still in the oob. Ah, I see, thanks. Are you planning to implement in-kernel migration or use a user-space tool? Artem. ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
Re: [PATCH 3/3] mtd/nand : workaround for Freescale FCM to support large-page Nand chip
On 12/12/2011 03:19 PM, Artem Bityutskiy wrote: On Mon, 2011-12-12 at 15:15 -0600, Scott Wood wrote: NAND chips come from the factory with bad blocks marked at a certain offset into each page. This offset is normally in the OOB area, but since we change the layout from 4k data, 128 byte oob to 2k data, 64 byte oob, 2k data, 64 byte oob the marker is no longer in the oob. On first use we need to migrate the markers so that they are still in the oob. Ah, I see, thanks. Are you planning to implement in-kernel migration or use a user-space tool? That's the kind of answer I was hoping to get from Shuo. :-) Most likely it is a firmware-based tool, but I'd like there to be some way for the tool to mark that this has happened, so that the Linux driver can refuse to do non-raw accesses to a chip that isn't marked as having been migrated (or at least yell loudly in the log). Speaking of raw accesses, these are currently broken in the eLBC driver... we need some way for the generic layer to tell us what kind of access it is before the transaction starts, not once it wants to read out the buffer (unless we add more hacks to delay the start of a read transaction until first buffer access...). We'd be better off with a high-level read page/write page function that does the whole thing (not just buffer access, but command issuance as well). -Scott ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
[PATCH v3] ipc: provide generic compat versions of IPC syscalls
When using the compat APIs, architectures will generally want to be able to make direct syscalls to msgsnd(), shmctl(), etc., and in the kernel we would want them to be handled directly by compat_sys_xxx() functions, as is true for other compat syscalls. However, for historical reasons, several of the existing compat IPC syscalls do not do this. semctl() expects a pointer to the fourth argument, instead of the fourth argument itself. msgsnd(), msgrcv() and shmat() expect arguments in different order. This change adds an ARCH_WANT_OLD_COMPAT_IPC config option that can be set to preserve this behavior for ports that use it (x86, sparc, powerpc, s390, and mips). No actual semantics are changed for those architectures, and there is only a minimal amount of code refactoring in ipc/compat.c. Newer architectures like tile (and perhaps future architectures such as arm64 and unicore64) should not select this option, and thus can avoid having any IPC-specific code at all in their architecture-specific compat layer. In the same vein, if this option is not selected, IPC_64 mode is assumed, since that's what the asm-generic headers expect. The workaround code in tile for msgsnd() and msgrcv() is removed with this change; it also fixes the bug that shmat() and semctl() were not being properly handled. 
Signed-off-by: Chris Metcalf cmetc...@tilera.com --- arch/Kconfig |3 ++ arch/mips/Kconfig |1 + arch/powerpc/Kconfig |1 + arch/s390/Kconfig |1 + arch/sparc/Kconfig |1 + arch/tile/include/asm/compat.h | 11 -- arch/tile/kernel/compat.c | 43 arch/x86/Kconfig |1 + include/linux/compat.h | 12 ++- ipc/compat.c | 70 --- 10 files changed, 83 insertions(+), 61 deletions(-) diff --git a/arch/Kconfig b/arch/Kconfig index 4b0669c..dfb1e07 100644 --- a/arch/Kconfig +++ b/arch/Kconfig @@ -181,4 +181,7 @@ config HAVE_RCU_TABLE_FREE config ARCH_HAVE_NMI_SAFE_CMPXCHG bool +config ARCH_WANT_OLD_COMPAT_IPC + bool + source kernel/gcov/Kconfig diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig index d46f1da..ad2af82 100644 --- a/arch/mips/Kconfig +++ b/arch/mips/Kconfig @@ -2420,6 +2420,7 @@ config MIPS32_COMPAT config COMPAT bool depends on MIPS32_COMPAT + select ARCH_WANT_OLD_COMPAT_IPC default y config SYSVIPC_COMPAT diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index 951e18f..e2be710 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -146,6 +146,7 @@ config COMPAT bool default y if PPC64 select COMPAT_BINFMT_ELF + select ARCH_WANT_OLD_COMPAT_IPC config SYSVIPC_COMPAT bool diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig index 373679b..2fc3bca 100644 --- a/arch/s390/Kconfig +++ b/arch/s390/Kconfig @@ -221,6 +221,7 @@ config COMPAT prompt Kernel support for 31 bit emulation depends on 64BIT select COMPAT_BINFMT_ELF + select ARCH_WANT_OLD_COMPAT_IPC help Select this option if you want to enable your system kernel to handle system-calls from ELF binaries for 31 bit ESA. 
This option diff --git a/arch/sparc/Kconfig b/arch/sparc/Kconfig index f92602e..846cb5c 100644 --- a/arch/sparc/Kconfig +++ b/arch/sparc/Kconfig @@ -577,6 +577,7 @@ config COMPAT depends on SPARC64 default y select COMPAT_BINFMT_ELF + select ARCH_WANT_OLD_COMPAT_IPC config SYSVIPC_COMPAT bool diff --git a/arch/tile/include/asm/compat.h b/arch/tile/include/asm/compat.h index bf95f55..4b4b289 100644 --- a/arch/tile/include/asm/compat.h +++ b/arch/tile/include/asm/compat.h @@ -242,17 +242,6 @@ long compat_sys_fallocate(int fd, int mode, long compat_sys_sched_rr_get_interval(compat_pid_t pid, struct compat_timespec __user *interval); -/* Versions of compat functions that differ from generic Linux. */ -struct compat_msgbuf; -long tile_compat_sys_msgsnd(int msqid, - struct compat_msgbuf __user *msgp, - size_t msgsz, int msgflg); -long tile_compat_sys_msgrcv(int msqid, - struct compat_msgbuf __user *msgp, - size_t msgsz, long msgtyp, int msgflg); -long tile_compat_sys_ptrace(compat_long_t request, compat_long_t pid, - compat_long_t addr, compat_long_t data); - /* Tilera Linux syscalls that don't have compat versions. */ #define compat_sys_flush_cache sys_flush_cache diff --git a/arch/tile/kernel/compat.c b/arch/tile/kernel/compat.c index bf5e9d7..d67459b 100644 --- a/arch/tile/kernel/compat.c +++ b/arch/tile/kernel/compat.c @@ -16,7 +16,6 @@ #define __SYSCALL_COMPAT #include linux/compat.h -#include linux/msg.h #include linux/syscalls.h #include linux/kdev_t.h
[PATCH v3 10/14] KVM: PPC: Maintain a doubly-linked list of guest HPTEs for each gfn
This expands the reverse mapping array to contain two links for each HPTE which are used to link together HPTEs that correspond to the same guest logical page. Each circular list of HPTEs is pointed to by the rmap array entry for the guest logical page, pointed to by the relevant memslot. Links are 32-bit HPT entry indexes rather than full 64-bit pointers, to save space. We use 3 of the remaining 32 bits in the rmap array entries as a lock bit, a referenced bit and a present bit (the present bit is needed since HPTE index 0 is valid). The bit lock for the rmap chain nests inside the HPTE lock bit. Signed-off-by: Paul Mackerras pau...@samba.org --- arch/powerpc/include/asm/kvm_book3s_64.h | 18 ++ arch/powerpc/include/asm/kvm_host.h | 17 ++- arch/powerpc/kvm/book3s_hv_rm_mmu.c | 84 +- 3 files changed, 117 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/include/asm/kvm_book3s_64.h b/arch/powerpc/include/asm/kvm_book3s_64.h index 18b590d..9508c03 100644 --- a/arch/powerpc/include/asm/kvm_book3s_64.h +++ b/arch/powerpc/include/asm/kvm_book3s_64.h @@ -113,6 +113,11 @@ static inline unsigned long hpte_page_size(unsigned long h, unsigned long l) return 0; /* error */ } +static inline unsigned long hpte_rpn(unsigned long ptel, unsigned long psize) +{ + return ((ptel HPTE_R_RPN) ~(psize - 1)) PAGE_SHIFT; +} + static inline int hpte_cache_flags_ok(unsigned long ptel, unsigned long io_type) { unsigned int wimg = ptel HPTE_R_WIMG; @@ -139,6 +144,19 @@ static inline unsigned long hpte_cache_bits(unsigned long pte_val) #endif } +static inline void lock_rmap(unsigned long *rmap) +{ + do { + while (test_bit(KVMPPC_RMAP_LOCK_BIT, rmap)) + cpu_relax(); + } while (test_and_set_bit_lock(KVMPPC_RMAP_LOCK_BIT, rmap)); +} + +static inline void unlock_rmap(unsigned long *rmap) +{ + __clear_bit_unlock(KVMPPC_RMAP_LOCK_BIT, rmap); +} + static inline bool slot_is_aligned(struct kvm_memory_slot *memslot, unsigned long pagesize) { diff --git a/arch/powerpc/include/asm/kvm_host.h 
b/arch/powerpc/include/asm/kvm_host.h index 243bc80..97cb2d7 100644 --- a/arch/powerpc/include/asm/kvm_host.h +++ b/arch/powerpc/include/asm/kvm_host.h @@ -170,12 +170,27 @@ struct kvmppc_rma_info { /* * The reverse mapping array has one entry for each HPTE, * which stores the guest's view of the second word of the HPTE - * (including the guest physical address of the mapping). + * (including the guest physical address of the mapping), + * plus forward and backward pointers in a doubly-linked ring + * of HPTEs that map the same host page. The pointers in this + * ring are 32-bit HPTE indexes, to save space. */ struct revmap_entry { unsigned long guest_rpte; + unsigned int forw, back; }; +/* + * We use the top bit of each memslot-rmap entry as a lock bit, + * and bit 32 as a present flag. The bottom 32 bits are the + * index in the guest HPT of a HPTE that points to the page. + */ +#define KVMPPC_RMAP_LOCK_BIT 63 +#define KVMPPC_RMAP_REF_BIT33 +#define KVMPPC_RMAP_REFERENCED (1ul KVMPPC_RMAP_REF_BIT) +#define KVMPPC_RMAP_PRESENT0x1ul +#define KVMPPC_RMAP_INDEX 0xul + /* Low-order bits in kvm-arch.slot_phys[][] */ #define KVMPPC_PAGE_ORDER_MASK 0x1f #define KVMPPC_PAGE_NO_CACHE HPTE_R_I/* 0x20 */ diff --git a/arch/powerpc/kvm/book3s_hv_rm_mmu.c b/arch/powerpc/kvm/book3s_hv_rm_mmu.c index 3f5b016..5b31caa 100644 --- a/arch/powerpc/kvm/book3s_hv_rm_mmu.c +++ b/arch/powerpc/kvm/book3s_hv_rm_mmu.c @@ -54,6 +54,70 @@ static void *real_vmalloc_addr(void *x) return __va(addr); } +/* + * Add this HPTE into the chain for the real page. + * Must be called with the chain locked; it unlocks the chain. 
+ */ +static void kvmppc_add_revmap_chain(struct kvm *kvm, struct revmap_entry *rev, +unsigned long *rmap, long pte_index, int realmode) +{ + struct revmap_entry *head, *tail; + unsigned long i; + + if (*rmap KVMPPC_RMAP_PRESENT) { + i = *rmap KVMPPC_RMAP_INDEX; + head = kvm-arch.revmap[i]; + if (realmode) + head = real_vmalloc_addr(head); + tail = kvm-arch.revmap[head-back]; + if (realmode) + tail = real_vmalloc_addr(tail); + rev-forw = i; + rev-back = head-back; + tail-forw = pte_index; + head-back = pte_index; + } else { + rev-forw = rev-back = pte_index; + i = pte_index; + } + smp_wmb(); + *rmap = i | KVMPPC_RMAP_REFERENCED | KVMPPC_RMAP_PRESENT; /* unlock */ +} + +/* Remove this HPTE from the chain for a real page */ +static void remove_revmap_chain(struct kvm *kvm, long pte_index, +
[PATCH v3 02/14] KVM: PPC: Move kvm_vcpu_ioctl_[gs]et_one_reg down to platform-specific code
This moves the get/set_one_reg implementation down from powerpc.c into booke.c, book3s_pr.c and book3s_hv.c. This avoids #ifdefs in C code, but more importantly, it fixes a bug on Book3s HV where we were accessing beyond the end of the kvm_vcpu struct (via the to_book3s() macro) and corrupting memory, causing random crashes and file corruption. On Book3s HV we only accept setting the HIOR to zero, since the guest runs in supervisor mode and its vectors are never offset from zero. Signed-off-by: Paul Mackerras pau...@samba.org --- arch/powerpc/include/asm/kvm_ppc.h |3 ++ arch/powerpc/kvm/book3s_hv.c | 33 ++ arch/powerpc/kvm/book3s_pr.c | 33 ++ arch/powerpc/kvm/booke.c | 10 + arch/powerpc/kvm/powerpc.c | 39 5 files changed, 79 insertions(+), 39 deletions(-) diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h index 5192c2e..fc2d696 100644 --- a/arch/powerpc/include/asm/kvm_ppc.h +++ b/arch/powerpc/include/asm/kvm_ppc.h @@ -176,6 +176,9 @@ int kvmppc_core_set_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs); void kvmppc_get_sregs_ivor(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs); int kvmppc_set_sregs_ivor(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs); +int kvm_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg); +int kvm_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg); + void kvmppc_set_pid(struct kvm_vcpu *vcpu, u32 pid); #ifdef CONFIG_KVM_BOOK3S_64_HV diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index b1e3b9c..da7db14 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c @@ -392,6 +392,39 @@ int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu, return 0; } +int kvm_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg) +{ + int r = -EINVAL; + + switch (reg-id) { + case KVM_ONE_REG_PPC_HIOR: + reg-u.reg64 = 0; + r = 0; + break; + default: + break; + } + + return r; +} + +int kvm_vcpu_ioctl_set_one_reg(struct 
kvm_vcpu *vcpu, struct kvm_one_reg *reg) +{ + int r = -EINVAL; + + switch (reg-id) { + case KVM_ONE_REG_PPC_HIOR: + /* Only allow this to be set to zero */ + if (reg-u.reg64 == 0) + r = 0; + break; + default: + break; + } + + return r; +} + int kvmppc_core_check_processor_compat(void) { if (cpu_has_feature(CPU_FTR_HVMODE)) diff --git a/arch/powerpc/kvm/book3s_pr.c b/arch/powerpc/kvm/book3s_pr.c index ae6a034..ddd92a5 100644 --- a/arch/powerpc/kvm/book3s_pr.c +++ b/arch/powerpc/kvm/book3s_pr.c @@ -863,6 +863,39 @@ int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu, return 0; } +int kvm_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg) +{ + int r = -EINVAL; + + switch (reg-id) { + case KVM_ONE_REG_PPC_HIOR: + reg-u.reg64 = to_book3s(vcpu)-hior; + r = 0; + break; + default: + break; + } + + return r; +} + +int kvm_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg) +{ + int r = -EINVAL; + + switch (reg-id) { + case KVM_ONE_REG_PPC_HIOR: + to_book3s(vcpu)-hior = reg-u.reg64; + to_book3s(vcpu)-hior_explicit = true; + r = 0; + break; + default: + break; + } + + return r; +} + int kvmppc_core_check_processor_compat(void) { return 0; diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c index 9e41f45..ee9e1ee 100644 --- a/arch/powerpc/kvm/booke.c +++ b/arch/powerpc/kvm/booke.c @@ -887,6 +887,16 @@ int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu, return kvmppc_core_set_sregs(vcpu, sregs); } +int kvm_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg) +{ + return -EINVAL; +} + +int kvm_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg) +{ + return -EINVAL; +} + int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu) { return -ENOTSUPP; diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c index b939b8a..69367ac 100644 --- a/arch/powerpc/kvm/powerpc.c +++ b/arch/powerpc/kvm/powerpc.c @@ -624,45 +624,6 @@ static int 
kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu, return r; } -static int kvm_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu, - struct kvm_one_reg *reg) -{ - int r = -EINVAL; - - switch (reg-id) { -#ifdef CONFIG_PPC_BOOK3S - case KVM_ONE_REG_PPC_HIOR: - reg-u.reg64 = to_book3s(vcpu)-hior; - r = 0; -
[PATCH v3 08/14] KVM: PPC: Allow use of small pages to back Book3S HV guests
This relaxes the requirement that the guest memory be provided as 16MB huge pages, allowing it to be provided as normal memory, i.e. in pages of PAGE_SIZE bytes (4k or 64k). To allow this, we index the kvm-arch.slot_phys[] arrays with a small page index, even if huge pages are being used, and use the low-order 5 bits of each entry to store the order of the enclosing page with respect to normal pages, i.e. log_2(enclosing_page_size / PAGE_SIZE). Signed-off-by: Paul Mackerras pau...@samba.org --- arch/powerpc/include/asm/kvm_book3s_64.h | 10 +++ arch/powerpc/include/asm/kvm_host.h |3 +- arch/powerpc/include/asm/kvm_ppc.h |2 +- arch/powerpc/include/asm/reg.h |1 + arch/powerpc/kvm/book3s_64_mmu_hv.c | 122 -- arch/powerpc/kvm/book3s_hv.c | 57 -- arch/powerpc/kvm/book3s_hv_rm_mmu.c |6 +- 7 files changed, 132 insertions(+), 69 deletions(-) diff --git a/arch/powerpc/include/asm/kvm_book3s_64.h b/arch/powerpc/include/asm/kvm_book3s_64.h index 7e6f2ed..10920f7 100644 --- a/arch/powerpc/include/asm/kvm_book3s_64.h +++ b/arch/powerpc/include/asm/kvm_book3s_64.h @@ -113,4 +113,14 @@ static inline unsigned long hpte_page_size(unsigned long h, unsigned long l) return 0; /* error */ } +static inline bool slot_is_aligned(struct kvm_memory_slot *memslot, + unsigned long pagesize) +{ + unsigned long mask = (pagesize PAGE_SHIFT) - 1; + + if (pagesize = PAGE_SIZE) + return 1; + return !(memslot-base_gfn mask) !(memslot-npages mask); +} + #endif /* __ASM_KVM_BOOK3S_64_H__ */ diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h index beb22ba..9252d5e 100644 --- a/arch/powerpc/include/asm/kvm_host.h +++ b/arch/powerpc/include/asm/kvm_host.h @@ -177,14 +177,13 @@ struct revmap_entry { }; /* Low-order bits in kvm-arch.slot_phys[][] */ +#define KVMPPC_PAGE_ORDER_MASK 0x1f #define KVMPPC_GOT_PAGE0x80 struct kvm_arch { #ifdef CONFIG_KVM_BOOK3S_64_HV unsigned long hpt_virt; struct revmap_entry *revmap; - unsigned long ram_psize; - unsigned long ram_porder; 
unsigned int lpid; unsigned int host_lpid; unsigned long host_lpcr; diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h index 111e1b4..a61b5b5 100644 --- a/arch/powerpc/include/asm/kvm_ppc.h +++ b/arch/powerpc/include/asm/kvm_ppc.h @@ -122,7 +122,7 @@ extern void kvmppc_free_hpt(struct kvm *kvm); extern long kvmppc_prepare_vrma(struct kvm *kvm, struct kvm_userspace_memory_region *mem); extern void kvmppc_map_vrma(struct kvm_vcpu *vcpu, - struct kvm_memory_slot *memslot); + struct kvm_memory_slot *memslot, unsigned long porder); extern int kvmppc_pseries_do_hcall(struct kvm_vcpu *vcpu); extern long kvm_vm_ioctl_create_spapr_tce(struct kvm *kvm, struct kvm_create_spapr_tce *args); diff --git a/arch/powerpc/include/asm/reg.h b/arch/powerpc/include/asm/reg.h index 559da19..4599d12 100644 --- a/arch/powerpc/include/asm/reg.h +++ b/arch/powerpc/include/asm/reg.h @@ -237,6 +237,7 @@ #define LPCR_ISL (1ul (63-2)) #define LPCR_VC_SH (63-2) #define LPCR_DPFD_SH (63-11) +#define LPCR_VRMASD (0x1ful (63-16)) #define LPCR_VRMA_L (1ul (63-12)) #define LPCR_VRMA_LP0(1ul (63-15)) #define LPCR_VRMA_LP1(1ul (63-16)) diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c b/arch/powerpc/kvm/book3s_64_mmu_hv.c index 87016cc..cc18f3d 100644 --- a/arch/powerpc/kvm/book3s_64_mmu_hv.c +++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c @@ -34,8 +34,6 @@ #include asm/ppc-opcode.h #include asm/cputable.h -/* Pages in the VRMA are 16MB pages */ -#define VRMA_PAGE_ORDER24 #define VRMA_VSID 0x1ffUL /* 1TB VSID reserved for VRMA */ /* POWER7 has 10-bit LPIDs, PPC970 has 6-bit LPIDs */ @@ -95,17 +93,31 @@ void kvmppc_free_hpt(struct kvm *kvm) free_pages(kvm-arch.hpt_virt, HPT_ORDER - PAGE_SHIFT); } -void kvmppc_map_vrma(struct kvm_vcpu *vcpu, struct kvm_memory_slot *memslot) +/* Bits in first HPTE dword for pagesize 4k, 64k or 16M */ +static inline unsigned long hpte0_pgsize_encoding(unsigned long pgsize) +{ + return (pgsize 0x1000) ? 
HPTE_V_LARGE : 0; +} + +/* Bits in second HPTE dword for pagesize 4k, 64k or 16M */ +static inline unsigned long hpte1_pgsize_encoding(unsigned long pgsize) +{ + return (pgsize == 0x1) ? 0x1000 : 0; +} + +void kvmppc_map_vrma(struct kvm_vcpu *vcpu, struct kvm_memory_slot *memslot, +unsigned long porder) { - struct kvm *kvm = vcpu-kvm; unsigned long i; unsigned long npages; unsigned long hp_v, hp_r; unsigned long addr, hash; - unsigned long porder =
[PATCH v3 01/14] KVM: PPC: Make wakeups work again for Book3S HV guests
When commit f43fdc15fa (KVM: PPC: booke: Improve timer register emulation) factored out some code in arch/powerpc/kvm/powerpc.c into a new helper function, kvm_vcpu_kick(), an error crept in which causes Book3s HV guest vcpus to stall. This fixes it. On POWER7 machines, guest vcpus are grouped together into virtual CPU cores that share a single waitqueue, so it's important to use vcpu-arch.wqp rather than vcpu-wq. Signed-off-by: Paul Mackerras pau...@samba.org --- arch/powerpc/kvm/powerpc.c |2 +- 1 files changed, 1 insertions(+), 1 deletions(-) diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c index ef8c990..b939b8a 100644 --- a/arch/powerpc/kvm/powerpc.c +++ b/arch/powerpc/kvm/powerpc.c @@ -561,7 +561,7 @@ void kvm_vcpu_kick(struct kvm_vcpu *vcpu) int cpu = vcpu-cpu; me = get_cpu(); - if (waitqueue_active(vcpu-wq)) { + if (waitqueue_active(vcpu-arch.wqp)) { wake_up_interruptible(vcpu-arch.wqp); vcpu-stat.halt_wakeup++; } else if (cpu != me cpu != -1) { -- 1.7.7.3 ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
[PATCH v3 13/14] KVM: PPC: Implement MMU notifiers for Book3S HV guests
This adds the infrastructure to enable us to page out pages underneath a Book3S HV guest, on processors that support virtualized partition memory, that is, POWER7. Instead of pinning all the guest's pages, we now look in the host userspace Linux page tables to find the mapping for a given guest page. Then, if the userspace Linux PTE gets invalidated, kvm_unmap_hva() gets called for that address, and we replace all the guest HPTEs that refer to that page with absent HPTEs, i.e. ones with the valid bit clear and the HPTE_V_ABSENT bit set, which will cause an HDSI when the guest tries to access them. Finally, the page fault handler is extended to reinstantiate the guest HPTE when the guest tries to access a page which has been paged out. Since we can't intercept the guest DSI and ISI interrupts on PPC970, we still have to pin all the guest pages on PPC970. We have a new flag, kvm-arch.using_mmu_notifiers, that indicates whether we can page guest pages out. If it is not set, the MMU notifier callbacks do nothing and everything operates as before. 
Signed-off-by: Paul Mackerras pau...@samba.org --- arch/powerpc/include/asm/kvm_book3s.h|4 + arch/powerpc/include/asm/kvm_book3s_64.h | 31 arch/powerpc/include/asm/kvm_host.h | 16 ++ arch/powerpc/include/asm/reg.h |3 + arch/powerpc/kvm/Kconfig |1 + arch/powerpc/kvm/book3s_64_mmu_hv.c | 290 +++--- arch/powerpc/kvm/book3s_hv.c | 25 ++-- arch/powerpc/kvm/book3s_hv_rm_mmu.c | 140 +++--- arch/powerpc/kvm/book3s_hv_rmhandlers.S | 49 + arch/powerpc/kvm/powerpc.c |3 + arch/powerpc/mm/hugetlbpage.c|2 + 11 files changed, 499 insertions(+), 65 deletions(-) diff --git a/arch/powerpc/include/asm/kvm_book3s.h b/arch/powerpc/include/asm/kvm_book3s.h index f6329bb..ea9539c 100644 --- a/arch/powerpc/include/asm/kvm_book3s.h +++ b/arch/powerpc/include/asm/kvm_book3s.h @@ -145,6 +145,10 @@ extern void kvmppc_set_bat(struct kvm_vcpu *vcpu, struct kvmppc_bat *bat, extern void kvmppc_giveup_ext(struct kvm_vcpu *vcpu, ulong msr); extern int kvmppc_emulate_paired_single(struct kvm_run *run, struct kvm_vcpu *vcpu); extern pfn_t kvmppc_gfn_to_pfn(struct kvm_vcpu *vcpu, gfn_t gfn); +extern void kvmppc_add_revmap_chain(struct kvm *kvm, struct revmap_entry *rev, + unsigned long *rmap, long pte_index, int realmode); +extern void kvmppc_invalidate_hpte(struct kvm *kvm, unsigned long *hptep, + unsigned long pte_index); extern void *kvmppc_pin_guest_page(struct kvm *kvm, unsigned long addr, unsigned long *nb_ret); extern void kvmppc_unpin_guest_page(struct kvm *kvm, void *addr); diff --git a/arch/powerpc/include/asm/kvm_book3s_64.h b/arch/powerpc/include/asm/kvm_book3s_64.h index 79dc37f..c21e46d 100644 --- a/arch/powerpc/include/asm/kvm_book3s_64.h +++ b/arch/powerpc/include/asm/kvm_book3s_64.h @@ -136,6 +136,37 @@ static inline int hpte_cache_flags_ok(unsigned long ptel, unsigned long io_type) return (wimg (HPTE_R_W | HPTE_R_I)) == io_type; } +/* + * Lock and read a linux PTE. If it's present and writable, atomically + * set dirty and referenced bits and return the PTE, otherwise return 0. 
+ */ +static inline pte_t kvmppc_read_update_linux_pte(pte_t *p) +{ + pte_t pte, tmp; + + /* wait until _PAGE_BUSY is clear then set it atomically */ + __asm__ __volatile__ ( + 1: ldarx %0,0,%3\n + andi. %1,%0,%4\n + bne-1b\n + ori %1,%0,%4\n + stdcx. %1,0,%3\n + bne-1b + : =r (pte), =r (tmp), =m (*p) + : r (p), i (_PAGE_BUSY) + : cc); + + if (pte_present(pte)) { + pte = pte_mkyoung(pte); + if (pte_write(pte)) + pte = pte_mkdirty(pte); + } + + *p = pte; /* clears _PAGE_BUSY */ + + return pte; +} + /* Return HPTE cache control bits corresponding to Linux pte bits */ static inline unsigned long hpte_cache_bits(unsigned long pte_val) { diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h index 937caca..968f3aa 100644 --- a/arch/powerpc/include/asm/kvm_host.h +++ b/arch/powerpc/include/asm/kvm_host.h @@ -32,6 +32,7 @@ #include linux/atomic.h #include asm/kvm_asm.h #include asm/processor.h +#include asm/page.h #define KVM_MAX_VCPUS NR_CPUS #define KVM_MAX_VCORES NR_CPUS @@ -44,6 +45,19 @@ #define KVM_COALESCED_MMIO_PAGE_OFFSET 1 #endif +#ifdef CONFIG_KVM_BOOK3S_64_HV +#include linux/mmu_notifier.h + +#define KVM_ARCH_WANT_MMU_NOTIFIER + +struct kvm; +extern int kvm_unmap_hva(struct kvm *kvm, unsigned long hva); +extern int kvm_age_hva(struct kvm *kvm, unsigned long hva); +extern int kvm_test_age_hva(struct kvm *kvm,
[PATCH v3 03/14] KVM: PPC: Keep a record of HV guest view of hashed page table entries
This adds an array that parallels the guest hashed page table (HPT), that is, it has one entry per HPTE, used to store the guest's view of the second doubleword of the corresponding HPTE. The first doubleword in the HPTE is the same as the guest's idea of it, so we don't need to store a copy, but the second doubleword in the HPTE has the real page number rather than the guest's logical page number. This allows us to remove the back_translate() and reverse_xlate() functions. This reverse mapping array is vmalloc'd, meaning that to access it in real mode we have to walk the kernel's page tables explicitly. That is done by the new real_vmalloc_addr() function. (In fact this returns an address in the linear mapping, so the result is usable both in real mode and in virtual mode.) There are also some minor cleanups here: moving the definitions of HPT_ORDER etc. to a header file and defining HPT_NPTE for HPT_NPTEG 3. Signed-off-by: Paul Mackerras pau...@samba.org --- arch/powerpc/include/asm/kvm_book3s_64.h |8 +++ arch/powerpc/include/asm/kvm_host.h | 10 arch/powerpc/kvm/book3s_64_mmu_hv.c | 44 +++ arch/powerpc/kvm/book3s_hv_rm_mmu.c | 87 ++ 4 files changed, 103 insertions(+), 46 deletions(-) diff --git a/arch/powerpc/include/asm/kvm_book3s_64.h b/arch/powerpc/include/asm/kvm_book3s_64.h index 2054e47..fa3dc79 100644 --- a/arch/powerpc/include/asm/kvm_book3s_64.h +++ b/arch/powerpc/include/asm/kvm_book3s_64.h @@ -35,6 +35,14 @@ static inline void svcpu_put(struct kvmppc_book3s_shadow_vcpu *svcpu) #define SPAPR_TCE_SHIFT12 +#ifdef CONFIG_KVM_BOOK3S_64_HV +/* For now use fixed-size 16MB page table */ +#define HPT_ORDER 24 +#define HPT_NPTEG (1ul (HPT_ORDER - 7))/* 128B per pteg */ +#define HPT_NPTE (HPT_NPTEG 3)/* 8 PTEs per PTEG */ +#define HPT_HASH_MASK (HPT_NPTEG - 1) +#endif + static inline unsigned long compute_tlbie_rb(unsigned long v, unsigned long r, unsigned long pte_index) { diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h 
index 66c75cd..629df2e 100644 --- a/arch/powerpc/include/asm/kvm_host.h +++ b/arch/powerpc/include/asm/kvm_host.h @@ -166,9 +166,19 @@ struct kvmppc_rma_info { atomic_t use_count; }; +/* + * The reverse mapping array has one entry for each HPTE, + * which stores the guest's view of the second word of the HPTE + * (including the guest physical address of the mapping). + */ +struct revmap_entry { + unsigned long guest_rpte; +}; + struct kvm_arch { #ifdef CONFIG_KVM_BOOK3S_64_HV unsigned long hpt_virt; + struct revmap_entry *revmap; unsigned long ram_npages; unsigned long ram_psize; unsigned long ram_porder; diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c b/arch/powerpc/kvm/book3s_64_mmu_hv.c index bc3a2ea..80ece8d 100644 --- a/arch/powerpc/kvm/book3s_64_mmu_hv.c +++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c @@ -23,6 +23,7 @@ #include linux/gfp.h #include linux/slab.h #include linux/hugetlb.h +#include linux/vmalloc.h #include asm/tlbflush.h #include asm/kvm_ppc.h @@ -33,11 +34,6 @@ #include asm/ppc-opcode.h #include asm/cputable.h -/* For now use fixed-size 16MB page table */ -#define HPT_ORDER 24 -#define HPT_NPTEG (1ul (HPT_ORDER - 7))/* 128B per pteg */ -#define HPT_HASH_MASK (HPT_NPTEG - 1) - /* Pages in the VRMA are 16MB pages */ #define VRMA_PAGE_ORDER24 #define VRMA_VSID 0x1ffUL /* 1TB VSID reserved for VRMA */ @@ -51,7 +47,9 @@ long kvmppc_alloc_hpt(struct kvm *kvm) { unsigned long hpt; unsigned long lpid; + struct revmap_entry *rev; + /* Allocate guest's hashed page table */ hpt = __get_free_pages(GFP_KERNEL|__GFP_ZERO|__GFP_REPEAT|__GFP_NOWARN, HPT_ORDER - PAGE_SHIFT); if (!hpt) { @@ -60,12 +58,20 @@ long kvmppc_alloc_hpt(struct kvm *kvm) } kvm-arch.hpt_virt = hpt; + /* Allocate reverse map array */ + rev = vmalloc(sizeof(struct revmap_entry) * HPT_NPTE); + if (!rev) { + pr_err(kvmppc_alloc_hpt: Couldn't alloc reverse map array\n); + goto out_freehpt; + } + kvm-arch.revmap = rev; + + /* Allocate the guest's logical partition ID */ do { lpid = 
find_first_zero_bit(lpid_inuse, NR_LPIDS); if (lpid = NR_LPIDS) { pr_err(kvm_alloc_hpt: No LPIDs free\n); - free_pages(hpt, HPT_ORDER - PAGE_SHIFT); - return -ENOMEM; + goto out_freeboth; } } while (test_and_set_bit(lpid, lpid_inuse)); @@ -74,11 +80,18 @@ long kvmppc_alloc_hpt(struct kvm *kvm) pr_info(KVM guest htab at %lx, LPID %lx\n, hpt, lpid); return 0; + +
[PATCH v3 00/14] KVM: PPC: Update Book3S HV memory handling
This series of patches updates the Book3S-HV KVM code that manages the guest hashed page table (HPT) to enable several things: * MMIO emulation and MMIO pass-through * Use of small pages (4kB or 64kB, depending on config) to back the guest memory * Pageable guest memory - i.e. backing pages can be removed from the guest and reinstated on demand, using the MMU notifier mechanism * Guests can be given read-only access to pages even though they think they have mapped them read/write. When they try to write to them their access is upgraded to read/write. This allows KSM to share pages between guests. On PPC970 we have no way to get DSIs and ISIs to come to the hypervisor, so we can't do MMIO emulation or pageable guest memory. On POWER7 we set the VPM1 bit in the LPCR to make all DSIs and ISIs come to the hypervisor (host) as HDSIs or HISIs. This code is working well in my tests. The sporadic crashes that I was seeing earlier are fixed by the second patch in the series. Somewhat to my surprise, when I implemented the last patch in the series I started to see KSM coalescing pages without any further effort on my part -- my tests were on a machine with Fedora 16 installed, and it has ksmtuned running by default. This series is on top of Alex Graf's kvm-ppc-next branch. The first patch in my series fixes a bug in one of the patches in that branch (KVM: PPC: booke: Improve timer register emulation). These patches only touch arch/powerpc except for patch 12, which adds a couple of barriers to allow mmu_notifier_retry() to be used outside of the kvm-mmu_lock. Paul. ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
[PATCH v3 09/14] KVM: PPC: Allow I/O mappings in memory slots
This provides for the case where userspace maps an I/O device into the address range of a memory slot using a VM_PFNMAP mapping. In that case, we work out the pfn from vma-vm_pgoff, and record the cache enable bits from vma-vm_page_prot in two low-order bits in the slot_phys array entries. Then, in kvmppc_h_enter() we check that the cache bits in the HPTE that the guest wants to insert match the cache bits in the slot_phys array entry. However, we do allow the guest to create what it thinks is a non-cacheable or write-through mapping to memory that is actually cacheable, so that we can use normal system memory as part of an emulated device later on. In that case the actual HPTE we insert is a cacheable HPTE. Signed-off-by: Paul Mackerras pau...@samba.org --- arch/powerpc/include/asm/kvm_book3s_64.h | 26 arch/powerpc/include/asm/kvm_host.h |2 + arch/powerpc/kvm/book3s_64_mmu_hv.c | 65 -- arch/powerpc/kvm/book3s_hv_rm_mmu.c | 15 +- 4 files changed, 84 insertions(+), 24 deletions(-) diff --git a/arch/powerpc/include/asm/kvm_book3s_64.h b/arch/powerpc/include/asm/kvm_book3s_64.h index 10920f7..18b590d 100644 --- a/arch/powerpc/include/asm/kvm_book3s_64.h +++ b/arch/powerpc/include/asm/kvm_book3s_64.h @@ -113,6 +113,32 @@ static inline unsigned long hpte_page_size(unsigned long h, unsigned long l) return 0; /* error */ } +static inline int hpte_cache_flags_ok(unsigned long ptel, unsigned long io_type) +{ + unsigned int wimg = ptel HPTE_R_WIMG; + + /* Handle SAO */ + if (wimg == (HPTE_R_W | HPTE_R_I | HPTE_R_M) + cpu_has_feature(CPU_FTR_ARCH_206)) + wimg = HPTE_R_M; + + if (!io_type) + return wimg == HPTE_R_M; + + return (wimg (HPTE_R_W | HPTE_R_I)) == io_type; +} + +/* Return HPTE cache control bits corresponding to Linux pte bits */ +static inline unsigned long hpte_cache_bits(unsigned long pte_val) +{ +#if _PAGE_NO_CACHE == HPTE_R_I _PAGE_WRITETHRU == HPTE_R_W + return pte_val (HPTE_R_W | HPTE_R_I); +#else + return ((pte_val _PAGE_NO_CACHE) ? 
HPTE_R_I : 0) + + ((pte_val _PAGE_WRITETHRU) ? HPTE_R_W : 0); +#endif +} + static inline bool slot_is_aligned(struct kvm_memory_slot *memslot, unsigned long pagesize) { diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h index 9252d5e..243bc80 100644 --- a/arch/powerpc/include/asm/kvm_host.h +++ b/arch/powerpc/include/asm/kvm_host.h @@ -178,6 +178,8 @@ struct revmap_entry { /* Low-order bits in kvm-arch.slot_phys[][] */ #define KVMPPC_PAGE_ORDER_MASK 0x1f +#define KVMPPC_PAGE_NO_CACHE HPTE_R_I/* 0x20 */ +#define KVMPPC_PAGE_WRITETHRU HPTE_R_W/* 0x40 */ #define KVMPPC_GOT_PAGE0x80 struct kvm_arch { diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c b/arch/powerpc/kvm/book3s_64_mmu_hv.c index cc18f3d..b904c40 100644 --- a/arch/powerpc/kvm/book3s_64_mmu_hv.c +++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c @@ -199,7 +199,8 @@ static long kvmppc_get_guest_page(struct kvm *kvm, unsigned long gfn, struct page *page, *hpage, *pages[1]; unsigned long s, pgsize; unsigned long *physp; - unsigned int got, pgorder; + unsigned int is_io, got, pgorder; + struct vm_area_struct *vma; unsigned long pfn, i, npages; physp = kvm-arch.slot_phys[memslot-id]; @@ -208,34 +209,51 @@ static long kvmppc_get_guest_page(struct kvm *kvm, unsigned long gfn, if (physp[gfn - memslot-base_gfn]) return 0; + is_io = 0; + got = 0; page = NULL; pgsize = psize; + err = -EINVAL; start = gfn_to_hva_memslot(memslot, gfn); /* Instantiate and get the page we want access to */ np = get_user_pages_fast(start, 1, 1, pages); - if (np != 1) - return -EINVAL; - page = pages[0]; - got = KVMPPC_GOT_PAGE; + if (np != 1) { + /* Look up the vma for the page */ + down_read(current-mm-mmap_sem); + vma = find_vma(current-mm, start); + if (!vma || vma-vm_start start || + start + psize vma-vm_end || + !(vma-vm_flags VM_PFNMAP)) + goto up_err; + is_io = hpte_cache_bits(pgprot_val(vma-vm_page_prot)); + pfn = vma-vm_pgoff + ((start - vma-vm_start) PAGE_SHIFT); + /* check alignment of pfn vs. 
requested page size */ + if (psize PAGE_SIZE (pfn ((psize PAGE_SHIFT) - 1))) + goto up_err; + up_read(current-mm-mmap_sem); - /* See if this is a large page */ - s = PAGE_SIZE; - if (PageHuge(page)) { - hpage = compound_head(page); - s = compound_order(hpage); -
[PATCH v3 12/14] KVM: Add barriers to allow mmu_notifier_retry to be used locklessly
This adds an smp_wmb in kvm_mmu_notifier_invalidate_range_end() and an smp_rmb in mmu_notifier_retry() so that mmu_notifier_retry() will give the correct answer when called without kvm-mmu_lock being held. PowerPC Book3S HV KVM wants to use a bitlock per guest page rather than a single global spinlock in order to improve the scalability of updates to the guest MMU hashed page table, and so needs this. Signed-off-by: Paul Mackerras pau...@samba.org --- include/linux/kvm_host.h | 14 +- virt/kvm/kvm_main.c |6 +++--- 2 files changed, 12 insertions(+), 8 deletions(-) diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 8c5c303..ec79a45 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -700,12 +700,16 @@ static inline int mmu_notifier_retry(struct kvm_vcpu *vcpu, unsigned long mmu_se if (unlikely(vcpu-kvm-mmu_notifier_count)) return 1; /* -* Both reads happen under the mmu_lock and both values are -* modified under mmu_lock, so there's no need of smb_rmb() -* here in between, otherwise mmu_notifier_count should be -* read before mmu_notifier_seq, see -* mmu_notifier_invalidate_range_end write side. +* Ensure the read of mmu_notifier_count happens before the read +* of mmu_notifier_seq. This interacts with the smp_wmb() in +* mmu_notifier_invalidate_range_end to make sure that the caller +* either sees the old (non-zero) value of mmu_notifier_count or +* the new (incremented) value of mmu_notifier_seq. +* PowerPC Book3s HV KVM calls this under a per-page lock +* rather than under kvm-mmu_lock, for scalability, so +* can't rely on kvm-mmu_lock to keep things ordered. */ + smp_rmb(); if (vcpu-kvm-mmu_notifier_seq != mmu_seq) return 1; return 0; diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index e289486..c144132 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -357,11 +357,11 @@ static void kvm_mmu_notifier_invalidate_range_end(struct mmu_notifier *mn, * been freed. 
*/ kvm-mmu_notifier_seq++; + smp_wmb(); /* * The above sequence increase must be visible before the -* below count decrease but both values are read by the kvm -* page fault under mmu_lock spinlock so we don't need to add -* a smb_wmb() here in between the two. +* below count decrease, which is ensured by the smp_wmb above +* in conjunction with the smp_rmb in mmu_notifier_retry(). */ kvm-mmu_notifier_count--; spin_unlock(kvm-mmu_lock); -- 1.7.7.3 ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
[PATCH v3 05/14] KVM: PPC: Add an interface for pinning guest pages in Book3s HV guests
This adds two new functions, kvmppc_pin_guest_page() and kvmppc_unpin_guest_page(), and uses them to pin the guest pages where the guest has registered areas of memory for the hypervisor to update, (i.e. the per-cpu virtual processor areas, SLB shadow buffers and dispatch trace logs) and then unpin them when they are no longer required. Although it is not strictly necessary to pin the pages at this point, since all guest pages are already pinned, later commits in this series will mean that guest pages aren't all pinned. Signed-off-by: Paul Mackerras pau...@samba.org --- arch/powerpc/include/asm/kvm_book3s.h |3 + arch/powerpc/kvm/book3s_64_mmu_hv.c | 38 ++ arch/powerpc/kvm/book3s_hv.c | 67 ++--- 3 files changed, 78 insertions(+), 30 deletions(-) diff --git a/arch/powerpc/include/asm/kvm_book3s.h b/arch/powerpc/include/asm/kvm_book3s.h index e8c78ac..a2a89c6 100644 --- a/arch/powerpc/include/asm/kvm_book3s.h +++ b/arch/powerpc/include/asm/kvm_book3s.h @@ -140,6 +140,9 @@ extern void kvmppc_set_bat(struct kvm_vcpu *vcpu, struct kvmppc_bat *bat, extern void kvmppc_giveup_ext(struct kvm_vcpu *vcpu, ulong msr); extern int kvmppc_emulate_paired_single(struct kvm_run *run, struct kvm_vcpu *vcpu); extern pfn_t kvmppc_gfn_to_pfn(struct kvm_vcpu *vcpu, gfn_t gfn); +extern void *kvmppc_pin_guest_page(struct kvm *kvm, unsigned long addr, + unsigned long *nb_ret); +extern void kvmppc_unpin_guest_page(struct kvm *kvm, void *addr); extern void kvmppc_entry_trampoline(void); extern void kvmppc_hv_entry_trampoline(void); diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c b/arch/powerpc/kvm/book3s_64_mmu_hv.c index e4c6069..dcd39dc 100644 --- a/arch/powerpc/kvm/book3s_64_mmu_hv.c +++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c @@ -184,6 +184,44 @@ static int kvmppc_mmu_book3s_64_hv_xlate(struct kvm_vcpu *vcpu, gva_t eaddr, return -ENOENT; } +void *kvmppc_pin_guest_page(struct kvm *kvm, unsigned long gpa, + unsigned long *nb_ret) +{ + struct kvm_memory_slot *memslot; + unsigned long gfn = gpa 
PAGE_SHIFT; + struct page *page; + unsigned long offset; + unsigned long pfn, pa; + unsigned long *physp; + + memslot = gfn_to_memslot(kvm, gfn); + if (!memslot || (memslot-flags KVM_MEMSLOT_INVALID)) + return NULL; + physp = kvm-arch.slot_phys[memslot-id]; + if (!physp) + return NULL; + physp += (gfn - memslot-base_gfn) + (kvm-arch.ram_porder - PAGE_SHIFT); + pa = *physp; + if (!pa) + return NULL; + pfn = pa PAGE_SHIFT; + page = pfn_to_page(pfn); + get_page(page); + offset = gpa (kvm-arch.ram_psize - 1); + if (nb_ret) + *nb_ret = kvm-arch.ram_psize - offset; + return page_address(page) + offset; +} + +void kvmppc_unpin_guest_page(struct kvm *kvm, void *va) +{ + struct page *page = virt_to_page(va); + + page = compound_head(page); + put_page(page); +} + void kvmppc_mmu_book3s_hv_init(struct kvm_vcpu *vcpu) { struct kvmppc_mmu *mmu = vcpu-arch.mmu; diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index 86d3e4b..bd82789 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c @@ -139,12 +139,10 @@ static unsigned long do_h_register_vpa(struct kvm_vcpu *vcpu, unsigned long vcpuid, unsigned long vpa) { struct kvm *kvm = vcpu-kvm; - unsigned long gfn, pg_index, ra, len; - unsigned long pg_offset; + unsigned long len, nb; void *va; struct kvm_vcpu *tvcpu; - struct kvm_memory_slot *memslot; - unsigned long *physp; + int err = H_PARAMETER; tvcpu = kvmppc_find_vcpu(kvm, vcpuid); if (!tvcpu) @@ -157,51 +155,41 @@ static unsigned long do_h_register_vpa(struct kvm_vcpu *vcpu, if (flags 4) { if (vpa 0x7f) return H_PARAMETER; + if (flags = 2 !tvcpu-arch.vpa) + return H_RESOURCE; /* registering new area; convert logical addr to real */ - gfn = vpa PAGE_SHIFT; - memslot = gfn_to_memslot(kvm, gfn); - if (!memslot || !(memslot-flags KVM_MEMSLOT_INVALID)) - return H_PARAMETER; - physp = kvm-arch.slot_phys[memslot-id]; - if (!physp) - return H_PARAMETER; - pg_index = (gfn - memslot-base_gfn) - (kvm-arch.ram_porder - PAGE_SHIFT); - pg_offset 
= vpa (kvm-arch.ram_psize - 1); - ra = physp[pg_index]; - if (!ra) + va = kvmppc_pin_guest_page(kvm, vpa, nb); + if (va == NULL) return H_PARAMETER; - ra =
[PATCH v3 07/14] KVM: PPC: Only get pages when actually needed, not in prepare_memory_region()
This removes the code from kvmppc_core_prepare_memory_region() that looked up the VMA for the region being added and called hva_to_page to get the pfns for the memory. We have no guarantee that there will be anything mapped there at the time of the KVM_SET_USER_MEMORY_REGION ioctl call; userspace can do that ioctl and then map memory into the region later. Instead we defer looking up the pfn for each memory page until it is needed, which generally means when the guest does an H_ENTER hcall on the page. Since we can't call get_user_pages in real mode, if we don't already have the pfn for the page, kvmppc_h_enter() will return H_TOO_HARD and we then call kvmppc_virtmode_h_enter() once we get back to kernel context. That calls kvmppc_get_guest_page() to get the pfn for the page, and then calls back to kvmppc_h_enter() to redo the HPTE insertion. When the first vcpu starts executing, we need to have the RMO or VRMA region mapped so that the guest's real mode accesses will work. Thus we now have a check in kvmppc_vcpu_run() to see if the RMO/VRMA is set up and if not, call kvmppc_hv_setup_rma(). It checks if the memslot starting at guest physical 0 now has RMO memory mapped there; if so it sets it up for the guest, otherwise on POWER7 it sets up the VRMA. The function that does that, kvmppc_map_vrma, is now a bit simpler, as it calls kvmppc_virtmode_h_enter instead of creating the HPTE itself. Since we are now potentially updating entries in the slot_phys[] arrays from multiple vcpu threads, we now have a spinlock protecting those updates to ensure that we don't lose track of any references to pages. 
Signed-off-by: Paul Mackerras pau...@samba.org --- arch/powerpc/include/asm/kvm_book3s.h|4 + arch/powerpc/include/asm/kvm_book3s_64.h | 12 ++ arch/powerpc/include/asm/kvm_host.h |2 + arch/powerpc/include/asm/kvm_ppc.h |4 +- arch/powerpc/kvm/book3s_64_mmu_hv.c | 130 +--- arch/powerpc/kvm/book3s_hv.c | 244 +- arch/powerpc/kvm/book3s_hv_rm_mmu.c | 54 7 files changed, 290 insertions(+), 160 deletions(-) diff --git a/arch/powerpc/include/asm/kvm_book3s.h b/arch/powerpc/include/asm/kvm_book3s.h index a2a89c6..5329c21 100644 --- a/arch/powerpc/include/asm/kvm_book3s.h +++ b/arch/powerpc/include/asm/kvm_book3s.h @@ -143,6 +143,10 @@ extern pfn_t kvmppc_gfn_to_pfn(struct kvm_vcpu *vcpu, gfn_t gfn); extern void *kvmppc_pin_guest_page(struct kvm *kvm, unsigned long addr, unsigned long *nb_ret); extern void kvmppc_unpin_guest_page(struct kvm *kvm, void *addr); +extern long kvmppc_virtmode_h_enter(struct kvm_vcpu *vcpu, unsigned long flags, + long pte_index, unsigned long pteh, unsigned long ptel); +extern long kvmppc_h_enter(struct kvm_vcpu *vcpu, unsigned long flags, + long pte_index, unsigned long pteh, unsigned long ptel); extern void kvmppc_entry_trampoline(void); extern void kvmppc_hv_entry_trampoline(void); diff --git a/arch/powerpc/include/asm/kvm_book3s_64.h b/arch/powerpc/include/asm/kvm_book3s_64.h index 300ec04..7e6f2ed 100644 --- a/arch/powerpc/include/asm/kvm_book3s_64.h +++ b/arch/powerpc/include/asm/kvm_book3s_64.h @@ -101,4 +101,16 @@ static inline unsigned long compute_tlbie_rb(unsigned long v, unsigned long r, return rb; } +static inline unsigned long hpte_page_size(unsigned long h, unsigned long l) +{ + /* only handle 4k, 64k and 16M pages for now */ + if (!(h HPTE_V_LARGE)) + return 1ul 12; /* 4k page */ + if ((l 0xf000) == 0x1000 cpu_has_feature(CPU_FTR_ARCH_206)) + return 1ul 16; /* 64k page */ + if ((l 0xff000) == 0) + return 1ul 24; /* 16M page */ + return 0; /* error */ +} + #endif /* __ASM_KVM_BOOK3S_64_H__ */ diff --git 
a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h index 7a17ab5..beb22ba 100644 --- a/arch/powerpc/include/asm/kvm_host.h +++ b/arch/powerpc/include/asm/kvm_host.h @@ -194,7 +194,9 @@ struct kvm_arch { unsigned long lpcr; unsigned long rmor; struct kvmppc_rma_info *rma; + int rma_setup_done; struct list_head spapr_tce_tables; + spinlock_t slot_phys_lock; unsigned long *slot_phys[KVM_MEM_SLOTS_NUM]; int slot_npages[KVM_MEM_SLOTS_NUM]; unsigned short last_vcpu[NR_CPUS]; diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h index fc2d696..111e1b4 100644 --- a/arch/powerpc/include/asm/kvm_ppc.h +++ b/arch/powerpc/include/asm/kvm_ppc.h @@ -121,8 +121,8 @@ extern long kvmppc_alloc_hpt(struct kvm *kvm); extern void kvmppc_free_hpt(struct kvm *kvm); extern long kvmppc_prepare_vrma(struct kvm *kvm, struct kvm_userspace_memory_region *mem); -extern void kvmppc_map_vrma(struct kvm
[PATCH v3 11/14] KVM: PPC: Implement MMIO emulation support for Book3S HV guests
This provides the low-level support for MMIO emulation in Book3S HV guests. When the guest tries to map a page which is not covered by any memslot, that page is taken to be an MMIO emulation page. Instead of inserting a valid HPTE, we insert an HPTE that has the valid bit clear but another hypervisor software-use bit set, which we call HPTE_V_ABSENT, to indicate that this is an absent page. An absent page is treated much like a valid page as far as guest hcalls (H_ENTER, H_REMOVE, H_READ etc.) are concerned, except of course that an absent HPTE doesn't need to be invalidated with tlbie since it was never valid as far as the hardware is concerned. When the guest accesses a page for which there is an absent HPTE, it will take a hypervisor data storage interrupt (HDSI) since we now set the VPM1 bit in the LPCR. Our HDSI handler for HPTE-not-present faults looks up the hash table and if it finds an absent HPTE mapping the requested virtual address, will switch to kernel mode and handle the fault in kvmppc_book3s_hv_page_fault(), which at present just calls kvmppc_hv_emulate_mmio() to set up the MMIO emulation. This is based on an earlier patch by Benjamin Herrenschmidt, but since heavily reworked. 
Signed-off-by: Paul Mackerras pau...@samba.org --- arch/powerpc/include/asm/kvm_book3s.h|5 + arch/powerpc/include/asm/kvm_book3s_64.h | 26 +++ arch/powerpc/include/asm/kvm_host.h |5 + arch/powerpc/include/asm/mmu-hash64.h|2 +- arch/powerpc/include/asm/ppc-opcode.h|4 +- arch/powerpc/include/asm/reg.h |1 + arch/powerpc/kernel/asm-offsets.c|1 + arch/powerpc/kernel/exceptions-64s.S |8 +- arch/powerpc/kvm/book3s_64_mmu_hv.c | 228 +-- arch/powerpc/kvm/book3s_hv.c | 21 ++- arch/powerpc/kvm/book3s_hv_rm_mmu.c | 262 ++ arch/powerpc/kvm/book3s_hv_rmhandlers.S | 127 --- 12 files changed, 607 insertions(+), 83 deletions(-) diff --git a/arch/powerpc/include/asm/kvm_book3s.h b/arch/powerpc/include/asm/kvm_book3s.h index 5329c21..f6329bb 100644 --- a/arch/powerpc/include/asm/kvm_book3s.h +++ b/arch/powerpc/include/asm/kvm_book3s.h @@ -121,6 +121,11 @@ extern void kvmppc_mmu_book3s_hv_init(struct kvm_vcpu *vcpu); extern int kvmppc_mmu_map_page(struct kvm_vcpu *vcpu, struct kvmppc_pte *pte); extern int kvmppc_mmu_map_segment(struct kvm_vcpu *vcpu, ulong eaddr); extern void kvmppc_mmu_flush_segments(struct kvm_vcpu *vcpu); +extern int kvmppc_book3s_hv_page_fault(struct kvm_run *run, + struct kvm_vcpu *vcpu, unsigned long addr, + unsigned long status); +extern long kvmppc_hv_find_lock_hpte(struct kvm *kvm, gva_t eaddr, + unsigned long slb_v, unsigned long valid); extern void kvmppc_mmu_hpte_cache_map(struct kvm_vcpu *vcpu, struct hpte_cache *pte); extern struct hpte_cache *kvmppc_mmu_hpte_cache_next(struct kvm_vcpu *vcpu); diff --git a/arch/powerpc/include/asm/kvm_book3s_64.h b/arch/powerpc/include/asm/kvm_book3s_64.h index 9508c03..79dc37f 100644 --- a/arch/powerpc/include/asm/kvm_book3s_64.h +++ b/arch/powerpc/include/asm/kvm_book3s_64.h @@ -43,12 +43,15 @@ static inline void svcpu_put(struct kvmppc_book3s_shadow_vcpu *svcpu) #define HPT_HASH_MASK (HPT_NPTEG - 1) #endif +#define VRMA_VSID 0x1ffUL /* 1TB VSID reserved for VRMA */ + /* * We use a lock bit in HPTE dword 0 to 
synchronize updates and * accesses to each HPTE, and another bit to indicate non-present * HPTEs. */ #define HPTE_V_HVLOCK 0x40UL +#define HPTE_V_ABSENT 0x20UL static inline long try_lock_hpte(unsigned long *hpte, unsigned long bits) { @@ -144,6 +147,29 @@ static inline unsigned long hpte_cache_bits(unsigned long pte_val) #endif } +static inline bool hpte_read_permission(unsigned long pp, unsigned long key) +{ + if (key) + return PP_RWRX <= pp && pp <= PP_RXRX; + return 1; +} + +static inline bool hpte_write_permission(unsigned long pp, unsigned long key) +{ + if (key) + return pp == PP_RWRW; + return pp <= PP_RWRW; +} + +static inline int hpte_get_skey_perm(unsigned long hpte_r, unsigned long amr) +{ + unsigned long skey; + + skey = ((hpte_r & HPTE_R_KEY_HI) >> 57) | + ((hpte_r & HPTE_R_KEY_LO) >> 9); + return (amr >> (62 - 2 * skey)) & 3; +} + static inline void lock_rmap(unsigned long *rmap) { do { diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h index 97cb2d7..937caca 100644 --- a/arch/powerpc/include/asm/kvm_host.h +++ b/arch/powerpc/include/asm/kvm_host.h @@ -210,6 +210,7 @@ struct kvm_arch { unsigned long lpcr; unsigned long rmor; struct kvmppc_rma_info *rma; + unsigned long vrma_slb_v; int rma_setup_done; struct list_head spapr_tce_tables; spinlock_t slot_phys_lock; @@ -452,6 +453,10 @@ struct
[PATCH v3 04/14] KVM: PPC: Keep page physical addresses in per-slot arrays
This allocates an array for each memory slot that is added to store the physical addresses of the pages in the slot. This array is vmalloc'd and accessed in kvmppc_h_enter using real_vmalloc_addr(). This allows us to remove the ram_pginfo field from the kvm_arch struct, and removes the 64GB guest RAM limit that we had. We use the low-order bits of the array entries to store a flag indicating that we have done get_page on the corresponding page, and therefore need to call put_page when we are finished with the page. Currently this is set for all pages except those in our special RMO regions. Signed-off-by: Paul Mackerras pau...@samba.org --- arch/powerpc/include/asm/kvm_host.h |9 ++- arch/powerpc/kvm/book3s_64_mmu_hv.c | 18 +++--- arch/powerpc/kvm/book3s_hv.c| 114 +-- arch/powerpc/kvm/book3s_hv_rm_mmu.c | 41 +++- 4 files changed, 107 insertions(+), 75 deletions(-) diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h index 629df2e..7a17ab5 100644 --- a/arch/powerpc/include/asm/kvm_host.h +++ b/arch/powerpc/include/asm/kvm_host.h @@ -38,6 +38,7 @@ #define KVM_MEMORY_SLOTS 32 /* memory slots that does not exposed to userspace */ #define KVM_PRIVATE_MEM_SLOTS 4 +#define KVM_MEM_SLOTS_NUM (KVM_MEMORY_SLOTS + KVM_PRIVATE_MEM_SLOTS) #ifdef CONFIG_KVM_MMIO #define KVM_COALESCED_MMIO_PAGE_OFFSET 1 @@ -175,25 +176,27 @@ struct revmap_entry { unsigned long guest_rpte; }; +/* Low-order bits in kvm-arch.slot_phys[][] */ +#define KVMPPC_GOT_PAGE0x80 + struct kvm_arch { #ifdef CONFIG_KVM_BOOK3S_64_HV unsigned long hpt_virt; struct revmap_entry *revmap; - unsigned long ram_npages; unsigned long ram_psize; unsigned long ram_porder; - struct kvmppc_pginfo *ram_pginfo; unsigned int lpid; unsigned int host_lpid; unsigned long host_lpcr; unsigned long sdr1; unsigned long host_sdr1; int tlbie_lock; - int n_rma_pages; unsigned long lpcr; unsigned long rmor; struct kvmppc_rma_info *rma; struct list_head spapr_tce_tables; + unsigned long 
*slot_phys[KVM_MEM_SLOTS_NUM]; + int slot_npages[KVM_MEM_SLOTS_NUM]; unsigned short last_vcpu[NR_CPUS]; struct kvmppc_vcore *vcores[KVM_MAX_VCORES]; #endif /* CONFIG_KVM_BOOK3S_64_HV */ diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c b/arch/powerpc/kvm/book3s_64_mmu_hv.c index 80ece8d..e4c6069 100644 --- a/arch/powerpc/kvm/book3s_64_mmu_hv.c +++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c @@ -98,16 +98,16 @@ void kvmppc_free_hpt(struct kvm *kvm) void kvmppc_map_vrma(struct kvm *kvm, struct kvm_userspace_memory_region *mem) { unsigned long i; - unsigned long npages = kvm-arch.ram_npages; - unsigned long pfn; + unsigned long npages; + unsigned long pa; unsigned long *hpte; unsigned long hash; unsigned long porder = kvm-arch.ram_porder; struct revmap_entry *rev; - struct kvmppc_pginfo *pginfo = kvm-arch.ram_pginfo; + unsigned long *physp; - if (!pginfo) - return; + physp = kvm-arch.slot_phys[mem-slot]; + npages = kvm-arch.slot_npages[mem-slot]; /* VRMA can't be 1TB */ if (npages 1ul (40 - porder)) @@ -117,9 +117,10 @@ void kvmppc_map_vrma(struct kvm *kvm, struct kvm_userspace_memory_region *mem) npages = HPT_NPTEG; for (i = 0; i npages; ++i) { - pfn = pginfo[i].pfn; - if (!pfn) + pa = physp[i]; + if (!pa) break; + pa = PAGE_MASK; /* can't use hpt_hash since va 64 bits */ hash = (i ^ (VRMA_VSID ^ (VRMA_VSID 25))) HPT_HASH_MASK; /* @@ -131,8 +132,7 @@ void kvmppc_map_vrma(struct kvm *kvm, struct kvm_userspace_memory_region *mem) hash = (hash 3) + 7; hpte = (unsigned long *) (kvm-arch.hpt_virt + (hash 4)); /* HPTE low word - RPN, protection, etc. 
*/ - hpte[1] = (pfn PAGE_SHIFT) | HPTE_R_R | HPTE_R_C | - HPTE_R_M | PP_RWXX; + hpte[1] = pa | HPTE_R_R | HPTE_R_C | HPTE_R_M | PP_RWXX; smp_wmb(); hpte[0] = HPTE_V_1TB_SEG | (VRMA_VSID (40 - 16)) | (i (VRMA_PAGE_ORDER - 16)) | HPTE_V_BOLTED | diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index da7db14..86d3e4b 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c @@ -50,14 +50,6 @@ #include linux/vmalloc.h #include linux/highmem.h -/* - * For now, limit memory to 64GB and require it to be large pages. - * This value is chosen because it makes the ram_pginfo array be - * 64kB in size, which is about as large as we want to be trying - * to allocate with kmalloc. -
[PATCH v3 14/14] KVM: PPC: Allow for read-only pages backing a Book3S HV guest
With this, if a guest does an H_ENTER with a read/write HPTE on a page which is currently read-only, we make the actual HPTE inserted be a read-only version of the HPTE. We now intercept protection faults as well as HPTE not found faults, and for a protection fault we work out whether it should be reflected to the guest (e.g. because the guest HPTE didn't allow write access to usermode) or handled by switching to kernel context and calling kvmppc_book3s_hv_page_fault, which will then request write access to the page and update the actual HPTE. Signed-off-by: Paul Mackerras pau...@samba.org --- arch/powerpc/include/asm/kvm_book3s_64.h | 20 +- arch/powerpc/kvm/book3s_64_mmu_hv.c | 39 +++-- arch/powerpc/kvm/book3s_hv_rm_mmu.c | 32 +--- arch/powerpc/kvm/book3s_hv_rmhandlers.S |4 +- 4 files changed, 78 insertions(+), 17 deletions(-) diff --git a/arch/powerpc/include/asm/kvm_book3s_64.h b/arch/powerpc/include/asm/kvm_book3s_64.h index c21e46d..b0c08b1 100644 --- a/arch/powerpc/include/asm/kvm_book3s_64.h +++ b/arch/powerpc/include/asm/kvm_book3s_64.h @@ -121,6 +121,22 @@ static inline unsigned long hpte_rpn(unsigned long ptel, unsigned long psize) return ((ptel & HPTE_R_RPN) & ~(psize - 1)) >> PAGE_SHIFT; } +static inline int hpte_is_writable(unsigned long ptel) +{ + unsigned long pp = ptel & (HPTE_R_PP0 | HPTE_R_PP); + + return pp != PP_RXRX && pp != PP_RXXX; +} + +static inline unsigned long hpte_make_readonly(unsigned long ptel) +{ + if ((ptel & HPTE_R_PP0) || (ptel & HPTE_R_PP) == PP_RWXX) + ptel = (ptel & ~HPTE_R_PP) | PP_RXXX; + else + ptel |= PP_RXRX; + return ptel; +} + static inline int hpte_cache_flags_ok(unsigned long ptel, unsigned long io_type) { unsigned int wimg = ptel & HPTE_R_WIMG; @@ -140,7 +156,7 @@ static inline int hpte_cache_flags_ok(unsigned long ptel, unsigned long io_type) * Lock and read a linux PTE. If it's present and writable, atomically * set dirty and referenced bits and return the PTE, otherwise return 0. 
*/ -static inline pte_t kvmppc_read_update_linux_pte(pte_t *p) +static inline pte_t kvmppc_read_update_linux_pte(pte_t *p, int writing) { pte_t pte, tmp; @@ -158,7 +174,7 @@ static inline pte_t kvmppc_read_update_linux_pte(pte_t *p) if (pte_present(pte)) { pte = pte_mkyoung(pte); - if (pte_write(pte)) + if (writing pte_write(pte)) pte = pte_mkdirty(pte); } diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c b/arch/powerpc/kvm/book3s_64_mmu_hv.c index 83761dd..66d6452 100644 --- a/arch/powerpc/kvm/book3s_64_mmu_hv.c +++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c @@ -503,6 +503,7 @@ int kvmppc_book3s_hv_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu, struct page *page, *pages[1]; long index, ret, npages; unsigned long is_io; + unsigned int writing, write_ok; struct vm_area_struct *vma; /* @@ -553,8 +554,11 @@ int kvmppc_book3s_hv_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu, pfn = 0; page = NULL; pte_size = PAGE_SIZE; + writing = (dsisr DSISR_ISSTORE) != 0; + /* If writing != 0, then the HPTE must allow writing, if we get here */ + write_ok = writing; hva = gfn_to_hva_memslot(memslot, gfn); - npages = get_user_pages_fast(hva, 1, 1, pages); + npages = get_user_pages_fast(hva, 1, writing, pages); if (npages 1) { /* Check if it's an I/O mapping */ down_read(current-mm-mmap_sem); @@ -565,6 +569,7 @@ int kvmppc_book3s_hv_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu, ((hva - vma-vm_start) PAGE_SHIFT); pte_size = psize; is_io = hpte_cache_bits(pgprot_val(vma-vm_page_prot)); + write_ok = vma-vm_flags VM_WRITE; } up_read(current-mm-mmap_sem); if (!pfn) @@ -575,6 +580,24 @@ int kvmppc_book3s_hv_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu, page = compound_head(page); pte_size = compound_order(page); } + /* if the guest wants write access, see if that is OK */ + if (!writing hpte_is_writable(r)) { + pte_t *ptep, pte; + + /* +* We need to protect against page table destruction +* while looking up and updating the pte. 
+*/ + rcu_read_lock_sched(); + ptep = find_linux_pte_or_hugepte(current-mm-pgd, +hva, NULL); + if (ptep pte_present(*ptep)) { + pte =
[PATCH v3 06/14] KVM: PPC: Make the H_ENTER hcall more reliable
At present, our implementation of H_ENTER only makes one try at locking each slot that it looks at, and doesn't even retry the ldarx/stdcx. atomic update sequence that it uses to attempt to lock the slot. Thus it can return the H_PTEG_FULL error unnecessarily, particularly when the H_EXACT flag is set, meaning that the caller wants a specific PTEG slot. This improves the situation by making a second pass when no free HPTE slot is found, where we spin until we succeed in locking each slot in turn and then check whether it is full while we hold the lock. If the second pass fails, then we return H_PTEG_FULL. This also moves lock_hpte to a header file (since later commits in this series will need to use it from other source files) and renames it to try_lock_hpte, which is a somewhat less misleading name. Signed-off-by: Paul Mackerras pau...@samba.org --- arch/powerpc/include/asm/kvm_book3s_64.h | 25 arch/powerpc/kvm/book3s_hv_rm_mmu.c | 63 -- 2 files changed, 59 insertions(+), 29 deletions(-) diff --git a/arch/powerpc/include/asm/kvm_book3s_64.h b/arch/powerpc/include/asm/kvm_book3s_64.h index fa3dc79..300ec04 100644 --- a/arch/powerpc/include/asm/kvm_book3s_64.h +++ b/arch/powerpc/include/asm/kvm_book3s_64.h @@ -43,6 +43,31 @@ static inline void svcpu_put(struct kvmppc_book3s_shadow_vcpu *svcpu) #define HPT_HASH_MASK (HPT_NPTEG - 1) #endif +/* + * We use a lock bit in HPTE dword 0 to synchronize updates and + * accesses to each HPTE, and another bit to indicate non-present + * HPTEs. + */ +#define HPTE_V_HVLOCK 0x40UL + +static inline long try_lock_hpte(unsigned long *hpte, unsigned long bits) +{ + unsigned long tmp, old; + + asm volatile( ldarx %0,0,%2\n + and.%1,%0,%3\n + bne 2f\n + ori %0,%0,%4\n + stdcx. 
%0,0,%2\n + beq+2f\n + li %1,%3\n +2:isync +: =r (tmp), =r (old) +: r (hpte), r (bits), i (HPTE_V_HVLOCK) +: cc, memory); + return old == 0; +} + static inline unsigned long compute_tlbie_rb(unsigned long v, unsigned long r, unsigned long pte_index) { diff --git a/arch/powerpc/kvm/book3s_hv_rm_mmu.c b/arch/powerpc/kvm/book3s_hv_rm_mmu.c index 84dae82..a28a603 100644 --- a/arch/powerpc/kvm/book3s_hv_rm_mmu.c +++ b/arch/powerpc/kvm/book3s_hv_rm_mmu.c @@ -53,26 +53,6 @@ static void *real_vmalloc_addr(void *x) return __va(addr); } -#define HPTE_V_HVLOCK 0x40UL - -static inline long lock_hpte(unsigned long *hpte, unsigned long bits) -{ - unsigned long tmp, old; - - asm volatile( ldarx %0,0,%2\n - and.%1,%0,%3\n - bne 2f\n - ori %0,%0,%4\n - stdcx. %0,0,%2\n - beq+2f\n - li %1,%3\n -2:isync -: =r (tmp), =r (old) -: r (hpte), r (bits), i (HPTE_V_HVLOCK) -: cc, memory); - return old == 0; -} - long kvmppc_h_enter(struct kvm_vcpu *vcpu, unsigned long flags, long pte_index, unsigned long pteh, unsigned long ptel) { @@ -126,24 +106,49 @@ long kvmppc_h_enter(struct kvm_vcpu *vcpu, unsigned long flags, pteh = ~0x60UL; ptel = ~(HPTE_R_PP0 - kvm-arch.ram_psize); ptel |= pa; + if (pte_index = HPT_NPTE) return H_PARAMETER; if (likely((flags H_EXACT) == 0)) { pte_index = ~7UL; hpte = (unsigned long *)(kvm-arch.hpt_virt + (pte_index 4)); - for (i = 0; ; ++i) { - if (i == 8) - return H_PTEG_FULL; + for (i = 0; i 8; ++i) { if ((*hpte HPTE_V_VALID) == 0 - lock_hpte(hpte, HPTE_V_HVLOCK | HPTE_V_VALID)) + try_lock_hpte(hpte, HPTE_V_HVLOCK | HPTE_V_VALID)) break; hpte += 2; } + if (i == 8) { + /* +* Since try_lock_hpte doesn't retry (not even stdcx. +* failures), it could be that there is a free slot +* but we transiently failed to lock it. Try again, +* actually locking each slot and checking it. +*/ + hpte -= 16; + for (i = 0; i 8; ++i) { + while (!try_lock_hpte(hpte, HPTE_V_HVLOCK)) + cpu_relax(); + if ((*hpte HPTE_V_VALID) == 0) +
Re: [PATCH 1/4] powerpc/kprobe: introduce a new thread flag
On Mon, 2011-12-12 at 16:50 +0800, Tiejun Chen wrote: We need to add a new thread flag, TIF_KPROBE/_TIF_DELAYED_KPROBE, for handling kprobe operation while exiting exception. The basic idea is sane, however the instruction emulation isn't per-se kprobe specific. It could be used by xmon too for example. I'd rather use a different name, something like TIF_EMULATE_STACK_STORE or something like that. Cheers, Ben. Signed-off-by: Tiejun Chen tiejun.c...@windriver.com --- arch/powerpc/include/asm/thread_info.h |2 ++ 1 files changed, 2 insertions(+), 0 deletions(-) diff --git a/arch/powerpc/include/asm/thread_info.h b/arch/powerpc/include/asm/thread_info.h index 836f231..3378734 100644 --- a/arch/powerpc/include/asm/thread_info.h +++ b/arch/powerpc/include/asm/thread_info.h @@ -112,6 +112,7 @@ static inline struct thread_info *current_thread_info(void) #define TIF_FREEZE 14 /* Freezing for suspend */ #define TIF_SYSCALL_TRACEPOINT 15 /* syscall tracepoint instrumentation */ #define TIF_RUNLATCH 16 /* Is the runlatch enabled? */ +#define TIF_KPROBE 17 /* Is the delayed kprobe operation? */ /* as above, but as bit values */ #define _TIF_SYSCALL_TRACE (1TIF_SYSCALL_TRACE) @@ -130,6 +131,7 @@ static inline struct thread_info *current_thread_info(void) #define _TIF_FREEZE (1TIF_FREEZE) #define _TIF_SYSCALL_TRACEPOINT (1TIF_SYSCALL_TRACEPOINT) #define _TIF_RUNLATCH(1TIF_RUNLATCH) +#define _TIF_DELAYED_KPROBE (1TIF_KPROBE) #define _TIF_SYSCALL_T_OR_A (_TIF_SYSCALL_TRACE | _TIF_SYSCALL_AUDIT | \ _TIF_SECCOMP | _TIF_SYSCALL_TRACEPOINT) ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
Re: [PATCH 2/4] ppc32/kprobe: introduce copy_exc_stack
On Mon, 2011-12-12 at 16:50 +0800, Tiejun Chen wrote: We need a copy mechanism to migrate exception stack. But looks copy_page() already implement this well so we can complete copy_exc_stack() based on that directly. I'd rather you don't hijack copy_page which is quite sensitive. The emulation isn't performance critical so a dumber routine would work fine. Why not use memcpy ? You can call it from assembly. Cheers, Ben. Signed-off-by: Tiejun Chen tiejun.c...@windriver.com --- arch/powerpc/include/asm/page_32.h |1 + arch/powerpc/kernel/misc_32.S | 16 +++- arch/powerpc/kernel/ppc_ksyms.c|1 + 3 files changed, 17 insertions(+), 1 deletions(-) diff --git a/arch/powerpc/include/asm/page_32.h b/arch/powerpc/include/asm/page_32.h index 68d73b2..2c1fd84 100644 --- a/arch/powerpc/include/asm/page_32.h +++ b/arch/powerpc/include/asm/page_32.h @@ -40,6 +40,7 @@ struct page; extern void clear_pages(void *page, int order); static inline void clear_page(void *page) { clear_pages(page, 0); } extern void copy_page(void *to, void *from); +extern void copy_exc_stack(void *to, void *from); #include asm-generic/getorder.h diff --git a/arch/powerpc/kernel/misc_32.S b/arch/powerpc/kernel/misc_32.S index 998a100..aa02545 100644 --- a/arch/powerpc/kernel/misc_32.S +++ b/arch/powerpc/kernel/misc_32.S @@ -527,7 +527,7 @@ _GLOBAL(clear_pages) stw r8,12(r3); \ stwur9,16(r3) -_GLOBAL(copy_page) +ready_copy: addir3,r3,-4 addir4,r4,-4 @@ -544,7 +544,21 @@ _GLOBAL(copy_page) dcbtr5,r4 li r11,L1_CACHE_BYTES+4 #endif /* MAX_COPY_PREFETCH */ + blr + +_GLOBAL(copy_exc_stack) + mflrr12 + bl ready_copy + mtlrr12 + li r0,INT_FRAME_SIZE/L1_CACHE_BYTES - MAX_COPY_PREFETCH + b go_copy + +_GLOBAL(copy_page) + mflrr12 + bl ready_copy + mtlrr12 li r0,PAGE_SIZE/L1_CACHE_BYTES - MAX_COPY_PREFETCH +go_copy: crclr 4*cr0+eq 2: mtctr r0 diff --git a/arch/powerpc/kernel/ppc_ksyms.c b/arch/powerpc/kernel/ppc_ksyms.c index f5ae872..2223daf 100644 --- a/arch/powerpc/kernel/ppc_ksyms.c +++ 
b/arch/powerpc/kernel/ppc_ksyms.c @@ -88,6 +88,7 @@ EXPORT_SYMBOL(__clear_user); EXPORT_SYMBOL(__strncpy_from_user); EXPORT_SYMBOL(__strnlen_user); EXPORT_SYMBOL(copy_page); +EXPORT_SYMBOL(copy_exc_stack); #if defined(CONFIG_PCI) && defined(CONFIG_PPC32) EXPORT_SYMBOL(isa_io_base); ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
Re: [PATCH 3/4] ppc32/kprobe: complete kprobe and migrate exception frame
On Mon, 2011-12-12 at 16:50 +0800, Tiejun Chen wrote: We can't emulate stwu since that may corrupt current exception stack. So we will have to do real store operation in the exception return code. Firstly we'll allocate a trampoline exception frame below the kprobed function stack and copy the current exception frame to the trampoline. Then we can do this real store operation to implement 'stwu', and reroute the trampoline frame to r1 to complete this exception migration. Signed-off-by: Tiejun Chen tiejun.c...@windriver.com --- arch/powerpc/kernel/entry_32.S | 26 ++ 1 files changed, 26 insertions(+), 0 deletions(-) diff --git a/arch/powerpc/kernel/entry_32.S b/arch/powerpc/kernel/entry_32.S index 56212bc..d56e311 100644 --- a/arch/powerpc/kernel/entry_32.S +++ b/arch/powerpc/kernel/entry_32.S @@ -1185,6 +1185,8 @@ recheck: bne-do_resched andi. r0,r9,_TIF_USER_WORK_MASK beq restore_user + andis. r0,r9,_TIF_DELAYED_KPROBE@h + bne-restore_kprobe Same comment as earlier about name. Note that you're not hooking in the right place. recheck is only reached if you -already- went out of the normal exit path and only when going back to user space unless I'm missing something (which is really the case you don't care about). You need to hook into resume_kernel instead. Also, we may want to simplify the whole thing, instead of checking user vs. kernel first etc... we could instead have a single _TIF_WORK_MASK which includes both the bits for user work and the new bit for kernel work. With preempt, the kernel work bits would also include _TIF_NEED_RESCHED. Then you have in the common exit path, a single test for that, with a fast path that skips everything and just goes to restore for both kernel and user. The only possible issue is the setting of dbcr0 for BookE and 44x and we can keep that as a special case keyed of MSR_PR in the resume path under ifdef BOOKE (we'll probably sanitize that later with some different rework anyway). 
So the exit path because something like: ret_from_except: .. hard disable interrupts (unchanged) ... read TIF flags andi with _TIF_WORK_MASK nothing set - restore check PR set - do_work_user no set - do_work_kernel (kprobes preempt) (both loop until relevant _TIF flags are all clear) restore: #ifdef BOOKE 44x test PR do dbcr0 stuff if needed ... nornal restore ... do_user_signal: /* r10 contains MSR_KERNEL here */ ori r10,r10,MSR_EE SYNC @@ -1202,6 +1204,30 @@ do_user_signal:/* r10 contains MSR_KERNEL here */ REST_NVGPRS(r1) b recheck +restore_kprobe: + lwz r3,GPR1(r1) + subir3,r3,INT_FRAME_SIZE; /* Allocate a trampoline exception frame */ + mr r4,r1 + bl copy_exc_stack /* Copy from the original to the trampoline */ + + /* Do real stw operation to complete stwu */ + mr r4,r1 + addir4,r4,INT_FRAME_SIZE/* Get kprobed entry */ + lwz r5,GPR1(r1) /* Backup r1 */ + stw r4,GPR1(r1) /* Now store that safely */ The above confuses me. Shouldn't you do instead something like lwz r4,GPR1(r1) subir3,r4,INT_FRAME_SIZE li r5,INT_FRAME_SIZE bl memcpy To start with, then you need to know the old r1 value which may or may not be related to your current r1. The emulation code should stash it into the int frame in an unused slot such as orig_gpr3 (since that only pertains to restarting syscalls which we aren't doing here). Then you adjust your r1 and do something like lwz r3,GPR1(r1) lwz r0,ORIG_GPR3(r1) stw r0,0(r3) To perform the store, before doing the rest: + /* Reroute the trampoline frame to r1 */ + subir5,r5,INT_FRAME_SIZE + mr r1,r5 + + /* Clear _TIF_DELAYED_KPROBE flag */ + rlwinm r9,r1,0,0,(31-THREAD_SHIFT) + lwz r0,TI_FLAGS(r9) + rlwinm r0,r0,0,_TIF_DELAYED_KPROBE + stw r0,TI_FLAGS(r9) + + b restore + /* * We come here when we are at the end of handling an exception * that occurred at a place where taking an exception will lose Cheers, Ben. ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
[PATCH] powerpc/85xx: re-enable the ePAPR byte channel driver in corenet32_smp_defconfig
Commit 7c4b2f09 (powerpc: Update mpc85xx/corenet 32-bit defconfigs) accidentally disabled the ePAPR byte channel driver in the defconfig for Freescale CoreNet platforms. Signed-off-by: Timur Tabi ti...@freescale.com --- arch/powerpc/configs/corenet32_smp_defconfig |1 + 1 files changed, 1 insertions(+), 0 deletions(-) diff --git a/arch/powerpc/configs/corenet32_smp_defconfig b/arch/powerpc/configs/corenet32_smp_defconfig index 8ff6302..ebf42ae 100644 --- a/arch/powerpc/configs/corenet32_smp_defconfig +++ b/arch/powerpc/configs/corenet32_smp_defconfig @@ -105,6 +105,7 @@ CONFIG_FSL_PQ_MDIO=y # CONFIG_INPUT_MOUSE is not set CONFIG_SERIO_LIBPS2=y # CONFIG_LEGACY_PTYS is not set +CONFIG_PPC_EPAPR_HV_BYTECHAN=y CONFIG_SERIAL_8250=y CONFIG_SERIAL_8250_CONSOLE=y CONFIG_SERIAL_8250_EXTENDED=y -- 1.7.3.4 ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
[PATCH] powerpc/fsl: add MSI support for the Freescale hypervisor
Add support for MSIs under the Freescale hypervisor. This involves updating the fsl_pci driver to support vmpic-msi nodes, and updating the fsl_pci driver to create an ATMU for the rerouted MSIIR register. Signed-off-by: Timur Tabi ti...@freescale.com --- arch/powerpc/sysdev/fsl_msi.c | 68 + arch/powerpc/sysdev/fsl_msi.h |7 ++-- arch/powerpc/sysdev/fsl_pci.c | 25 +++ 3 files changed, 77 insertions(+), 23 deletions(-) diff --git a/arch/powerpc/sysdev/fsl_msi.c b/arch/powerpc/sysdev/fsl_msi.c index 89548e0..7dc473f 100644 --- a/arch/powerpc/sysdev/fsl_msi.c +++ b/arch/powerpc/sysdev/fsl_msi.c @@ -23,6 +23,8 @@ #include asm/hw_irq.h #include asm/ppc-pci.h #include asm/mpic.h +#include asm/fsl_hcalls.h + #include fsl_msi.h #include fsl_pci.h @@ -163,11 +165,13 @@ static int fsl_setup_msi_irqs(struct pci_dev *pdev, int nvec, int type) */ np = of_parse_phandle(hose-dn, fsl,msi, 0); if (np) { - if (of_device_is_compatible(np, fsl,mpic-msi)) + if (of_device_is_compatible(np, fsl,mpic-msi) || + of_device_is_compatible(np, fsl,vmpic-msi)) phandle = np-phandle; else { - dev_err(pdev-dev, node %s has an invalid fsl,msi -phandle\n, hose-dn-full_name); + dev_err(pdev-dev, + node %s has an invalid fsl,msi phandle %u\n, + hose-dn-full_name, np-phandle); return -EINVAL; } } @@ -196,16 +200,14 @@ static int fsl_setup_msi_irqs(struct pci_dev *pdev, int nvec, int type) if (hwirq 0) { rc = hwirq; - pr_debug(%s: fail allocating msi interrupt\n, - __func__); + dev_err(pdev-dev, could not allocate MSI interrupt\n); goto out_free; } virq = irq_create_mapping(msi_data-irqhost, hwirq); if (virq == NO_IRQ) { - pr_debug(%s: fail mapping hwirq 0x%x\n, - __func__, hwirq); + dev_err(pdev-dev, fail mapping hwirq %i\n, hwirq); msi_bitmap_free_hwirqs(msi_data-bitmap, hwirq, 1); rc = -ENOSPC; goto out_free; @@ -234,6 +236,7 @@ static void fsl_msi_cascade(unsigned int irq, struct irq_desc *desc) u32 intr_index; u32 have_shift = 0; struct fsl_msi_cascade_data *cascade_data; + unsigned int ret; 
cascade_data = irq_get_handler_data(irq); msi_data = cascade_data-msi_data; @@ -265,6 +268,14 @@ static void fsl_msi_cascade(unsigned int irq, struct irq_desc *desc) case FSL_PIC_IP_IPIC: msir_value = fsl_msi_read(msi_data-msi_regs, msir_index * 0x4); break; + case FSL_PIC_IP_VMPIC: + ret = fh_vmpic_get_msir(virq_to_hw(irq), msir_value); + if (ret) { + pr_err(fsl-msi: fh_vmpic_get_msir() failed for + irq %u (ret=%u)\n, irq, ret); + msir_value = 0; + } + break; } while (msir_value) { @@ -282,6 +293,7 @@ static void fsl_msi_cascade(unsigned int irq, struct irq_desc *desc) switch (msi_data-feature FSL_PIC_IP_MASK) { case FSL_PIC_IP_MPIC: + case FSL_PIC_IP_VMPIC: chip-irq_eoi(idata); break; case FSL_PIC_IP_IPIC: @@ -311,7 +323,8 @@ static int fsl_of_msi_remove(struct platform_device *ofdev) } if (msi-bitmap.bitmap) msi_bitmap_free(msi-bitmap); - iounmap(msi-msi_regs); + if ((msi-feature FSL_PIC_IP_MASK) != FSL_PIC_IP_VMPIC) + iounmap(msi-msi_regs); kfree(msi); return 0; @@ -383,26 +396,32 @@ static int __devinit fsl_of_msi_probe(struct platform_device *dev) goto error_out; } - /* Get the MSI reg base */ - err = of_address_to_resource(dev-dev.of_node, 0, res); - if (err) { - dev_err(dev-dev, %s resource error!\n, + /* +* Under the Freescale hypervisor, the msi nodes don't have a 'reg' +* property. Instead, we use hypercalls to access the MSI. +*/ + if ((features-fsl_pic_ip FSL_PIC_IP_MASK) != FSL_PIC_IP_VMPIC) { + err = of_address_to_resource(dev-dev.of_node, 0, res); + if (err) { + dev_err(dev-dev, invalid resource for node %s\n, dev-dev.of_node-full_name); - goto error_out; - } + goto error_out; + } - msi-msi_regs =
Re: [PATCH] powerpc/fsl: add MSI support for the Freescale hypervisor
On 12/12/2011 05:37 PM, Timur Tabi wrote: @@ -205,6 +207,29 @@ static void __init setup_pci_atmu(struct pci_controller *hose, /* Setup inbound mem window */ mem = memblock_end_of_DRAM(); + + /* + * The msi-address-64 property, if it exists, indicates the physical + * address of the MSIIR register. Normally, this register is located + * inside CCSR, so the ATMU that covers all of CCSR is used for MSIs. + * But if this property exists, then we'll normally need to create a + * new ATMU for it. For now, however, we cheat. The only entity that + * creates this property is the Freescale hypervisor, and it + * always locates MSIIR in the page immediately after the end of DDR. + * So we can avoid allocating a new ATMU by just extending the DDR + * ATMU by one page. + */ Technically, it's up to the hv config file where MSIIR gets mapped. After main memory is just a common way of configuring it, but won't work if we're limiting the partition's memory to end at an unusual address. Might also want to comment that the reason for this weird remapping is hardware limitations in the IOMMU. -Scott ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
Re: [PATCH] powerpc/fsl: add MSI support for the Freescale hypervisor
Scott Wood wrote: Technically, it's up to the hv config file where MSIIR gets mapped. After main memory is just a common way of configuring it, but won't work if we're limiting the partition's memory to end at an unusual address. I'll change the comment to reflect this. Why can't we have the hypervisor always put MSIIR at the end of DDR, and not make it configurable? -- Timur Tabi Linux kernel developer at Freescale ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
Re: [PATCH] powerpc/fsl: add MSI support for the Freescale hypervisor
On 12/12/2011 06:27 PM, Tabi Timur-B04825 wrote: Scott Wood wrote: Technically, it's up to the hv config file where MSIIR gets mapped. After main memory is just a common way of configuring it, but won't work if we're limiting the partition's memory to end at an unusual address. I'll change the comment to reflect this. Why can't we have the hypervisor always put MSIIR at the end of DDR, and not make it configurable? ...but won't work if we're limiting the partition's memory to end at an unusual address. We have to live with PAMU's iova limitations. PAMU setup is user-controlled in general under Topaz. How's the hypervisor even going to know if the mem= kernel command line argument is used to change the end of main memory (assuming that's been taken into account by this point in the boot sequence)? What if the user put a shared memory region immediately after the main partition memory? -Scott ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
Re: [PATCH] powerpc/fsl: add MSI support for the Freescale hypervisor
Scott Wood wrote: How's the hypervisor even going to know if the mem= kernel command line argument is used to change the end of main memory (assuming that's been taken into account by this point in the boot sequence)? What if the user put a shared memory region immediately after the main partition memory? Alright, I'll need to add support for detached MSIIR addresses, but for now I think this patch is okay. It's the same level of functionality that we provide on the SDK. -- Timur Tabi Linux kernel developer at Freescale ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
Re: [PATCH 01/16 v3] pmac_zilog: fix unexpected irq
On Tue, 13 Dec 2011, Benjamin Herrenschmidt wrote: On Tue, 2011-12-13 at 00:34 +1100, Finn Thain wrote: On Mon, 12 Dec 2011, Benjamin Herrenschmidt wrote: Any chance you can test this patch ? I would not be surprised if it broke m68k since I had to do some of the changes in there blind, so let me know... with this, I can again suspend/resume properly on a Pismo while using the internal modem among other things. The patch works on a PowerBook 520 given a few changes (below). This PowerBook only has one serial port that I can test (the internal modem is not supported on 68k Macs). Interesting. The modem is a soft-modem geoport or a hw serial modem ? It's the latter. In the later case it's probably just a matter of finding the right GPIO bit in Apple ASIC to turn the power on :-) Surely feasible, but not high on the list of missing hardware support. Can you test a machine with two ports? The rest of my Mac hardware is in storage since I moved house last week. I tried on 2 port powermacs, but I only have one adapter, so I've basically been running with one serial port open and shooting irda frame on the other (with nothing to check wether I got the frames on the other hand), oh well ... I'll apply your patch and commit via my tree. I forgot to include this fix for your logging change. Finn Index: linux-git/drivers/tty/serial/pmac_zilog.c === --- linux-git.orig/drivers/tty/serial/pmac_zilog.c 2011-12-13 12:12:05.0 +1100 +++ linux-git/drivers/tty/serial/pmac_zilog.c 2011-12-13 12:13:29.0 +1100 @@ -99,6 +99,10 @@ MODULE_LICENSE(GPL); #define PMACZILOG_NAME ttyPZ #endif +#define pmz_debug(fmt, arg...) pr_debug(PMACZILOG_NAME %d: fmt, uap-port.line, ## arg) +#define pmz_error(fmt, arg...) pr_err(PMACZILOG_NAME %d: fmt, uap-port.line, ## arg) +#define pmz_info(fmt, arg...) 
pr_info(PMACZILOG_NAME %d: fmt, uap-port.line, ## arg) + /* * For the sake of early serial console, we can do a pre-probe Index: linux-git/drivers/tty/serial/pmac_zilog.h === --- linux-git.orig/drivers/tty/serial/pmac_zilog.h 2011-12-13 12:12:05.0 +1100 +++ linux-git/drivers/tty/serial/pmac_zilog.h 2011-12-13 12:12:28.0 +1100 @@ -1,10 +1,6 @@ #ifndef __PMAC_ZILOG_H__ #define __PMAC_ZILOG_H__ -#define pmz_debug(fmt, arg...) pr_debug(ttyPZ%d: fmt, uap-port.line, ## arg) -#define pmz_error(fmt, arg...) pr_err(ttyPZ%d: fmt, uap-port.line, ## arg) -#define pmz_info(fmt, arg...) pr_info(ttyPZ%d: fmt, uap-port.line, ## arg) - /* * At most 2 ESCCs with 2 ports each */ ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
Re: [UPDATED] [PATCH v4 3/7] [ppc] Process dynamic relocations for kernel
On 12/11/11 01:32, Segher Boessenkool wrote: Hi Suzuki, Looks quite good, a few comments... +get_type: + /* r4 holds the relocation type */ + extrwi r4, r4, 8, 24 /* r4 = ((char*)r4)[3] */ This comment is confusing (only makes sense together with the lwz a long way up). Agree, will fix them. +nxtrela: + /* + * We have to flush the modified instructions to the + * main storage from the d-cache. And also, invalidate the + * cached instructions in i-cache which has been modified. + * + * We delay the msync / isync operation till the end, since + * we won't be executing the modified instructions until + * we return from here. + */ + dcbst r4,r7 + icbi r4,r7 You still need a sync between these two. Without it, the icbi can complete before the dcbst for the same address does, which leaves room for an instruction fetch from that address to get old data. Ok. + cmpwi r8, 0 /* relasz = 0 ? */ + ble done + add r9, r9, r6 /* move to next entry in the .rela table */ + subf r8, r6, r8 /* relasz -= relaent */ + b applyrela + +done: + msync /* Wait for the flush to finish */ The instruction is called sync. msync is a BookE thing. next if (/R_PPC64_RELATIVE/ or /R_PPC64_NONE/ or /R_PPC64_ADDR64\s+mach_/); + next if (/R_PPC_ADDR16_LO/ or /R_PPC_ADDR16_HI/ or + /R_PPC_ADDR16_HA/ or /R_PPC_RELATIVE/); Nothing new, but these should probably have \b or \s or just a space on each side. Will fix this too. Also will include the R_PPC_NONE to the list of valid relocations. Thanks Suzuki Segher ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
Re: [PATCH 3/3] mtd/nand: workaround for Freescale FCM to support large-page NAND chip
于 2011年12月13日 05:30, Scott Wood 写道: On 12/12/2011 03:19 PM, Artem Bityutskiy wrote: On Mon, 2011-12-12 at 15:15 -0600, Scott Wood wrote: NAND chips come from the factory with bad blocks marked at a certain offset into each page. This offset is normally in the OOB area, but since we change the layout from 4k data, 128 byte oob to 2k data, 64 byte oob, 2k data, 64 byte oob the marker is no longer in the oob. On first use we need to migrate the markers so that they are still in the oob. Ah, I see, thanks. Are you planning to implement in-kernel migration or use a user-space tool? That's the kind of answer I was hoping to get from Shuo. :-) OK, I try to do this. Wait for a couple of days. -LiuShuo Most likely is a firmware-based tool, but I'd like there to be some way for the tool to mark that this has happened, so that the Linux driver can refuse to do non-raw accesses to a chip that isn't marked as having been migrated (or at least yell loudly in the log). Speaking of raw accesses, these are currently broken in the eLBC driver... we need some way for the generic layer to tell us what kind of access it is before the transaction starts, not once it wants to read out the buffer (unless we add more hacks to delay the start of a read transaction until first buffer access...). We'd be better off with a high-level read page/write page function that does the whole thing (not just buffer access, but command issuance as well). -Scott ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
Re: [PATCH] powerpc: Fix swiotlb ops for ppc64
On Dec 7, 2011, at 11:46 PM, Kumar Gala wrote: On Dec 7, 2011, at 9:23 PM, Benjamin Herrenschmidt wrote: On Wed, 2011-12-07 at 11:19 -0600, Kumar Gala wrote: struct dma_map_ops swiotlb_dma_ops = { +#ifdef CONFIG_PPC64 + .alloc_coherent = swiotlb_alloc_coherent, + .free_coherent = swiotlb_free_coherent, +#else .alloc_coherent = dma_direct_alloc_coherent, .free_coherent = dma_direct_free_coherent, +#endif .map_sg = swiotlb_map_sg_attrs, .unmap_sg = swiotlb_unmap_sg_attrs, .dma_supported = swiotlb_dma_supported, Do we really need the ifdef ? What happens if we use swiotlb_alloc_coherent() on ppc32 ? Won't it allocate lowmem, realize that it doesn't need bouncing and be happy ? Cheers, Ben. Becky any comment? I know its been a while, but wondering if you had any reason to not do what Ben's suggesting ? Well, as you say, it's been a while, and but I think: 1) dma_direct_alloc_coherent strips GFP_HIGHMEM out of the flags field when calling the actual allocator and the iotlb version does not. I don't know how much this matters - I did a quick grep and I don't see any users that specify that, but somebody went through the trouble of putting it in there in the first place and without knowing why I wasn't willing to get rid of it. Now, since my patch it looks like someone added a VM_BUG_ON into __get_free_pages() if GFP_HIGHMEM so this would get caught. However, I don't know if we really want to throw a bug there. 2) The iotlb code doesn't deal with the !coherent parts like 8xx. We can work around that by setting up the dma_ops differently for that case instead. -Becky ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
Re: [PATCH] powerpc: Fix swiotlb ops for ppc64
On Mon, 2011-12-12 at 21:55 -0600, Becky Bruce wrote: 1) dma_direct_alloc_coherent strips GFP_HIGHMEM out of the flags field when calling the actual allocator and the iotlb version does not. I don't know how much this matters - I did a quick grep and I don't see any users that specify that, but somebody went through the trouble of putting it in there in the first place and without knowing why I wasn't willing to get rid of it. Now, since my patch it looks like someone added a VM_BUG_ON into __get_free_pages() if GFP_HIGHMEM so this would get caught. However, I don't know if we really want to throw a bug there. 2) The iotlb code doesn't deal with the !coherent parts like 8xx. We can work around that by setting up the dma_ops differently for that case instead. Does the rest of it handle them ? I mean swiotlb_map_sg_attrs etc... If not then it's broken anyway so may as well not care for now. Cheers, Ben. ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
Re: [PATCH 3/4] ppc32/kprobe: complete kprobe and migrate exception frame
Benjamin Herrenschmidt wrote: On Mon, 2011-12-12 at 16:50 +0800, Tiejun Chen wrote: We can't emulate stwu since that may corrupt current exception stack. So we will have to do real store operation in the exception return code. Firstly we'll allocate a trampoline exception frame below the kprobed function stack and copy the current exception frame to the trampoline. Then we can do this real store operation to implement 'stwu', and reroute the trampoline frame to r1 to complete this exception migration. Signed-off-by: Tiejun Chen tiejun.c...@windriver.com --- arch/powerpc/kernel/entry_32.S | 26 ++ 1 files changed, 26 insertions(+), 0 deletions(-) diff --git a/arch/powerpc/kernel/entry_32.S b/arch/powerpc/kernel/entry_32.S index 56212bc..d56e311 100644 --- a/arch/powerpc/kernel/entry_32.S +++ b/arch/powerpc/kernel/entry_32.S @@ -1185,6 +1185,8 @@ recheck: bne-do_resched andi. r0,r9,_TIF_USER_WORK_MASK beq restore_user +andis. r0,r9,_TIF_DELAYED_KPROBE@h +bne-restore_kprobe Same comment as earlier about name. Note that you're not hooking in the right place. recheck is only reached if you -already- went out of the normal exit path and only when going back to user space unless I'm missing something (which is really the case you don't care about). You need to hook into resume_kernel instead. Maybe I'm misunderstanding what you mean since as I recall you suggestion we should do this at the end of do_work. Also, we may want to simplify the whole thing, instead of checking user vs. kernel first etc... we could instead have a single _TIF_WORK_MASK which includes both the bits for user work and the new bit for kernel work. With preempt, the kernel work bits would also include _TIF_NEED_RESCHED. Then you have in the common exit path, a single test for that, with a fast path that skips everything and just goes to restore for both kernel and user. 
The only possible issue is the setting of dbcr0 for BookE and 44x and we can keep that as a special case keyed of MSR_PR in the resume path under ifdef BOOKE (we'll probably sanitize that later with some different rework anyway). So the exit path because something like: ret_from_except: .. hard disable interrupts (unchanged) ... read TIF flags andi with _TIF_WORK_MASK nothing set - restore check PR set - do_work_user no set - do_work_kernel (kprobes preempt) (both loop until relevant _TIF flags are all clear) restore: #ifdef BOOKE 44x test PR do dbcr0 stuff if needed ... nornal restore ... Do you mean we should reorganize current ret_from_except for ppc32 as well? do_user_signal: /* r10 contains MSR_KERNEL here */ ori r10,r10,MSR_EE SYNC @@ -1202,6 +1204,30 @@ do_user_signal: /* r10 contains MSR_KERNEL here */ REST_NVGPRS(r1) b recheck +restore_kprobe: +lwz r3,GPR1(r1) +subir3,r3,INT_FRAME_SIZE; /* Allocate a trampoline exception frame */ +mr r4,r1 +bl copy_exc_stack /* Copy from the original to the trampoline */ + +/* Do real stw operation to complete stwu */ +mr r4,r1 +addir4,r4,INT_FRAME_SIZE/* Get kprobed entry */ +lwz r5,GPR1(r1) /* Backup r1 */ +stw r4,GPR1(r1) /* Now store that safely */ The above confuses me. Shouldn't you do instead something like lwz r4,GPR1(r1) subir3,r4,INT_FRAME_SIZE li r5,INT_FRAME_SIZE bl memcpy Anyway I'll try this if you think memcpy is fine/safe in exception return codes. To start with, then you need to know the old r1 value which may or may not be related to your current r1. The emulation code should stash it If the old r1 is not related to our current r1, it shouldn't be possible to go restore_kprob since we set that new flag only for the current. If I'm wrong please correct me :) Thanks Tiejun into the int frame in an unused slot such as orig_gpr3 (since that only pertains to restarting syscalls which we aren't doing here). 
Then you adjust your r1 and do something like lwz r3,GPR1(r1) lwz r0,ORIG_GPR3(r1) stw r0,0(r3) To perform the store, before doing the rest: +/* Reroute the trampoline frame to r1 */ +subir5,r5,INT_FRAME_SIZE +mr r1,r5 + +/* Clear _TIF_DELAYED_KPROBE flag */ +rlwinm r9,r1,0,0,(31-THREAD_SHIFT) +lwz r0,TI_FLAGS(r9) +rlwinm r0,r0,0,_TIF_DELAYED_KPROBE +stw r0,TI_FLAGS(r9) + +b restore + /* * We come here when we are at the end of handling an exception * that occurred at a place where taking an exception will lose ___ Linuxppc-dev mailing list
Re: [PATCH 1/4] powerpc/kprobe: introduce a new thread flag
Benjamin Herrenschmidt wrote: On Mon, 2011-12-12 at 16:50 +0800, Tiejun Chen wrote: We need to add a new thread flag, TIF_KPROBE/_TIF_DELAYED_KPROBE, for handling kprobe operation while exiting exception. The basic idea is sane, however the instruction emulation isn't per-se kprobe specific. It could be used by xmon too for example. I'd rather use a different name, something like TIF_EMULATE_STACK_STORE or Its good term so I'll use this directly :) Thanks Tiejun something like that. Cheers, Ben. Signed-off-by: Tiejun Chen tiejun.c...@windriver.com --- arch/powerpc/include/asm/thread_info.h |2 ++ 1 files changed, 2 insertions(+), 0 deletions(-) diff --git a/arch/powerpc/include/asm/thread_info.h b/arch/powerpc/include/asm/thread_info.h index 836f231..3378734 100644 --- a/arch/powerpc/include/asm/thread_info.h +++ b/arch/powerpc/include/asm/thread_info.h @@ -112,6 +112,7 @@ static inline struct thread_info *current_thread_info(void) #define TIF_FREEZE 14 /* Freezing for suspend */ #define TIF_SYSCALL_TRACEPOINT 15 /* syscall tracepoint instrumentation */ #define TIF_RUNLATCH16 /* Is the runlatch enabled? */ +#define TIF_KPROBE 17 /* Is the delayed kprobe operation? */ /* as above, but as bit values */ #define _TIF_SYSCALL_TRACE (1TIF_SYSCALL_TRACE) @@ -130,6 +131,7 @@ static inline struct thread_info *current_thread_info(void) #define _TIF_FREEZE (1TIF_FREEZE) #define _TIF_SYSCALL_TRACEPOINT (1TIF_SYSCALL_TRACEPOINT) #define _TIF_RUNLATCH (1TIF_RUNLATCH) +#define _TIF_DELAYED_KPROBE (1TIF_KPROBE) #define _TIF_SYSCALL_T_OR_A (_TIF_SYSCALL_TRACE | _TIF_SYSCALL_AUDIT | \ _TIF_SECCOMP | _TIF_SYSCALL_TRACEPOINT) ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
Re: [PATCH 2/4] ppc32/kprobe: introduce copy_exc_stack
Benjamin Herrenschmidt wrote: On Mon, 2011-12-12 at 16:50 +0800, Tiejun Chen wrote: We need a copy mechanism to migrate exception stack. But looks copy_page() already implement this well so we can complete copy_exc_stack() based on that directly. I'd rather you don't hijack copy_page which is quite sensitive. The emulation isn't performance critical so a dumber routine would work Yes, I just think we should introduce good performance so I 'steal' the original copy_page(). fine. Why not use memcpy ? You can call it from assembly. I'd like to switch to memcpy. Thanks Tiejun ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
Re: [PATCH 1/1] ppc64: fix missing check of all bits of _TIF_USER_WORK_MASK in preempt
Tiejun Chen wrote: In entry_64.S version of ret_from_except_lite, you'll notice that in the !preempt case, after we've checked MSR_PR we test for any TIF flag in _TIF_USER_WORK_MASK to decide whether to go to do_work or not. However, in the preempt case, we do a convoluted trick to test SIGPENDING only if PR was set and always test NEED_RESCHED ... but we forget to test any other bit of _TIF_USER_WORK_MASK !!! So that means that with preempt, we completely fail to test for things like single step, syscall tracing, etc... This should be fixed as the following path: - Test PR. If set, go to test_work_user, else continue. - In test_work_user, always test for _TIF_USER_WORK_MASK to decide to go to do_work, maybe call it do_user_work - In test_work_kernel, test for _TIF_KERNEL_WORK_MASK which is set to our new flag along with NEED_RESCHED if preempt is enabled and branch to do_kernel_work. Ben, Any comment for this? Tiejun Signed-off-by: Benjamin Herrenschmidt b...@kernel.crashing.org Signed-off-by: Tiejun Chen tiejun.c...@windriver.com --- arch/powerpc/kernel/entry_64.S | 33 +++-- 1 files changed, 15 insertions(+), 18 deletions(-) diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S index d834425..9e70b9a 100644 --- a/arch/powerpc/kernel/entry_64.S +++ b/arch/powerpc/kernel/entry_64.S @@ -571,27 +571,26 @@ _GLOBAL(ret_from_except_lite) mtmsrd r9,1/* Update machine state */ #endif /* CONFIG_PPC_BOOK3E */ -#ifdef CONFIG_PREEMPT - clrrdi r9,r1,THREAD_SHIFT /* current_thread_info() */ - li r0,_TIF_NEED_RESCHED/* bits to check */ - ld r3,_MSR(r1) - ld r4,TI_FLAGS(r9) - /* Move MSR_PR bit in r3 to _TIF_SIGPENDING position in r0 */ - rlwimi r0,r3,32+TIF_SIGPENDING-MSR_PR_LG,_TIF_SIGPENDING - and.r0,r4,r0/* check NEED_RESCHED and maybe SIGPENDING */ - bne do_work - -#else /* !CONFIG_PREEMPT */ ld r3,_MSR(r1) /* Returning to user mode? */ andi. 
r3,r3,MSR_PR - beq restore /* if not, just restore regs and return */ + bne test_work_user + clrrdi r9,r1,THREAD_SHIFT /* current_thread_info() */ + li r0,_TIF_USER_WORK_MASK +#ifdef CONFIG_PREEMPT + ori r0,r0,_TIF_NEED_RESCHED +#endif + ld r4,TI_FLAGS(r9) + and.r0,r4,r0/* check NEED_RESCHED and maybe _TIF_USER_WORK_MASK */ + bne do_kernel_work + b restore /* if so, just restore regs and return */ + +test_work_user: /* Check current_thread_info()-flags */ clrrdi r9,r1,THREAD_SHIFT ld r4,TI_FLAGS(r9) andi. r0,r4,_TIF_USER_WORK_MASK - bne do_work -#endif + bne do_user_work restore: BEGIN_FW_FTR_SECTION @@ -693,10 +692,8 @@ ALT_FTR_SECTION_END_IFCLR(CPU_FTR_STCX_CHECKS_ADDRESS) b .ret_from_except_lite /* loop back and handle more */ #endif -do_work: +do_kernel_work: #ifdef CONFIG_PREEMPT - andi. r0,r3,MSR_PR/* Returning to user mode? */ - bne user_work /* Check that preempt_count() == 0 and interrupts are enabled */ lwz r8,TI_PREEMPT(r9) cmpwi cr1,r8,0 @@ -738,9 +735,9 @@ do_work: bne 1b b restore -user_work: #endif /* CONFIG_PREEMPT */ +do_user_work: /* Enable interrupts */ #ifdef CONFIG_PPC_BOOK3E wrteei 1 ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
Re: [PATCH 1/1] ppc64: fix missing check of all bits of _TIF_USER_WORK_MASK in preempt
On Tue, 2011-12-13 at 13:01 +0800, tiejun.chen wrote: Tiejun Chen wrote: In entry_64.S version of ret_from_except_lite, you'll notice that in the !preempt case, after we've checked MSR_PR we test for any TIF flag in _TIF_USER_WORK_MASK to decide whether to go to do_work or not. However, in the preempt case, we do a convoluted trick to test SIGPENDING only if PR was set and always test NEED_RESCHED ... but we forget to test any other bit of _TIF_USER_WORK_MASK !!! So that means that with preempt, we completely fail to test for things like single step, syscall tracing, etc... This should be fixed as the following path: - Test PR. If set, go to test_work_user, else continue. - In test_work_user, always test for _TIF_USER_WORK_MASK to decide to go to do_work, maybe call it do_user_work - In test_work_kernel, test for _TIF_KERNEL_WORK_MASK which is set to our new flag along with NEED_RESCHED if preempt is enabled and branch to do_kernel_work. Ben, Any comment for this? Sorry, I didn't get to review that one yet (nor reply to your newer responses), I have very sore eyes and basically had to get off the computer. Hopefully I'll be better tomorrow. Cheers, Ben. ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
Re: [PATCH 1/1] ppc64: fix missing check of all bits of _TIF_USER_WORK_MASK in preempt
Benjamin Herrenschmidt wrote: On Tue, 2011-12-13 at 13:01 +0800, tiejun.chen wrote: Tiejun Chen wrote: In entry_64.S version of ret_from_except_lite, you'll notice that in the !preempt case, after we've checked MSR_PR we test for any TIF flag in _TIF_USER_WORK_MASK to decide whether to go to do_work or not. However, in the preempt case, we do a convoluted trick to test SIGPENDING only if PR was set and always test NEED_RESCHED ... but we forget to test any other bit of _TIF_USER_WORK_MASK !!! So that means that with preempt, we completely fail to test for things like single step, syscall tracing, etc... This should be fixed as the following path: - Test PR. If set, go to test_work_user, else continue. - In test_work_user, always test for _TIF_USER_WORK_MASK to decide to go to do_work, maybe call it do_user_work - In test_work_kernel, test for _TIF_KERNEL_WORK_MASK which is set to our new flag along with NEED_RESCHED if preempt is enabled and branch to do_kernel_work. Ben, Any comment for this? Sorry, I didn't get to review that one yet (nor reply to your newer I'm nothing, please do this when you're fine completely. Thanks Tiejun responses), I have very sore eyes and basically had to get off the computer. Hopefully I'll be better tomorrow. Cheers, Ben. ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
[PATCH] powerpc: Fix comment explaining our VSID layout
We support 16TB of user address space and half a million contexts so update the comment to reflect this. Signed-off-by: Anton Blanchard an...@samba.org --- Index: linux-powerpc/arch/powerpc/include/asm/mmu-hash64.h === --- linux-powerpc.orig/arch/powerpc/include/asm/mmu-hash64.h2011-12-13 14:47:14.498301148 +1100 +++ linux-powerpc/arch/powerpc/include/asm/mmu-hash64.h 2011-12-13 14:58:01.085510915 +1100 @@ -312,10 +312,9 @@ extern void slb_set_size(u16 size); * (i.e. everything above 0xC000), except the very top * segment, which simplifies several things. * - * - We allow for 15 significant bits of ESID and 20 bits of - * context for user addresses. i.e. 8T (43 bits) of address space for - * up to 1M contexts (although the page table structure and context - * allocation will need changes to take advantage of this). + * - We allow for 16 significant bits of ESID and 19 bits of + * context for user addresses. i.e. 16T (44 bits) of address space for + * up to half a million contexts. * * - The scramble function gives robust scattering in the hash * table (at least based on some initial results). The previous ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
[PATCH 01/19] mxc_udc: add workaround for ENGcm09152 for i.MX25
this patch gives the possibility to workaround bug ENGcm09152 on i.MX25 when the hardware workaround is also implemented on the board. It covers the workaround described on page 42 of the following Errata : http://cache.freescale.com/files/dsp/doc/errata/IMX25CE.pdf Signed-off-by: Eric Bénard e...@eukrea.com Cc: Sascha Hauer ker...@pengutronix.de Cc: Greg Kroah-Hartman gre...@suse.de Cc: Li Yang le...@freescale.com --- drivers/usb/gadget/fsl_mxc_udc.c | 22 +- 1 files changed, 13 insertions(+), 9 deletions(-) diff --git a/drivers/usb/gadget/fsl_mxc_udc.c b/drivers/usb/gadget/fsl_mxc_udc.c index dcbc0a2..4aff05d 100644 --- a/drivers/usb/gadget/fsl_mxc_udc.c +++ b/drivers/usb/gadget/fsl_mxc_udc.c @@ -23,7 +23,7 @@ static struct clk *mxc_ahb_clk; static struct clk *mxc_usb_clk; -/* workaround ENGcm09152 for i.MX35 */ +/* workaround ENGcm09152 for i.MX25/35 */ #define USBPHYCTRL_OTGBASE_OFFSET 0x608 #define USBPHYCTRL_EVDO(1 23) @@ -89,16 +89,20 @@ eenahb: void fsl_udc_clk_finalize(struct platform_device *pdev) { struct fsl_usb2_platform_data *pdata = pdev-dev.platform_data; - if (cpu_is_mx35()) { + if (cpu_is_mx25() || cpu_is_mx35()) { unsigned int v; - - /* workaround ENGcm09152 for i.MX35 */ + void __iomem *otgbase; + if (cpu_is_mx25()) + otgbase = MX25_IO_ADDRESS(MX25_USB_BASE_ADDR + + USBPHYCTRL_OTGBASE_OFFSET); + else if (cpu_is_mx35()) + otgbase = MX35_IO_ADDRESS(MX35_USB_BASE_ADDR + + USBPHYCTRL_OTGBASE_OFFSET); + + /* workaround ENGcm09152 for i.MX25/35 */ if (pdata-workaround FLS_USB2_WORKAROUND_ENGCM09152) { - v = readl(MX35_IO_ADDRESS(MX35_USB_BASE_ADDR + - USBPHYCTRL_OTGBASE_OFFSET)); - writel(v | USBPHYCTRL_EVDO, - MX35_IO_ADDRESS(MX35_USB_BASE_ADDR + - USBPHYCTRL_OTGBASE_OFFSET)); + v = readl(otgbase); + writel(v | USBPHYCTRL_EVDO, otgbase); } } -- 1.7.6.4 ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
Linux port availability for P5010 processor
Do we have a Linux port available for the Freescale P5010 processor (with a single E5500 core)? *(found arch/powerpc/platforms/pseries; and some details in kernel/cputable.c)* Is there any reference board which uses this processor? Any reference in a DTS file will also be helpful. Thanks Vineeth ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev