[PATCH 3/3] powerpc: build-time sort exception table
Signed-off-by: Nicholas Piggin--- arch/powerpc/Kconfig | 1 + arch/powerpc/include/asm/module.h | 4 scripts/sortextable.c | 2 ++ 3 files changed, 3 insertions(+), 4 deletions(-) diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index 5d43cb8..b49062b 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -80,6 +80,7 @@ config ARCH_HAS_DMA_SET_COHERENT_MASK config PPC bool default y + select BUILDTIME_EXTABLE_SORT select ARCH_MIGHT_HAVE_PC_PARPORT select ARCH_MIGHT_HAVE_PC_SERIO select BINFMT_ELF diff --git a/arch/powerpc/include/asm/module.h b/arch/powerpc/include/asm/module.h index cd4ffd8..cc12c61 100644 --- a/arch/powerpc/include/asm/module.h +++ b/arch/powerpc/include/asm/module.h @@ -90,10 +90,6 @@ static inline int module_finalize_ftrace(struct module *mod, const Elf_Shdr *sec } #endif -struct exception_table_entry; -void sort_ex_table(struct exception_table_entry *start, - struct exception_table_entry *finish); - #if defined(CONFIG_MODVERSIONS) && defined(CONFIG_PPC64) #define ARCH_RELOCATES_KCRCTAB #define reloc_start PHYSICAL_START diff --git a/scripts/sortextable.c b/scripts/sortextable.c index f453b7c..365a907 100644 --- a/scripts/sortextable.c +++ b/scripts/sortextable.c @@ -316,6 +316,8 @@ do_file(char const *const fname) case EM_S390: case EM_AARCH64: case EM_PARISC: + case EM_PPC: + case EM_PPC64: custom_sort = sort_relative_table; break; case EM_ARCOMPACT: -- 2.9.3
[PATCH 2/3] powerpc: relative exception tables
This halves the exception table size on 64-bit builds, and it allows build-time sorting of exception tables to work on relocated kernels. Signed-off-by: Nicholas Piggin--- arch/powerpc/include/asm/linkage.h| 20 - arch/powerpc/include/asm/uaccess.h| 27 ++- arch/powerpc/kernel/kprobes.c | 2 +- arch/powerpc/kernel/traps.c | 2 +- arch/powerpc/mm/fault.c | 2 +- arch/powerpc/platforms/embedded6xx/holly.c| 2 +- arch/powerpc/platforms/embedded6xx/mpc7448_hpc2.c | 2 +- arch/powerpc/sysdev/fsl_rio.c | 2 +- 8 files changed, 33 insertions(+), 26 deletions(-) diff --git a/arch/powerpc/include/asm/linkage.h b/arch/powerpc/include/asm/linkage.h index fcb9e0d..6898bf5 100644 --- a/arch/powerpc/include/asm/linkage.h +++ b/arch/powerpc/include/asm/linkage.h @@ -16,20 +16,20 @@ /* * Helper macro for exception table entries */ -#define EX_TABLE(_fault, _target) \ - ".section __ex_table,\"a\"\n" \ - PPC_LONG_ALIGN "\n" \ - PPC_LONG #_fault "\n" \ - PPC_LONG #_target "\n" \ +#define EX_TABLE(_fault, _target) \ + ".section __ex_table,\"a\"\n" \ + ".balign 4\n" \ + ".long (" #_fault ") - . \n" \ + ".long (" #_target ") - . \n" \ ".previous\n" #else /* __ASSEMBLY__ */ -#define EX_TABLE(_fault, _target) \ - .section __ex_table,"a" ; \ - PPC_LONG_ALIGN ;\ - PPC_LONG _fault ; \ - PPC_LONG _target ; \ +#define EX_TABLE(_fault, _target) \ + .section __ex_table,"a" ; \ + .balign 4; \ + .long (_fault) - . ;\ + .long (_target) - . 
; \ .previous #endif /* __ASSEMBLY__ */ diff --git a/arch/powerpc/include/asm/uaccess.h b/arch/powerpc/include/asm/uaccess.h index caff75e..f485a01 100644 --- a/arch/powerpc/include/asm/uaccess.h +++ b/arch/powerpc/include/asm/uaccess.h @@ -63,23 +63,30 @@ __access_ok((__force unsigned long)(addr), (size), get_fs())) /* - * The exception table consists of pairs of addresses: the first is the - * address of an instruction that is allowed to fault, and the second is + * The exception table consists of pairs of relative addresses: the first is + * the address of an instruction that is allowed to fault, and the second is * the address at which the program should continue. No registers are - * modified, so it is entirely up to the continuation code to figure out - * what to do. + * modified, so it is entirely up to the continuation code to figure out what + * to do. * - * All the routines below use bits of fixup code that are out of line - * with the main instruction path. This means when everything is well, - * we don't even have to jump over them. Further, they do not intrude - * on our cache or tlb entries. + * All the routines below use bits of fixup code that are out of line with the + * main instruction path. This means when everything is well, we don't even + * have to jump over them. Further, they do not intrude on our cache or tlb + * entries. */ +#define ARCH_HAS_RELATIVE_EXTABLE + struct exception_table_entry { - unsigned long insn; - unsigned long fixup; + int insn; + int fixup; }; +static inline unsigned long extable_fixup(const struct exception_table_entry *x) +{ + return (unsigned long)&x->fixup + x->fixup; +} + /* * These are the main single-value transfer routines. They automatically * use the right size if we just have the right pointer type. 
diff --git a/arch/powerpc/kernel/kprobes.c b/arch/powerpc/kernel/kprobes.c index e785cc9..9479d8e 100644 --- a/arch/powerpc/kernel/kprobes.c +++ b/arch/powerpc/kernel/kprobes.c @@ -449,7 +449,7 @@ int __kprobes kprobe_fault_handler(struct pt_regs *regs, int trapnr) * zero, try to fix up. */ if ((entry = search_exception_tables(regs->nip)) != NULL) { - regs->nip = entry->fixup; + regs->nip = extable_fixup(entry); return 1; } diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c index a1f8f56..ec5fd09 100644 --- a/arch/powerpc/kernel/traps.c +++ b/arch/powerpc/kernel/traps.c @@ -366,7 +366,7 @@ static inline int check_io_access(struct pt_regs *regs) (*nip & 0x100)? "OUT to": "IN from", regs->gpr[rb] - _IO_BASE, nip); regs->msr |= MSR_RI; - regs->nip = entry->fixup; +
[PATCH 1/3] powerpc: EX_TABLE macro for exception tables
This macro is taken from s390, and allows more flexibility in changing exception table format. Signed-off-by: Nicholas Piggin--- arch/powerpc/include/asm/futex.h | 14 +- arch/powerpc/include/asm/io.h | 18 +- arch/powerpc/include/asm/linkage.h| 22 +++ arch/powerpc/include/asm/uaccess.h| 24 +-- arch/powerpc/include/asm/word-at-a-time.h | 5 +- arch/powerpc/lib/checksum_32.S| 47 +++--- arch/powerpc/lib/checksum_64.S| 20 +-- arch/powerpc/lib/copy_32.S| 56 +++--- arch/powerpc/lib/copyuser_64.S| 271 +++--- arch/powerpc/lib/copyuser_power7.S| 21 +-- arch/powerpc/lib/ldstfp.S | 25 ++- arch/powerpc/lib/sstep.c | 15 +- arch/powerpc/lib/string.S | 11 +- arch/powerpc/lib/string_64.S | 16 +- arch/powerpc/sysdev/fsl_rio.c | 5 +- arch/powerpc/sysdev/tsi108_pci.c | 5 +- 16 files changed, 248 insertions(+), 327 deletions(-) diff --git a/arch/powerpc/include/asm/futex.h b/arch/powerpc/include/asm/futex.h index 2a9cf84..eaada6c 100644 --- a/arch/powerpc/include/asm/futex.h +++ b/arch/powerpc/include/asm/futex.h @@ -23,10 +23,8 @@ "4:li %1,%3\n" \ "b 3b\n" \ ".previous\n" \ - ".section __ex_table,\"a\"\n" \ - ".align 3\n" \ - PPC_LONG "1b,4b,2b,4b\n" \ - ".previous" \ + EX_TABLE(1b, 4b) \ + EX_TABLE(2b, 4b) \ : "=" (oldval), "=" (ret) \ : "b" (uaddr), "i" (-EFAULT), "r" (oparg) \ : "cr0", "memory") @@ -104,11 +102,9 @@ futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr, "3:.section .fixup,\"ax\"\n\ 4: li %0,%6\n\ b 3b\n\ - .previous\n\ - .section __ex_table,\"a\"\n\ - .align 3\n\ - " PPC_LONG "1b,4b,2b,4b\n\ - .previous" \ + .previous\n" + EX_TABLE(1b, 4b) + EX_TABLE(2b, 4b) : "+r" (ret), "=" (prev), "+m" (*uaddr) : "r" (uaddr), "r" (oldval), "r" (newval), "i" (-EFAULT) : "cc", "memory"); diff --git a/arch/powerpc/include/asm/io.h b/arch/powerpc/include/asm/io.h index f6fda84..5219a19 100644 --- a/arch/powerpc/include/asm/io.h +++ b/arch/powerpc/include/asm/io.h @@ -458,13 +458,10 @@ static inline unsigned int name(unsigned int port) \ "5: li %0,-1\n"\ " b 4b\n" \ ".previous\n" 
\ - ".section __ex_table,\"a\"\n" \ - " .align 2\n"\ - " .long 0b,5b\n"\ - " .long 1b,5b\n"\ - " .long 2b,5b\n"\ - " .long 3b,5b\n"\ - ".previous" \ + EX_TABLE(0b, 5b)\ + EX_TABLE(1b, 5b)\ + EX_TABLE(2b, 5b)\ + EX_TABLE(3b, 5b)\ : "=" (x) \ : "r" (port + _IO_BASE) \ : "memory");\ @@ -479,11 +476,8 @@ static inline void name(unsigned int val, unsigned int port) \ "0:" op " %0,0,%1\n"\ "1: sync\n" \ "2:\n" \ - ".section __ex_table,\"a\"\n" \ - " .align 2\n"\ - " .long 0b,2b\n"\ - " .long 1b,2b\n"\ - ".previous" \ + EX_TABLE(0b, 2b)\ + EX_TABLE(1b, 2b)\ : : "r" (val), "r" (port + _IO_BASE)\ : "memory");\ } diff --git a/arch/powerpc/include/asm/linkage.h b/arch/powerpc/include/asm/linkage.h index 0cf5e21..fcb9e0d 100644 --- a/arch/powerpc/include/asm/linkage.h +++ b/arch/powerpc/include/asm/linkage.h @@ -12,4 +12,26 @@ "\t.globl ." #alias "\n\t.set ." #alias ", ." #name) #endif +#ifndef __ASSEMBLY__ +/* + * Helper macro for exception table entries + */ +#define EX_TABLE(_fault, _target) \ + ".section __ex_table,\"a\"\n" \ + PPC_LONG_ALIGN "\n" \ + PPC_LONG #_fault "\n" \ + PPC_LONG #_target "\n" \ + ".previous\n" + +#else /* __ASSEMBLY__ */ + +#define EX_TABLE(_fault, _target) \ + .section __ex_table,"a" ; \ + PPC_LONG_ALIGN ;\ + PPC_LONG _fault ; \ +
[PATCH 0/3][RFC] powerpc: relative exception tables, and build-time sort
This implements relative exception tables for powerpc, and converts it to use build-time sorting. I've tested 64s only so far, but 32-bit seems to build. Will obviously require some more testing and reviews. Thanks, Nick Nicholas Piggin (3): powerpc: EX_TABLE macro for exception tables powerpc: relative exception tables powerpc: build-time sort exception table arch/powerpc/Kconfig | 1 + arch/powerpc/include/asm/futex.h | 14 +- arch/powerpc/include/asm/io.h | 18 +- arch/powerpc/include/asm/linkage.h| 22 ++ arch/powerpc/include/asm/module.h | 4 - arch/powerpc/include/asm/uaccess.h| 51 ++-- arch/powerpc/include/asm/word-at-a-time.h | 5 +- arch/powerpc/kernel/kprobes.c | 2 +- arch/powerpc/kernel/traps.c | 2 +- arch/powerpc/lib/checksum_32.S| 47 ++-- arch/powerpc/lib/checksum_64.S| 20 +- arch/powerpc/lib/copy_32.S| 56 ++--- arch/powerpc/lib/copyuser_64.S| 271 +++--- arch/powerpc/lib/copyuser_power7.S| 21 +- arch/powerpc/lib/ldstfp.S | 25 +- arch/powerpc/lib/sstep.c | 15 +- arch/powerpc/lib/string.S | 11 +- arch/powerpc/lib/string_64.S | 16 +- arch/powerpc/mm/fault.c | 2 +- arch/powerpc/platforms/embedded6xx/holly.c| 2 +- arch/powerpc/platforms/embedded6xx/mpc7448_hpc2.c | 2 +- arch/powerpc/sysdev/fsl_rio.c | 7 +- arch/powerpc/sysdev/tsi108_pci.c | 5 +- scripts/sortextable.c | 2 + 24 files changed, 274 insertions(+), 347 deletions(-) -- 2.9.3
[PATCH 0/3][RFC] powerpc: relative exception tables, and build-time sort
This implements relative exception tables for powerpc, and converts it to use build-time sorting. I've tested 64s only so far, but 32-bit seems to build. Will obviously require some more testing and reviews. Thanks, Nick Nicholas Piggin (3): powerpc: EX_TABLE macro for exception tables powerpc: relative exception tables powerpc: build-time sort exception table arch/powerpc/Kconfig | 1 + arch/powerpc/include/asm/futex.h | 14 +- arch/powerpc/include/asm/io.h | 18 +- arch/powerpc/include/asm/linkage.h| 22 ++ arch/powerpc/include/asm/module.h | 4 - arch/powerpc/include/asm/uaccess.h| 51 ++-- arch/powerpc/include/asm/word-at-a-time.h | 5 +- arch/powerpc/kernel/kprobes.c | 2 +- arch/powerpc/kernel/traps.c | 2 +- arch/powerpc/lib/checksum_32.S| 47 ++-- arch/powerpc/lib/checksum_64.S| 20 +- arch/powerpc/lib/copy_32.S| 56 ++--- arch/powerpc/lib/copyuser_64.S| 271 +++--- arch/powerpc/lib/copyuser_power7.S| 21 +- arch/powerpc/lib/ldstfp.S | 25 +- arch/powerpc/lib/sstep.c | 15 +- arch/powerpc/lib/string.S | 11 +- arch/powerpc/lib/string_64.S | 16 +- arch/powerpc/mm/fault.c | 2 +- arch/powerpc/platforms/embedded6xx/holly.c| 2 +- arch/powerpc/platforms/embedded6xx/mpc7448_hpc2.c | 2 +- arch/powerpc/sysdev/fsl_rio.c | 7 +- arch/powerpc/sysdev/tsi108_pci.c | 5 +- scripts/sortextable.c | 2 + 24 files changed, 274 insertions(+), 347 deletions(-) -- 2.9.3
[PATCH 0/3][RFC] powerpc: relative exception tables, and build-time sort
This implements relative exception tables for powerpc, and converts it to use build-time sorting. I've tested 64s only so far, but 32-bit seems to build. Will obviously require some more testing and reviews. Thanks, Nick Nicholas Piggin (3): powerpc: EX_TABLE macro for exception tables powerpc: relative exception tables powerpc: build-time sort exception table arch/powerpc/Kconfig | 1 + arch/powerpc/include/asm/futex.h | 14 +- arch/powerpc/include/asm/io.h | 18 +- arch/powerpc/include/asm/linkage.h| 22 ++ arch/powerpc/include/asm/module.h | 4 - arch/powerpc/include/asm/uaccess.h| 51 ++-- arch/powerpc/include/asm/word-at-a-time.h | 5 +- arch/powerpc/kernel/kprobes.c | 2 +- arch/powerpc/kernel/traps.c | 2 +- arch/powerpc/lib/checksum_32.S| 47 ++-- arch/powerpc/lib/checksum_64.S| 20 +- arch/powerpc/lib/copy_32.S| 56 ++--- arch/powerpc/lib/copyuser_64.S| 271 +++--- arch/powerpc/lib/copyuser_power7.S| 21 +- arch/powerpc/lib/ldstfp.S | 25 +- arch/powerpc/lib/sstep.c | 15 +- arch/powerpc/lib/string.S | 11 +- arch/powerpc/lib/string_64.S | 16 +- arch/powerpc/mm/fault.c | 2 +- arch/powerpc/platforms/embedded6xx/holly.c| 2 +- arch/powerpc/platforms/embedded6xx/mpc7448_hpc2.c | 2 +- arch/powerpc/sysdev/fsl_rio.c | 7 +- arch/powerpc/sysdev/tsi108_pci.c | 5 +- scripts/sortextable.c | 2 + 24 files changed, 274 insertions(+), 347 deletions(-) -- 2.9.3
[PATCH] powerpc/mm: Drop dump_numa_memory_topology()
At boot we dump the NUMA memory topology in dump_numa_memory_topology(), at KERN_DEBUG level, resulting in output like: Node 0 Memory: 0x0-0x1 Node 1 Memory: 0x1-0x2 Which is nice enough, but immediately after that we iterate over each node and call setup_node_data(), which also prints out the node ranges, at KERN_INFO, giving eg: numa: Initmem setup node 0 [mem 0x-0x] numa: Initmem setup node 1 [mem 0x1-0x1] So drop dump_numa_memory_topology() as superfluous chatter. Signed-off-by: Michael Ellerman--- arch/powerpc/mm/numa.c | 36 1 file changed, 36 deletions(-) diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c index 75b9cd6150cc..db5fc2b54c5a 100644 --- a/arch/powerpc/mm/numa.c +++ b/arch/powerpc/mm/numa.c @@ -871,40 +871,6 @@ void __init dump_numa_cpu_topology(void) } } -static void __init dump_numa_memory_topology(void) -{ - unsigned int node; - unsigned int count; - - if (min_common_depth == -1 || !numa_enabled) - return; - - for_each_online_node(node) { - unsigned long i; - - printk(KERN_DEBUG "Node %d Memory:", node); - - count = 0; - - for (i = 0; i < memblock_end_of_DRAM(); -i += (1 << SECTION_SIZE_BITS)) { - if (early_pfn_to_nid(i >> PAGE_SHIFT) == node) { - if (count == 0) - printk(" 0x%lx", i); - ++count; - } else { - if (count > 0) - printk("-0x%lx", i); - count = 0; - } - } - - if (count > 0) - printk("-0x%lx", i); - printk("\n"); - } -} - /* Initialize NODE_DATA for a node on the local memory */ static void __init setup_node_data(int nid, u64 start_pfn, u64 end_pfn) { @@ -947,8 +913,6 @@ void __init initmem_init(void) if (parse_numa_properties()) setup_nonnuma(); - else - dump_numa_memory_topology(); memblock_dump_all(); -- 2.7.4
Re: [PATCH kernel v2 2/2] powerpc/mm/iommu, vfio/spapr: Put pages on VFIO container shutdown
On Wed, Oct 12, 2016 at 03:58:28PM +1100, Alexey Kardashevskiy wrote: > At the moment the userspace tool is expected to request pinning of > the entire guest RAM when VFIO IOMMU SPAPR v2 driver is present. > When the userspace process finishes, all the pinned pages need to > be put; this is done as a part of the userspace memory context (MM) > destruction which happens on the very last mmdrop(). > > This approach has a problem that a MM of the userspace process > may live longer than the userspace process itself as kernel threads > use userspace process MMs which was running on a CPU where > the kernel thread was scheduled to. If this happened, the MM remains > referenced until this exact kernel thread wakes up again > and releases the very last reference to the MM, on an idle system this > can take even hours. > > This moves preregistered regions tracking from MM to VFIO; instead of > using mm_iommu_table_group_mem_t::used, tce_container::prereg_list is > added so each container releases regions which it has pre-registered. > > This changes the userspace interface to return EBUSY if a memory > region is already registered in a container. However it should not > have any practical effect as the only userspace tool available now > does register memory region once per container anyway. > > As tce_iommu_register_pages/tce_iommu_unregister_pages are called > under container->lock, this does not need additional locking. 
> > Signed-off-by: Alexey Kardashevskiy> Reviewed-by: Nicholas Piggin > --- > Changes: > v2: > * updated commit log > --- > arch/powerpc/include/asm/mmu_context.h | 1 - > arch/powerpc/mm/mmu_context_book3s64.c | 4 --- > arch/powerpc/mm/mmu_context_iommu.c| 11 > drivers/vfio/vfio_iommu_spapr_tce.c| 51 > +- > 4 files changed, 50 insertions(+), 17 deletions(-) > > diff --git a/arch/powerpc/include/asm/mmu_context.h > b/arch/powerpc/include/asm/mmu_context.h > index b9e3f0a..a6e18b5 100644 > --- a/arch/powerpc/include/asm/mmu_context.h > +++ b/arch/powerpc/include/asm/mmu_context.h > @@ -26,7 +26,6 @@ extern long mm_iommu_get(struct mm_struct *mm, > extern long mm_iommu_put(struct mm_struct *mm, > struct mm_iommu_table_group_mem_t *mem); > extern void mm_iommu_init(struct mm_struct *mm); > -extern void mm_iommu_cleanup(struct mm_struct *mm); > extern struct mm_iommu_table_group_mem_t *mm_iommu_lookup(struct mm_struct > *mm, > unsigned long ua, unsigned long size); > extern struct mm_iommu_table_group_mem_t *mm_iommu_find(struct mm_struct *mm, > diff --git a/arch/powerpc/mm/mmu_context_book3s64.c > b/arch/powerpc/mm/mmu_context_book3s64.c > index ad82735..1a07969 100644 > --- a/arch/powerpc/mm/mmu_context_book3s64.c > +++ b/arch/powerpc/mm/mmu_context_book3s64.c > @@ -159,10 +159,6 @@ static inline void destroy_pagetable_page(struct > mm_struct *mm) > > void destroy_context(struct mm_struct *mm) > { > -#ifdef CONFIG_SPAPR_TCE_IOMMU > - mm_iommu_cleanup(mm); > -#endif > - > #ifdef CONFIG_PPC_ICSWX > drop_cop(mm->context.acop, mm); > kfree(mm->context.cop_lockp); > diff --git a/arch/powerpc/mm/mmu_context_iommu.c > b/arch/powerpc/mm/mmu_context_iommu.c > index 4c6db09..104bad0 100644 > --- a/arch/powerpc/mm/mmu_context_iommu.c > +++ b/arch/powerpc/mm/mmu_context_iommu.c > @@ -365,14 +365,3 @@ void mm_iommu_init(struct mm_struct *mm) > { > INIT_LIST_HEAD_RCU(>context.iommu_group_mem_list); > } > - > -void mm_iommu_cleanup(struct mm_struct *mm) > -{ > - struct 
mm_iommu_table_group_mem_t *mem, *tmp; > - > - list_for_each_entry_safe(mem, tmp, &mm->context.iommu_group_mem_list, > - next) { > - list_del_rcu(&mem->next); > - mm_iommu_do_free(mem); > - } > -} > diff --git a/drivers/vfio/vfio_iommu_spapr_tce.c > b/drivers/vfio/vfio_iommu_spapr_tce.c > index 3d2a65c..c8e9796 100644 > --- a/drivers/vfio/vfio_iommu_spapr_tce.c > +++ b/drivers/vfio/vfio_iommu_spapr_tce.c > @@ -89,6 +89,15 @@ struct tce_iommu_group { > }; > > /* > + * A container needs to remember which preregistered region it has > + * referenced to do proper cleanup at the userspace process exit. > + */ > +struct tce_iommu_prereg { > + struct list_head next; > + struct mm_iommu_table_group_mem_t *mem; > +}; > + > +/* > * The container descriptor supports only a single group per container. > * Required by the API as the container is not supplied with the IOMMU group > * at the moment of initialization. > @@ -101,12 +110,26 @@ struct tce_container { > struct mm_struct *mm; > struct iommu_table *tables[IOMMU_TABLE_GROUP_MAX_TABLES]; > struct list_head group_list; > + struct list_head prereg_list; > }; > > +static long tce_iommu_prereg_free(struct tce_container *container, > + struct tce_iommu_prereg *tcemem) > +{ > + long ret; > + > + list_del(&tcemem->next); > + ret =
Re: [PATCH kernel v2 1/2] powerpc/iommu: Stop using @current in mm_iommu_xxx
On Wed, Oct 12, 2016 at 03:58:27PM +1100, Alexey Kardashevskiy wrote: > In some situations the userspace memory context may live longer than > the userspace process itself so if we need to do proper memory context > cleanup, we better cache @mm and use it later when the process is gone > (@current or @current->mm are NULL). > > This changes mm_iommu_xxx API to receive mm_struct instead of using one > from @current. > > This references and caches MM once per container so we do not depend > on @current pointing to a valid task descriptor anymore. > > This is needed by the following patch to do proper cleanup in time. > This depends on "powerpc/powernv/ioda: Fix endianness when reading TCEs" > to do proper cleanup via tce_iommu_clear() patch. > > To keep API consistent, this replaces mm_context_t with mm_struct; > we stick to mm_struct as mm_iommu_adjust_locked_vm() helper needs > access to &mm->mmap_sem. > > This should cause no behavioral change. > > Signed-off-by: Alexey Kardashevskiy > Reviewed-by: Nicholas Piggin > Acked-by: Balbir Singh > --- > Changes: > v2: > * added BUG_ON(container->mm && (container->mm != current->mm)) in > tce_iommu_register_pages() > * added note about containers referencing MM > --- > arch/powerpc/include/asm/mmu_context.h | 20 +++-- > arch/powerpc/kernel/setup-common.c | 2 +- > arch/powerpc/mm/mmu_context_book3s64.c | 4 +-- > arch/powerpc/mm/mmu_context_iommu.c| 55 > ++ > drivers/vfio/vfio_iommu_spapr_tce.c| 41 - > 5 files changed, 63 insertions(+), 59 deletions(-) > > diff --git a/arch/powerpc/include/asm/mmu_context.h > b/arch/powerpc/include/asm/mmu_context.h > index 5c45114..b9e3f0a 100644 > --- a/arch/powerpc/include/asm/mmu_context.h > +++ b/arch/powerpc/include/asm/mmu_context.h > @@ -19,16 +19,18 @@ extern void destroy_context(struct mm_struct *mm); > struct mm_iommu_table_group_mem_t; > > extern int isolate_lru_page(struct page *page); /* from internal.h */ > -extern bool mm_iommu_preregistered(void); > -extern long 
mm_iommu_get(unsigned long ua, unsigned long entries, > +extern bool mm_iommu_preregistered(struct mm_struct *mm); > +extern long mm_iommu_get(struct mm_struct *mm, > + unsigned long ua, unsigned long entries, > struct mm_iommu_table_group_mem_t **pmem); > -extern long mm_iommu_put(struct mm_iommu_table_group_mem_t *mem); > -extern void mm_iommu_init(mm_context_t *ctx); > -extern void mm_iommu_cleanup(mm_context_t *ctx); > -extern struct mm_iommu_table_group_mem_t *mm_iommu_lookup(unsigned long ua, > - unsigned long size); > -extern struct mm_iommu_table_group_mem_t *mm_iommu_find(unsigned long ua, > - unsigned long entries); > +extern long mm_iommu_put(struct mm_struct *mm, > + struct mm_iommu_table_group_mem_t *mem); > +extern void mm_iommu_init(struct mm_struct *mm); > +extern void mm_iommu_cleanup(struct mm_struct *mm); > +extern struct mm_iommu_table_group_mem_t *mm_iommu_lookup(struct mm_struct > *mm, > + unsigned long ua, unsigned long size); > +extern struct mm_iommu_table_group_mem_t *mm_iommu_find(struct mm_struct *mm, > + unsigned long ua, unsigned long entries); > extern long mm_iommu_ua_to_hpa(struct mm_iommu_table_group_mem_t *mem, > unsigned long ua, unsigned long *hpa); > extern long mm_iommu_mapped_inc(struct mm_iommu_table_group_mem_t *mem); > diff --git a/arch/powerpc/kernel/setup-common.c > b/arch/powerpc/kernel/setup-common.c > index dba265c..942cf49 100644 > --- a/arch/powerpc/kernel/setup-common.c > +++ b/arch/powerpc/kernel/setup-common.c > @@ -906,7 +906,7 @@ void __init setup_arch(char **cmdline_p) > init_mm.context.pte_frag = NULL; > #endif > #ifdef CONFIG_SPAPR_TCE_IOMMU > - mm_iommu_init(_mm.context); > + mm_iommu_init(_mm); > #endif > irqstack_early_init(); > exc_lvl_early_init(); > diff --git a/arch/powerpc/mm/mmu_context_book3s64.c > b/arch/powerpc/mm/mmu_context_book3s64.c > index b114f8b..ad82735 100644 > --- a/arch/powerpc/mm/mmu_context_book3s64.c > +++ b/arch/powerpc/mm/mmu_context_book3s64.c > @@ -115,7 +115,7 @@ int 
init_new_context(struct task_struct *tsk, struct > mm_struct *mm) > mm->context.pte_frag = NULL; > #endif > #ifdef CONFIG_SPAPR_TCE_IOMMU > - mm_iommu_init(&mm->context); > + mm_iommu_init(mm); > #endif > return 0; > } > @@ -160,7 +160,7 @@ static inline void destroy_pagetable_page(struct > mm_struct *mm) > void destroy_context(struct mm_struct *mm) > { > #ifdef CONFIG_SPAPR_TCE_IOMMU > - mm_iommu_cleanup(&mm->context); > + mm_iommu_cleanup(mm); > #endif > > #ifdef CONFIG_PPC_ICSWX > diff --git a/arch/powerpc/mm/mmu_context_iommu.c > b/arch/powerpc/mm/mmu_context_iommu.c > index e0f1c33..4c6db09 100644 > --- a/arch/powerpc/mm/mmu_context_iommu.c > +++
[PATCH] powerpc/64s: reduce exception alignment
Exception handlers are aligned to 128 bytes (L1 cache) on 64s, which is overkill. It can reduce the icache footprint of any individual exception path. However taken as a whole, the expansion in icache footprint seems likely to be counter-productive and cause more total misses. Create IFETCH_ALIGN_SHIFT/BYTES, which should give optimal ifetch alignment with much more reasonable alignment. This saves 1792 bytes from head_64.o text with an allmodconfig build. Other subarchitectures should define appropriate IFETCH_ALIGN_SHIFT values if this becomes more widely used. Cc: Anton BlanchardSigned-off-by: Nicholas Piggin --- arch/powerpc/include/asm/cache.h | 3 +++ arch/powerpc/include/asm/head-64.h | 8 arch/powerpc/kernel/exceptions-64s.S | 2 +- 3 files changed, 8 insertions(+), 5 deletions(-) diff --git a/arch/powerpc/include/asm/cache.h b/arch/powerpc/include/asm/cache.h index ffbafbf..7657aa8 100644 --- a/arch/powerpc/include/asm/cache.h +++ b/arch/powerpc/include/asm/cache.h @@ -20,12 +20,15 @@ #endif #else /* CONFIG_PPC64 */ #define L1_CACHE_SHIFT 7 +#define IFETCH_ALIGN_SHIFT 4 /* POWER8,9 */ #endif #defineL1_CACHE_BYTES (1 << L1_CACHE_SHIFT) #defineSMP_CACHE_BYTES L1_CACHE_BYTES +#define IFETCH_ALIGN_BYTES (1 << IFETCH_ALIGN_SHIFT) + #if defined(__powerpc64__) && !defined(__ASSEMBLY__) struct ppc64_caches { u32 dsize; /* L1 d-cache size */ diff --git a/arch/powerpc/include/asm/head-64.h b/arch/powerpc/include/asm/head-64.h index ab90c2f..fca7033 100644 --- a/arch/powerpc/include/asm/head-64.h +++ b/arch/powerpc/include/asm/head-64.h @@ -95,12 +95,12 @@ end_##sname: #define __FIXED_SECTION_ENTRY_BEGIN(sname, name, __align) \ USE_FIXED_SECTION(sname); \ - .align __align; \ + .balign __align;\ .global name; \ name: #define FIXED_SECTION_ENTRY_BEGIN(sname, name) \ - __FIXED_SECTION_ENTRY_BEGIN(sname, name, 0) + __FIXED_SECTION_ENTRY_BEGIN(sname, name, IFETCH_ALIGN_BYTES) #define FIXED_SECTION_ENTRY_BEGIN_LOCATION(sname, name, start) \ USE_FIXED_SECTION(sname); \ @@ 
-203,9 +203,9 @@ end_##sname: #define EXC_VIRT_END(name, start, end) \ FIXED_SECTION_ENTRY_END_LOCATION(virt_vectors, exc_virt_##start##_##name, end) -#define EXC_COMMON_BEGIN(name) \ +#define EXC_COMMON_BEGIN(name) \ USE_TEXT_SECTION(); \ - .align 7; \ + .balign IFETCH_ALIGN_BYTES; \ .global name; \ DEFINE_FIXED_SYMBOL(name); \ name: diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S index e680e84..4af87e4 100644 --- a/arch/powerpc/kernel/exceptions-64s.S +++ b/arch/powerpc/kernel/exceptions-64s.S @@ -1403,7 +1403,7 @@ USE_TEXT_SECTION() /* * Hash table stuff */ - .align 7 + .balign IFETCH_ALIGN_BYTES do_hash_page: #ifdef CONFIG_PPC_STD_MMU_64 andis. r0,r4,0xa410/* weird error? */ -- 2.9.3
[RFC][PATCH] kernel relocation for KVM exceptions
Hi Paul, I wonder what you think about this approach for applying relocation to KVM exceptions? It's not yet tested and I haven't attempted PR, but I'll keep at it if you think it's the right direction. The relocation branch requires ctr, but we can get away without more scratch storage by putting trap and cr in one register. On the other hand, that's going to make the calling convention diverge even more for 32-bit, so perhaps it's being overly complex and you'd rather add another scratch save for CONFIG_RELOCATABLE? Other ideas? Thanks, Nick --- arch/powerpc/include/asm/exception-64s.h | 63 ++-- arch/powerpc/kernel/exceptions-64s.S | 4 +- arch/powerpc/kvm/book3s_hv_rmhandlers.S | 18 + 3 files changed, 65 insertions(+), 20 deletions(-) diff --git a/arch/powerpc/include/asm/exception-64s.h b/arch/powerpc/include/asm/exception-64s.h index 84d49b1..466870f 100644 --- a/arch/powerpc/include/asm/exception-64s.h +++ b/arch/powerpc/include/asm/exception-64s.h @@ -97,6 +97,11 @@ ld reg,PACAKBASE(r13); \ ori reg,reg,(ABS_ADDR(label))@l; +#define __LOAD_FAR_HANDLER(reg, label) \ + ld reg,PACAKBASE(r13); \ + ori reg,reg,(ABS_ADDR(label))@l;\ + addis reg,reg,(ABS_ADDR(label))@h; + /* Exception register prefixes */ #define EXC_HV H #define EXC_STD @@ -218,12 +223,43 @@ END_FTR_SECTION_NESTED(ftr,ftr,943) mtctr reg;\ bctr +/* + * KVM requires a far (>64K) branch, and to set the exit number in r12 + * when branching from an exception + */ +#define BRANCH_TO_KVM_EXIT(reg, label) \ + mfctr reg;\ + std reg,HSTATE_SCRATCH2(r13); \ + __LOAD_FAR_HANDLER(reg, label); \ + mtctr reg;\ + bctr + +#define BRANCH_TO_KVM(reg, label) \ + __LOAD_FAR_HANDLER(reg, label); \ + mtctr reg;\ + bctr + +#define BRANCH_LINK_TO_KVM(reg, label) \ + __LOAD_FAR_HANDLER(reg, label); \ + mtctr reg;\ + bctrl + #else #define BRANCH_TO_COMMON(reg, label) \ b label +#define BRANCH_TO_KVM(reg, label) \ + b label + +#define BRANCH_TO_KVM_EXIT(reg, label) \ + b label + +#define BRANCH_LINK_TO_KVM(reg, label) \ + b 
label + #endif + #define __KVM_HANDLER_PROLOG(area, n) \ BEGIN_FTR_SECTION_NESTED(947) \ ld r10,area+EX_CFAR(r13); \ @@ -234,30 +270,35 @@ END_FTR_SECTION_NESTED(ftr,ftr,943) std r10,HSTATE_PPR(r13);\ END_FTR_SECTION_NESTED(CPU_FTR_HAS_PPR,CPU_FTR_HAS_PPR,948);\ ld r10,area+EX_R10(r13); \ - stw r9,HSTATE_SCRATCH1(r13);\ - ld r9,area+EX_R9(r13); \ std r12,HSTATE_SCRATCH0(r13); \ + li r12,(n);\ + sldir12,r12,32; \ + ori r12,r12,r9; \ + ld r9,area+EX_R9(r13); \ + std r9,HSTATE_SCRATCH1(r13);\ #define __KVM_HANDLER(area, h, n) \ __KVM_HANDLER_PROLOG(area, n) \ - li r12,n; \ - b kvmppc_interrupt + BRANCH_TO_KVM_EXIT(r9, kvmppc_interrupt) #define __KVM_HANDLER_SKIP(area, h, n) \ cmpwi r10,KVM_GUEST_MODE_SKIP;\ - ld r10,area+EX_R10(r13); \ beq 89f;\ - stw r9,HSTATE_SCRATCH1(r13);\ BEGIN_FTR_SECTION_NESTED(948) \ - ld r9,area+EX_PPR(r13);\ - std r9,HSTATE_PPR(r13);
Re: [PATCH v2] cxl: Prevent adapter reset if an active context exists
On 12/10/16 15:17, Vaibhav Jain wrote: This patch prevents resetting the cxl adapter via sysfs in presence of one or more active cxl_context on it. This protects against an unrecoverable error caused by PSL owning a dirty cache line even after reset and host tries to touch the same cache line. In case a force reset of the card is required irrespective of any active contexts, the int value -1 can be stored in the 'reset' sysfs attribute of the card. The patch introduces a new atomic_t member named contexts_num inside struct cxl that holds the number of active context attached to the card , which is checked against '0' before proceeding with the reset. To prevent against a race condition where a context is activated just after reset check is performed, the contexts_num is atomically set to '-1' after reset-check to indicate that no more contexts can be activated on the card anymore. Before activating a context we atomically test if contexts_num is non-negative and if so, increment its value by one. In case the value of contexts_num is negative then it indicates that the card is about to be reset and context activation is error-ed out at that point. Signed-off-by: Vaibhav JainAll the changes look good to me. Reviewed-by: Andrew Donnellan diff --git a/Documentation/ABI/testing/sysfs-class-cxl b/Documentation/ABI/testing/sysfs-class-cxl index 4ba0a2a..dae2b38 100644 --- a/Documentation/ABI/testing/sysfs-class-cxl +++ b/Documentation/ABI/testing/sysfs-class-cxl @@ -220,8 +220,11 @@ What: /sys/class/cxl//reset Date: October 2014 Contact:linuxppc-dev@lists.ozlabs.org Description:write only -Writing 1 will issue a PERST to card which may cause the card -to reload the FPGA depending on load_image_on_perst. +Writing 1 will issue a PERST to card provided there are no + contexts active on any one of the card AFUs. This may cause + the card to reload the FPGA depending on load_image_on_perst. 
+ Writing -1 will do a force PERST irrespective of any active + contexts on the card AFUs. Ugh, spaces vs tabs bites again :( -- Andrew Donnellan OzLabs, ADL Canberra andrew.donnel...@au1.ibm.com IBM Australia Limited
Re: [PATCH] powerpc/64: option to force run-at-load to test relocation
On Wed, 12 Oct 2016 18:35:21 +1100 Balbir Singh wrote: > On 12/10/16 17:57, Nicholas Piggin wrote: > > This adds a config option that can help exercise the case when > > the kernel is not running at PAGE_OFFSET. > > > > Signed-off-by: Nicholas Piggin > > --- > > arch/powerpc/Kconfig | 9 + > > arch/powerpc/kernel/head_64.S | 4 > > arch/powerpc/kernel/setup-common.c | 3 +++ > > 3 files changed, 16 insertions(+) > > > > diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig > > index 65fba4c..5d43cb8 100644 > > --- a/arch/powerpc/Kconfig > > +++ b/arch/powerpc/Kconfig > > @@ -478,6 +478,15 @@ config RELOCATABLE > > setting can still be useful to bootwrappers that need to know the > > load address of the kernel (eg. u-boot/mkimage). > > > > +config RELOCATABLE_TEST > > + bool "Test relocatable kernel" > > + depends on (PPC64 && RELOCATABLE) > > + default n > > + help > > + This runs the relocatable kernel at the address it was initially > > + loaded at, which tends to be non-zero and therefore test the > > + relocation code. > > + > > config CRASH_DUMP > > bool "Build a kdump crash kernel" > > depends on PPC64 || 6xx || FSL_BOOKE || (44x && !SMP) > > diff --git a/arch/powerpc/kernel/head_64.S b/arch/powerpc/kernel/head_64.S > > index 79da0641..bc9ceac 100644 > > --- a/arch/powerpc/kernel/head_64.S > > +++ b/arch/powerpc/kernel/head_64.S > > @@ -111,8 +111,12 @@ __secondary_hold_acknowledge: > > .globl __run_at_load > > __run_at_load: > > DEFINE_FIXED_SYMBOL(__run_at_load) > > +#ifdef CONFIG_RELOCATABLE_TEST > > + .long 0x1 /* Test relocation, do not relocate to 0 */ > > +#else > > .long 0x72756e30 /* "run0" -- relocate to 0 by default */ > > #endif > > +#endif > > Could we do something like > > config RELOCATION_VALUE > default 0x72756e30 > default 1 if CONFIG_RELOCATABLE_TEST > > and then get > > .long CONFIG_RELOCATION_VALUE Normally I'm up for reducing ifdefs in .S and .c files, but in this case I'm not sure. 
I like being able to see the two possible values in the source. I don't really mind though. If you or Michael feel strongly, I'm happy to change it. > > . = 0x60 > > /* > > diff --git a/arch/powerpc/kernel/setup-common.c > > b/arch/powerpc/kernel/setup-common.c > > index dba265c..18e0f19 100644 > > --- a/arch/powerpc/kernel/setup-common.c > > +++ b/arch/powerpc/kernel/setup-common.c > > @@ -795,6 +795,9 @@ static __init void print_system_info(void) > > pr_info("mmu_features = 0x%08x\n", cur_cpu_spec->mmu_features); > > #ifdef CONFIG_PPC64 > > pr_info("firmware_features = 0x%016lx\n", powerpc_firmware_features); > > + > > + if (get_paca()->kernelbase != PAGE_OFFSET) > > + pr_info("kernelbase= 0x%llx\n", get_paca()->kernelbase); > > #endif > > > > Do we need this? We get physical_offset if we are relocated. You're right, that hunk can go. Thanks, Nick
[PATCH] powerpc/64s: relocation, register save fixes for system reset interrupt
This patch does a couple of things. First of all, powernv immediately explodes when running a relocated kernel, because the system reset exception for handling sleeps does not do correct relocated branches. Secondly, the sleep handling code trashes the condition and cfar registers, which we would like to preserve for debugging purposes (for the non-sleep exception case). This patch changes the exception to use the standard format that saves registers before any tests or branches are made. It adds the test for idle-wakeup as an "extra" to break out of the normal exception path. Then it branches to a relocated idle handler that calls the various idle handling functions. After this patch, the POWER8 CPU simulator boots a powernv kernel that is running at a non-zero address. Cc: Balbir Singh Cc: Shreyas B. Prabhu Cc: Gautham R. Shenoy Signed-off-by: Nicholas Piggin --- arch/powerpc/include/asm/exception-64s.h | 16 ++ arch/powerpc/kernel/exceptions-64s.S | 50 ++-- 2 files changed, 45 insertions(+), 21 deletions(-) diff --git a/arch/powerpc/include/asm/exception-64s.h b/arch/powerpc/include/asm/exception-64s.h index 2e4e7d8..84d49b1 100644 --- a/arch/powerpc/include/asm/exception-64s.h +++ b/arch/powerpc/include/asm/exception-64s.h @@ -93,6 +93,10 @@ ld reg,PACAKBASE(r13); /* get high part of */ \ ori reg,reg,(FIXED_SYMBOL_ABS_ADDR(label))@l; +#define __LOAD_HANDLER(reg, label) \ + ld reg,PACAKBASE(r13); \ + ori reg,reg,(ABS_ADDR(label))@l; + /* Exception register prefixes */ #define EXC_HV H #define EXC_STD @@ -208,6 +212,18 @@ END_FTR_SECTION_NESTED(ftr,ftr,943) #define kvmppc_interrupt kvmppc_interrupt_pr #endif +#ifdef CONFIG_RELOCATABLE +#define BRANCH_TO_COMMON(reg, label) \ + __LOAD_HANDLER(reg, label); \ + mtctr reg;\ + bctr + +#else +#define BRANCH_TO_COMMON(reg, label) \ + b label + +#endif + #define __KVM_HANDLER_PROLOG(area, n) \ BEGIN_FTR_SECTION_NESTED(947) \ ld r10,area+EX_CFAR(r13); \ diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S 
index 08992f8..e680e84 100644 --- a/arch/powerpc/kernel/exceptions-64s.S +++ b/arch/powerpc/kernel/exceptions-64s.S @@ -95,19 +95,35 @@ __start_interrupts: /* No virt vectors corresponding with 0x0..0x100 */ EXC_VIRT_NONE(0x4000, 0x4100) -EXC_REAL_BEGIN(system_reset, 0x100, 0x200) - SET_SCRATCH0(r13) + #ifdef CONFIG_PPC_P7_NAP -BEGIN_FTR_SECTION - /* Running native on arch 2.06 or later, check if we are -* waking up from nap/sleep/winkle. + /* +* If running native on arch 2.06 or later, check if we are waking up +* from nap/sleep/winkle, and branch to idle handler. */ - mfspr r13,SPRN_SRR1 - rlwinm. r13,r13,47-31,30,31 - beq 9f +#define IDLETEST(n)\ + BEGIN_FTR_SECTION ; \ + mfspr r10,SPRN_SRR1 ; \ + rlwinm. r10,r10,47-31,30,31 ; \ + beq-1f ;\ + cmpwi cr3,r10,2 ; \ + BRANCH_TO_COMMON(r10, system_reset_idle_common) ; \ +1: \ + END_FTR_SECTION_IFSET(CPU_FTR_HVMODE | CPU_FTR_ARCH_206) +#else +#define IDLETEST NOTEST +#endif - cmpwi cr3,r13,2 - GET_PACA(r13) +EXC_REAL_BEGIN(system_reset, 0x100, 0x200) + SET_SCRATCH0(r13) + EXCEPTION_PROLOG_PSERIES(PACA_EXGEN, system_reset_common, EXC_STD, +IDLETEST, 0x100) + +EXC_REAL_END(system_reset, 0x100, 0x200) +EXC_VIRT_NONE(0x4100, 0x4200) + +#ifdef CONFIG_PPC_P7_NAP +EXC_COMMON_BEGIN(system_reset_idle_common) bl pnv_restore_hyp_resource li r0,PNV_THREAD_RUNNING @@ -130,14 +146,8 @@ BEGIN_FTR_SECTION blt cr3,2f b pnv_wakeup_loss 2: b pnv_wakeup_noloss +#endif -9: -END_FTR_SECTION_IFSET(CPU_FTR_HVMODE | CPU_FTR_ARCH_206) -#endif /* CONFIG_PPC_P7_NAP */ - EXCEPTION_PROLOG_PSERIES(PACA_EXGEN, system_reset_common, EXC_STD, -NOTEST, 0x100) -EXC_REAL_END(system_reset, 0x100, 0x200) -EXC_VIRT_NONE(0x4100, 0x4200) EXC_COMMON(system_reset_common, 0x100, system_reset_exception) #ifdef
[PATCH] powerpc: make _ASM_NOKPROBE_SYMBOL a noop when KPROBES not defined
Signed-off-by: Nicholas Piggin --- The linker orphan sections error patch caught this. arch/powerpc/include/asm/ppc_asm.h | 4 1 file changed, 4 insertions(+) diff --git a/arch/powerpc/include/asm/ppc_asm.h b/arch/powerpc/include/asm/ppc_asm.h index c73750b..ceec199 100644 --- a/arch/powerpc/include/asm/ppc_asm.h +++ b/arch/powerpc/include/asm/ppc_asm.h @@ -265,10 +265,14 @@ GLUE(.,name): * latter is for those that incdentially must be excluded from probing * and allows them to be linked at more optimal location within text. */ +#ifdef CONFIG_KPROBES #define _ASM_NOKPROBE_SYMBOL(entry)\ .pushsection "_kprobe_blacklist","aw"; \ PPC_LONG (entry) ; \ .popsection +#else +#define _ASM_NOKPROBE_SYMBOL(entry) +#endif #define FUNC_START(name) _GLOBAL(name) #define FUNC_END(name) -- 2.9.3
[PATCH 10/10] mm: replace access_process_vm() write parameter with gup_flags
This patch removes the write parameter from access_process_vm() and replaces it with a gup_flags parameter as use of this function previously _implied_ FOLL_FORCE, whereas after this patch callers explicitly pass this flag. We make this explicit as use of FOLL_FORCE can result in surprising behaviour (and hence bugs) within the mm subsystem. Signed-off-by: Lorenzo Stoakes --- arch/alpha/kernel/ptrace.c | 9 ++--- arch/blackfin/kernel/ptrace.c | 5 +++-- arch/cris/arch-v32/kernel/ptrace.c | 4 ++-- arch/ia64/kernel/ptrace.c | 14 +- arch/m32r/kernel/ptrace.c | 15 ++- arch/mips/kernel/ptrace32.c| 5 +++-- arch/powerpc/kernel/ptrace32.c | 5 +++-- arch/score/kernel/ptrace.c | 10 ++ arch/sparc/kernel/ptrace_64.c | 24 arch/x86/kernel/step.c | 3 ++- arch/x86/um/ptrace_32.c| 3 ++- arch/x86/um/ptrace_64.c| 3 ++- include/linux/mm.h | 3 ++- kernel/ptrace.c| 16 ++-- mm/memory.c| 8 ++-- mm/nommu.c | 6 +++--- mm/util.c | 5 +++-- 17 files changed, 84 insertions(+), 54 deletions(-) diff --git a/arch/alpha/kernel/ptrace.c b/arch/alpha/kernel/ptrace.c index d9ee817..940dfb4 100644 --- a/arch/alpha/kernel/ptrace.c +++ b/arch/alpha/kernel/ptrace.c @@ -157,14 +157,16 @@ put_reg(struct task_struct *task, unsigned long regno, unsigned long data) static inline int read_int(struct task_struct *task, unsigned long addr, int * data) { - int copied = access_process_vm(task, addr, data, sizeof(int), 0); + int copied = access_process_vm(task, addr, data, sizeof(int), + FOLL_FORCE); return (copied == sizeof(int)) ? 0 : -EIO; } static inline int write_int(struct task_struct *task, unsigned long addr, int data) { - int copied = access_process_vm(task, addr, &data, sizeof(int), 1); + int copied = access_process_vm(task, addr, &data, sizeof(int), + FOLL_FORCE | FOLL_WRITE); return (copied == sizeof(int)) ? 0 : -EIO; } @@ -281,7 +283,8 @@ long arch_ptrace(struct task_struct *child, long request, /* When I and D space are separate, these will need to be fixed. */ case PTRACE_PEEKTEXT: /* read word at location addr. 
*/ case PTRACE_PEEKDATA: - copied = access_process_vm(child, addr, , sizeof(tmp), 0); + copied = access_process_vm(child, addr, , sizeof(tmp), + FOLL_FORCE); ret = -EIO; if (copied != sizeof(tmp)) break; diff --git a/arch/blackfin/kernel/ptrace.c b/arch/blackfin/kernel/ptrace.c index 8b8fe67..8d79286 100644 --- a/arch/blackfin/kernel/ptrace.c +++ b/arch/blackfin/kernel/ptrace.c @@ -271,7 +271,7 @@ long arch_ptrace(struct task_struct *child, long request, case BFIN_MEM_ACCESS_CORE: case BFIN_MEM_ACCESS_CORE_ONLY: copied = access_process_vm(child, addr, , - to_copy, 0); + to_copy, FOLL_FORCE); if (copied) break; @@ -324,7 +324,8 @@ long arch_ptrace(struct task_struct *child, long request, case BFIN_MEM_ACCESS_CORE: case BFIN_MEM_ACCESS_CORE_ONLY: copied = access_process_vm(child, addr, , - to_copy, 1); + to_copy, + FOLL_FORCE | FOLL_WRITE); break; case BFIN_MEM_ACCESS_DMA: if (safe_dma_memcpy(paddr, , to_copy)) diff --git a/arch/cris/arch-v32/kernel/ptrace.c b/arch/cris/arch-v32/kernel/ptrace.c index f085229..f0df654 100644 --- a/arch/cris/arch-v32/kernel/ptrace.c +++ b/arch/cris/arch-v32/kernel/ptrace.c @@ -147,7 +147,7 @@ long arch_ptrace(struct task_struct *child, long request, /* The trampoline page is globally mapped, no page table to traverse.*/ tmp = *(unsigned long*)addr; } else { - copied = access_process_vm(child, addr, , sizeof(tmp), 0); + copied = access_process_vm(child, addr, , sizeof(tmp), FOLL_FORCE); if (copied != sizeof(tmp)) break; @@ -279,7 +279,7 @@ static int insn_size(struct task_struct *child, unsigned long pc) int opsize = 0; /* Read the
[PATCH 09/10] mm: replace access_remote_vm() write parameter with gup_flags
This patch removes the write parameter from access_remote_vm() and replaces it with a gup_flags parameter as use of this function previously _implied_ FOLL_FORCE, whereas after this patch callers explicitly pass this flag. We make this explicit as use of FOLL_FORCE can result in surprising behaviour (and hence bugs) within the mm subsystem. Signed-off-by: Lorenzo Stoakes--- fs/proc/base.c | 19 +-- include/linux/mm.h | 2 +- mm/memory.c| 11 +++ mm/nommu.c | 7 +++ 4 files changed, 20 insertions(+), 19 deletions(-) diff --git a/fs/proc/base.c b/fs/proc/base.c index c2964d8..8e65446 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -252,7 +252,7 @@ static ssize_t proc_pid_cmdline_read(struct file *file, char __user *buf, * Inherently racy -- command line shares address space * with code and data. */ - rv = access_remote_vm(mm, arg_end - 1, , 1, 0); + rv = access_remote_vm(mm, arg_end - 1, , 1, FOLL_FORCE); if (rv <= 0) goto out_free_page; @@ -270,7 +270,8 @@ static ssize_t proc_pid_cmdline_read(struct file *file, char __user *buf, int nr_read; _count = min3(count, len, PAGE_SIZE); - nr_read = access_remote_vm(mm, p, page, _count, 0); + nr_read = access_remote_vm(mm, p, page, _count, + FOLL_FORCE); if (nr_read < 0) rv = nr_read; if (nr_read <= 0) @@ -305,7 +306,8 @@ static ssize_t proc_pid_cmdline_read(struct file *file, char __user *buf, bool final; _count = min3(count, len, PAGE_SIZE); - nr_read = access_remote_vm(mm, p, page, _count, 0); + nr_read = access_remote_vm(mm, p, page, _count, + FOLL_FORCE); if (nr_read < 0) rv = nr_read; if (nr_read <= 0) @@ -354,7 +356,8 @@ static ssize_t proc_pid_cmdline_read(struct file *file, char __user *buf, bool final; _count = min3(count, len, PAGE_SIZE); - nr_read = access_remote_vm(mm, p, page, _count, 0); + nr_read = access_remote_vm(mm, p, page, _count, + FOLL_FORCE); if (nr_read < 0) rv = nr_read; if (nr_read <= 0) @@ -832,6 +835,7 @@ static ssize_t mem_rw(struct file *file, char __user *buf, unsigned long addr = *ppos; 
ssize_t copied; char *page; + unsigned int flags = FOLL_FORCE; if (!mm) return 0; @@ -844,6 +848,9 @@ static ssize_t mem_rw(struct file *file, char __user *buf, if (!atomic_inc_not_zero(>mm_users)) goto free; + if (write) + flags |= FOLL_WRITE; + while (count > 0) { int this_len = min_t(int, count, PAGE_SIZE); @@ -852,7 +859,7 @@ static ssize_t mem_rw(struct file *file, char __user *buf, break; } - this_len = access_remote_vm(mm, addr, page, this_len, write); + this_len = access_remote_vm(mm, addr, page, this_len, flags); if (!this_len) { if (!copied) copied = -EIO; @@ -965,7 +972,7 @@ static ssize_t environ_read(struct file *file, char __user *buf, this_len = min(max_len, this_len); retval = access_remote_vm(mm, (env_start + src), - page, this_len, 0); + page, this_len, FOLL_FORCE); if (retval <= 0) { ret = retval; diff --git a/include/linux/mm.h b/include/linux/mm.h index 2a481d3..3e5234e 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1268,7 +1268,7 @@ static inline int fixup_user_fault(struct task_struct *tsk, extern int access_process_vm(struct task_struct *tsk, unsigned long addr, void *buf, int len, int write); extern int access_remote_vm(struct mm_struct *mm, unsigned long addr, - void *buf, int len, int write); + void *buf, int len, unsigned int gup_flags); long __get_user_pages(struct task_struct *tsk, struct mm_struct *mm, unsigned long start, unsigned long nr_pages, diff --git a/mm/memory.c b/mm/memory.c index 79ebed3..bac2d99 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -3935,19 +3935,14 @@ static int __access_remote_vm(struct task_struct *tsk, struct mm_struct *mm, * @addr: start address to access * @buf: source or destination buffer * @len: number of bytes to transfer - * @write:
[PATCH 08/10] mm: replace __access_remote_vm() write parameter with gup_flags
This patch removes the write parameter from __access_remote_vm() and replaces it with a gup_flags parameter as use of this function previously _implied_ FOLL_FORCE, whereas after this patch callers explicitly pass this flag. We make this explicit as use of FOLL_FORCE can result in surprising behaviour (and hence bugs) within the mm subsystem. Signed-off-by: Lorenzo Stoakes--- mm/memory.c | 23 +++ mm/nommu.c | 9 ++--- 2 files changed, 21 insertions(+), 11 deletions(-) diff --git a/mm/memory.c b/mm/memory.c index 20a9adb..79ebed3 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -3869,14 +3869,11 @@ EXPORT_SYMBOL_GPL(generic_access_phys); * given task for page fault accounting. */ static int __access_remote_vm(struct task_struct *tsk, struct mm_struct *mm, - unsigned long addr, void *buf, int len, int write) + unsigned long addr, void *buf, int len, unsigned int gup_flags) { struct vm_area_struct *vma; void *old_buf = buf; - unsigned int flags = FOLL_FORCE; - - if (write) - flags |= FOLL_WRITE; + int write = gup_flags & FOLL_WRITE; down_read(>mmap_sem); /* ignore errors, just check how much was successfully transferred */ @@ -3886,7 +3883,7 @@ static int __access_remote_vm(struct task_struct *tsk, struct mm_struct *mm, struct page *page = NULL; ret = get_user_pages_remote(tsk, mm, addr, 1, - flags, , ); + gup_flags, , ); if (ret <= 0) { #ifndef CONFIG_HAVE_IOREMAP_PROT break; @@ -3945,7 +3942,12 @@ static int __access_remote_vm(struct task_struct *tsk, struct mm_struct *mm, int access_remote_vm(struct mm_struct *mm, unsigned long addr, void *buf, int len, int write) { - return __access_remote_vm(NULL, mm, addr, buf, len, write); + unsigned int flags = FOLL_FORCE; + + if (write) + flags |= FOLL_WRITE; + + return __access_remote_vm(NULL, mm, addr, buf, len, flags); } /* @@ -3958,12 +3960,17 @@ int access_process_vm(struct task_struct *tsk, unsigned long addr, { struct mm_struct *mm; int ret; + unsigned int flags = FOLL_FORCE; mm = get_task_mm(tsk); if (!mm) return 0; - ret 
= __access_remote_vm(tsk, mm, addr, buf, len, write); + if (write) + flags |= FOLL_WRITE; + + ret = __access_remote_vm(tsk, mm, addr, buf, len, flags); + mmput(mm); return ret; diff --git a/mm/nommu.c b/mm/nommu.c index 70cb844..bde7df3 100644 --- a/mm/nommu.c +++ b/mm/nommu.c @@ -1809,9 +1809,10 @@ void filemap_map_pages(struct fault_env *fe, EXPORT_SYMBOL(filemap_map_pages); static int __access_remote_vm(struct task_struct *tsk, struct mm_struct *mm, - unsigned long addr, void *buf, int len, int write) + unsigned long addr, void *buf, int len, unsigned int gup_flags) { struct vm_area_struct *vma; + int write = gup_flags & FOLL_WRITE; down_read(>mmap_sem); @@ -1853,7 +1854,8 @@ static int __access_remote_vm(struct task_struct *tsk, struct mm_struct *mm, int access_remote_vm(struct mm_struct *mm, unsigned long addr, void *buf, int len, int write) { - return __access_remote_vm(NULL, mm, addr, buf, len, write); + return __access_remote_vm(NULL, mm, addr, buf, len, + write ? FOLL_WRITE : 0); } /* @@ -1871,7 +1873,8 @@ int access_process_vm(struct task_struct *tsk, unsigned long addr, void *buf, in if (!mm) return 0; - len = __access_remote_vm(tsk, mm, addr, buf, len, write); + len = __access_remote_vm(tsk, mm, addr, buf, len, + write ? FOLL_WRITE : 0); mmput(mm); return len; -- 2.10.0
[PATCH 07/10] mm: replace get_user_pages_remote() write/force parameters with gup_flags
This patch removes the write and force parameters from get_user_pages_remote() and replaces them with a gup_flags parameter to make the use of FOLL_FORCE explicit in callers as use of this flag can result in surprising behaviour (and hence bugs) within the mm subsystem. Signed-off-by: Lorenzo Stoakes--- drivers/gpu/drm/etnaviv/etnaviv_gem.c | 7 +-- drivers/gpu/drm/i915/i915_gem_userptr.c | 6 +- drivers/infiniband/core/umem_odp.c | 7 +-- fs/exec.c | 9 +++-- include/linux/mm.h | 2 +- kernel/events/uprobes.c | 6 -- mm/gup.c| 22 +++--- mm/memory.c | 6 +- security/tomoyo/domain.c| 2 +- 9 files changed, 40 insertions(+), 27 deletions(-) diff --git a/drivers/gpu/drm/etnaviv/etnaviv_gem.c b/drivers/gpu/drm/etnaviv/etnaviv_gem.c index 5ce3603..0370b84 100644 --- a/drivers/gpu/drm/etnaviv/etnaviv_gem.c +++ b/drivers/gpu/drm/etnaviv/etnaviv_gem.c @@ -748,19 +748,22 @@ static struct page **etnaviv_gem_userptr_do_get_pages( int ret = 0, pinned, npages = etnaviv_obj->base.size >> PAGE_SHIFT; struct page **pvec; uintptr_t ptr; + unsigned int flags = 0; pvec = drm_malloc_ab(npages, sizeof(struct page *)); if (!pvec) return ERR_PTR(-ENOMEM); + if (!etnaviv_obj->userptr.ro) + flags |= FOLL_WRITE; + pinned = 0; ptr = etnaviv_obj->userptr.ptr; down_read(>mmap_sem); while (pinned < npages) { ret = get_user_pages_remote(task, mm, ptr, npages - pinned, - !etnaviv_obj->userptr.ro, 0, - pvec + pinned, NULL); + flags, pvec + pinned, NULL); if (ret < 0) break; diff --git a/drivers/gpu/drm/i915/i915_gem_userptr.c b/drivers/gpu/drm/i915/i915_gem_userptr.c index e537930..c6f780f 100644 --- a/drivers/gpu/drm/i915/i915_gem_userptr.c +++ b/drivers/gpu/drm/i915/i915_gem_userptr.c @@ -508,6 +508,10 @@ __i915_gem_userptr_get_pages_worker(struct work_struct *_work) pvec = drm_malloc_gfp(npages, sizeof(struct page *), GFP_TEMPORARY); if (pvec != NULL) { struct mm_struct *mm = obj->userptr.mm->mm; + unsigned int flags = 0; + + if (!obj->userptr.read_only) + flags |= FOLL_WRITE; ret = -EFAULT; if 
(atomic_inc_not_zero(>mm_users)) { @@ -517,7 +521,7 @@ __i915_gem_userptr_get_pages_worker(struct work_struct *_work) (work->task, mm, obj->userptr.ptr + pinned * PAGE_SIZE, npages - pinned, -!obj->userptr.read_only, 0, +flags, pvec + pinned, NULL); if (ret < 0) break; diff --git a/drivers/infiniband/core/umem_odp.c b/drivers/infiniband/core/umem_odp.c index 75077a0..1f0fe32 100644 --- a/drivers/infiniband/core/umem_odp.c +++ b/drivers/infiniband/core/umem_odp.c @@ -527,6 +527,7 @@ int ib_umem_odp_map_dma_pages(struct ib_umem *umem, u64 user_virt, u64 bcnt, u64 off; int j, k, ret = 0, start_idx, npages = 0; u64 base_virt_addr; + unsigned int flags = 0; if (access_mask == 0) return -EINVAL; @@ -556,6 +557,9 @@ int ib_umem_odp_map_dma_pages(struct ib_umem *umem, u64 user_virt, u64 bcnt, goto out_put_task; } + if (access_mask & ODP_WRITE_ALLOWED_BIT) + flags |= FOLL_WRITE; + start_idx = (user_virt - ib_umem_start(umem)) >> PAGE_SHIFT; k = start_idx; @@ -574,8 +578,7 @@ int ib_umem_odp_map_dma_pages(struct ib_umem *umem, u64 user_virt, u64 bcnt, */ npages = get_user_pages_remote(owning_process, owning_mm, user_virt, gup_num_pages, - access_mask & ODP_WRITE_ALLOWED_BIT, - 0, local_page_list, NULL); + flags, local_page_list, NULL); up_read(_mm->mmap_sem); if (npages < 0) diff --git a/fs/exec.c b/fs/exec.c index 6fcfb3f..4e497b9 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -191,6 +191,7 @@ static struct page *get_arg_page(struct linux_binprm *bprm, unsigned long pos, { struct page *page; int ret; + unsigned int gup_flags = FOLL_FORCE; #ifdef CONFIG_STACK_GROWSUP if (write) { @@ -199,12 +200,16 @@ static struct page *get_arg_page(struct linux_binprm *bprm, unsigned
[PATCH 06/10] mm: replace get_user_pages() write/force parameters with gup_flags
This patch removes the write and force parameters from get_user_pages() and replaces them with a gup_flags parameter to make the use of FOLL_FORCE explicit in callers as use of this flag can result in surprising behaviour (and hence bugs) within the mm subsystem. Signed-off-by: Lorenzo Stoakes--- arch/cris/arch-v32/drivers/cryptocop.c | 4 +--- arch/ia64/kernel/err_inject.c | 2 +- arch/x86/mm/mpx.c | 5 ++--- drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c| 7 +-- drivers/gpu/drm/radeon/radeon_ttm.c| 3 ++- drivers/gpu/drm/via/via_dmablit.c | 4 ++-- drivers/infiniband/core/umem.c | 6 +- drivers/infiniband/hw/mthca/mthca_memfree.c| 2 +- drivers/infiniband/hw/qib/qib_user_pages.c | 3 ++- drivers/infiniband/hw/usnic/usnic_uiom.c | 5 - drivers/media/v4l2-core/videobuf-dma-sg.c | 7 +-- drivers/misc/mic/scif/scif_rma.c | 3 +-- drivers/misc/sgi-gru/grufault.c| 2 +- drivers/platform/goldfish/goldfish_pipe.c | 3 ++- drivers/rapidio/devices/rio_mport_cdev.c | 3 ++- .../vc04_services/interface/vchiq_arm/vchiq_2835_arm.c | 3 +-- .../vc04_services/interface/vchiq_arm/vchiq_arm.c | 3 +-- drivers/virt/fsl_hypervisor.c | 4 ++-- include/linux/mm.h | 2 +- mm/gup.c | 12 +++- mm/mempolicy.c | 2 +- mm/nommu.c | 18 -- 22 files changed, 49 insertions(+), 54 deletions(-) diff --git a/arch/cris/arch-v32/drivers/cryptocop.c b/arch/cris/arch-v32/drivers/cryptocop.c index b5698c8..099e170 100644 --- a/arch/cris/arch-v32/drivers/cryptocop.c +++ b/arch/cris/arch-v32/drivers/cryptocop.c @@ -2722,7 +2722,6 @@ static int cryptocop_ioctl_process(struct inode *inode, struct file *filp, unsig err = get_user_pages((unsigned long int)(oper.indata + prev_ix), noinpages, 0, /* read access only for in data */ -0, /* no force */ inpages, NULL); @@ -2736,8 +2735,7 @@ static int cryptocop_ioctl_process(struct inode *inode, struct file *filp, unsig if (oper.do_cipher){ err = get_user_pages((unsigned long int)oper.cipher_outdata, nooutpages, -1, /* write access for out data */ -0, /* no force */ +FOLL_WRITE, /* write 
access for out data */ outpages, NULL); up_read(>mm->mmap_sem); diff --git a/arch/ia64/kernel/err_inject.c b/arch/ia64/kernel/err_inject.c index 09f8457..5ed0ea9 100644 --- a/arch/ia64/kernel/err_inject.c +++ b/arch/ia64/kernel/err_inject.c @@ -142,7 +142,7 @@ store_virtual_to_phys(struct device *dev, struct device_attribute *attr, u64 virt_addr=simple_strtoull(buf, NULL, 16); int ret; - ret = get_user_pages(virt_addr, 1, VM_READ, 0, NULL, NULL); + ret = get_user_pages(virt_addr, 1, FOLL_WRITE, NULL, NULL); if (ret<=0) { #ifdef ERR_INJ_DEBUG printk("Virtual address %lx is not existing.\n",virt_addr); diff --git a/arch/x86/mm/mpx.c b/arch/x86/mm/mpx.c index 8047687..e4f8009 100644 --- a/arch/x86/mm/mpx.c +++ b/arch/x86/mm/mpx.c @@ -544,10 +544,9 @@ static int mpx_resolve_fault(long __user *addr, int write) { long gup_ret; int nr_pages = 1; - int force = 0; - gup_ret = get_user_pages((unsigned long)addr, nr_pages, write, - force, NULL, NULL); + gup_ret = get_user_pages((unsigned long)addr, nr_pages, + write ? FOLL_WRITE : 0, NULL, NULL); /* * get_user_pages() returns number of pages gotten. * 0 means we failed to fault in and get anything, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c index 887483b..dcaf691 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c @@ -555,10 +555,13 @@ struct amdgpu_ttm_tt { int amdgpu_ttm_tt_get_user_pages(struct ttm_tt *ttm, struct page **pages) { struct amdgpu_ttm_tt *gtt = (void *)ttm; - int write = !(gtt->userflags & AMDGPU_GEM_USERPTR_READONLY); + unsigned int flags = 0; unsigned pinned = 0; int r; + if (!(gtt->userflags & AMDGPU_GEM_USERPTR_READONLY)) + flags |= FOLL_WRITE; + if (gtt->userflags &
[PATCH 05/10] mm: replace get_vaddr_frames() write/force parameters with gup_flags
This patch removes the write and force parameters from get_vaddr_frames() and replaces them with a gup_flags parameter to make the use of FOLL_FORCE explicit in callers as use of this flag can result in surprising behaviour (and hence bugs) within the mm subsystem. Signed-off-by: Lorenzo Stoakes--- drivers/gpu/drm/exynos/exynos_drm_g2d.c| 3 ++- drivers/media/platform/omap/omap_vout.c| 2 +- drivers/media/v4l2-core/videobuf2-memops.c | 6 +- include/linux/mm.h | 2 +- mm/frame_vector.c | 13 ++--- 5 files changed, 11 insertions(+), 15 deletions(-) diff --git a/drivers/gpu/drm/exynos/exynos_drm_g2d.c b/drivers/gpu/drm/exynos/exynos_drm_g2d.c index aa92dec..fbd13fa 100644 --- a/drivers/gpu/drm/exynos/exynos_drm_g2d.c +++ b/drivers/gpu/drm/exynos/exynos_drm_g2d.c @@ -488,7 +488,8 @@ static dma_addr_t *g2d_userptr_get_dma_addr(struct drm_device *drm_dev, goto err_free; } - ret = get_vaddr_frames(start, npages, true, true, g2d_userptr->vec); + ret = get_vaddr_frames(start, npages, FOLL_FORCE | FOLL_WRITE, + g2d_userptr->vec); if (ret != npages) { DRM_ERROR("failed to get user pages from userptr.\n"); if (ret < 0) diff --git a/drivers/media/platform/omap/omap_vout.c b/drivers/media/platform/omap/omap_vout.c index e668dde..a31b95c 100644 --- a/drivers/media/platform/omap/omap_vout.c +++ b/drivers/media/platform/omap/omap_vout.c @@ -214,7 +214,7 @@ static int omap_vout_get_userptr(struct videobuf_buffer *vb, u32 virtp, if (!vec) return -ENOMEM; - ret = get_vaddr_frames(virtp, 1, true, false, vec); + ret = get_vaddr_frames(virtp, 1, FOLL_WRITE, vec); if (ret != 1) { frame_vector_destroy(vec); return -EINVAL; diff --git a/drivers/media/v4l2-core/videobuf2-memops.c b/drivers/media/v4l2-core/videobuf2-memops.c index 3c3b517..1cd322e 100644 --- a/drivers/media/v4l2-core/videobuf2-memops.c +++ b/drivers/media/v4l2-core/videobuf2-memops.c @@ -42,6 +42,10 @@ struct frame_vector *vb2_create_framevec(unsigned long start, unsigned long first, last; unsigned long nr; struct frame_vector 
*vec; + unsigned int flags = FOLL_FORCE; + + if (write) + flags |= FOLL_WRITE; first = start >> PAGE_SHIFT; last = (start + length - 1) >> PAGE_SHIFT; @@ -49,7 +53,7 @@ struct frame_vector *vb2_create_framevec(unsigned long start, vec = frame_vector_create(nr); if (!vec) return ERR_PTR(-ENOMEM); - ret = get_vaddr_frames(start & PAGE_MASK, nr, write, true, vec); + ret = get_vaddr_frames(start & PAGE_MASK, nr, flags, vec); if (ret < 0) goto out_destroy; /* We accept only complete set of PFNs */ diff --git a/include/linux/mm.h b/include/linux/mm.h index 27ab538..5ff084f6 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1305,7 +1305,7 @@ struct frame_vector { struct frame_vector *frame_vector_create(unsigned int nr_frames); void frame_vector_destroy(struct frame_vector *vec); int get_vaddr_frames(unsigned long start, unsigned int nr_pfns, -bool write, bool force, struct frame_vector *vec); +unsigned int gup_flags, struct frame_vector *vec); void put_vaddr_frames(struct frame_vector *vec); int frame_vector_to_pages(struct frame_vector *vec); void frame_vector_to_pfns(struct frame_vector *vec); diff --git a/mm/frame_vector.c b/mm/frame_vector.c index 81b6749..db77dcb 100644 --- a/mm/frame_vector.c +++ b/mm/frame_vector.c @@ -11,10 +11,7 @@ * get_vaddr_frames() - map virtual addresses to pfns * @start: starting user address * @nr_frames: number of pages / pfns from start to map - * @write: whether pages will be written to by the caller - * @force: whether to force write access even if user mapping is - * readonly. See description of the same argument of - get_user_pages(). + * @gup_flags: flags modifying lookup behaviour * @vec: structure which receives pages / pfns of the addresses mapped. * It should have space for at least nr_frames entries. * @@ -34,23 +31,17 @@ * This function takes care of grabbing mmap_sem as necessary. 
*/ int get_vaddr_frames(unsigned long start, unsigned int nr_frames, -bool write, bool force, struct frame_vector *vec) +unsigned int gup_flags, struct frame_vector *vec) { struct mm_struct *mm = current->mm; struct vm_area_struct *vma; int ret = 0; int err; int locked; - unsigned int gup_flags = 0; if (nr_frames == 0) return 0; - if (write) - gup_flags |= FOLL_WRITE; - if (force) - gup_flags |= FOLL_FORCE; - if
[PATCH 04/10] mm: replace get_user_pages_locked() write/force parameters with gup_flags
This patch removes the write and force parameters from get_user_pages_locked() and replaces them with a gup_flags parameter to make the use of FOLL_FORCE explicit in callers as use of this flag can result in surprising behaviour (and hence bugs) within the mm subsystem. Signed-off-by: Lorenzo Stoakes--- include/linux/mm.h | 2 +- mm/frame_vector.c | 8 +++- mm/gup.c | 12 +++- mm/nommu.c | 5 - 4 files changed, 15 insertions(+), 12 deletions(-) diff --git a/include/linux/mm.h b/include/linux/mm.h index 6adc4bc..27ab538 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1282,7 +1282,7 @@ long get_user_pages(unsigned long start, unsigned long nr_pages, int write, int force, struct page **pages, struct vm_area_struct **vmas); long get_user_pages_locked(unsigned long start, unsigned long nr_pages, - int write, int force, struct page **pages, int *locked); + unsigned int gup_flags, struct page **pages, int *locked); long __get_user_pages_unlocked(struct task_struct *tsk, struct mm_struct *mm, unsigned long start, unsigned long nr_pages, struct page **pages, unsigned int gup_flags); diff --git a/mm/frame_vector.c b/mm/frame_vector.c index 381bb07..81b6749 100644 --- a/mm/frame_vector.c +++ b/mm/frame_vector.c @@ -41,10 +41,16 @@ int get_vaddr_frames(unsigned long start, unsigned int nr_frames, int ret = 0; int err; int locked; + unsigned int gup_flags = 0; if (nr_frames == 0) return 0; + if (write) + gup_flags |= FOLL_WRITE; + if (force) + gup_flags |= FOLL_FORCE; + if (WARN_ON_ONCE(nr_frames > vec->nr_allocated)) nr_frames = vec->nr_allocated; @@ -59,7 +65,7 @@ int get_vaddr_frames(unsigned long start, unsigned int nr_frames, vec->got_ref = true; vec->is_pfns = false; ret = get_user_pages_locked(start, nr_frames, - write, force, (struct page **)(vec->ptrs), ); + gup_flags, (struct page **)(vec->ptrs), ); goto out; } diff --git a/mm/gup.c b/mm/gup.c index cfcb014..7a0d033 100644 --- a/mm/gup.c +++ b/mm/gup.c @@ -838,18 +838,12 @@ static __always_inline long 
__get_user_pages_locked(struct task_struct *tsk, * up_read(>mmap_sem); */ long get_user_pages_locked(unsigned long start, unsigned long nr_pages, - int write, int force, struct page **pages, + unsigned int gup_flags, struct page **pages, int *locked) { - unsigned int flags = FOLL_TOUCH; - - if (write) - flags |= FOLL_WRITE; - if (force) - flags |= FOLL_FORCE; - return __get_user_pages_locked(current, current->mm, start, nr_pages, - pages, NULL, locked, true, flags); + pages, NULL, locked, true, + gup_flags | FOLL_TOUCH); } EXPORT_SYMBOL(get_user_pages_locked); diff --git a/mm/nommu.c b/mm/nommu.c index 7e27add..842cfdd 100644 --- a/mm/nommu.c +++ b/mm/nommu.c @@ -176,9 +176,12 @@ long get_user_pages(unsigned long start, unsigned long nr_pages, EXPORT_SYMBOL(get_user_pages); long get_user_pages_locked(unsigned long start, unsigned long nr_pages, - int write, int force, struct page **pages, + unsigned int gup_flags, struct page **pages, int *locked) { + int write = gup_flags & FOLL_WRITE; + int force = gup_flags & FOLL_FORCE; + return get_user_pages(start, nr_pages, write, force, pages, NULL); } EXPORT_SYMBOL(get_user_pages_locked); -- 2.10.0
[PATCH 03/10] mm: replace get_user_pages_unlocked() write/force parameters with gup_flags
This patch removes the write and force parameters from get_user_pages_unlocked() and replaces them with a gup_flags parameter to make the use of FOLL_FORCE explicit in callers as use of this flag can result in surprising behaviour (and hence bugs) within the mm subsystem. Signed-off-by: Lorenzo Stoakes--- arch/mips/mm/gup.c | 2 +- arch/s390/mm/gup.c | 3 ++- arch/sh/mm/gup.c | 3 ++- arch/sparc/mm/gup.c| 3 ++- arch/x86/mm/gup.c | 2 +- drivers/media/pci/ivtv/ivtv-udma.c | 4 ++-- drivers/media/pci/ivtv/ivtv-yuv.c | 5 +++-- drivers/scsi/st.c | 5 ++--- drivers/video/fbdev/pvr2fb.c | 4 ++-- include/linux/mm.h | 2 +- mm/gup.c | 14 -- mm/nommu.c | 11 ++- mm/util.c | 3 ++- net/ceph/pagevec.c | 2 +- 14 files changed, 27 insertions(+), 36 deletions(-) diff --git a/arch/mips/mm/gup.c b/arch/mips/mm/gup.c index 42d124f..d8c3c15 100644 --- a/arch/mips/mm/gup.c +++ b/arch/mips/mm/gup.c @@ -287,7 +287,7 @@ int get_user_pages_fast(unsigned long start, int nr_pages, int write, pages += nr; ret = get_user_pages_unlocked(start, (end - start) >> PAGE_SHIFT, - write, 0, pages); + pages, write ? FOLL_WRITE : 0); /* Have to be a bit careful with return values */ if (nr > 0) { diff --git a/arch/s390/mm/gup.c b/arch/s390/mm/gup.c index adb0c34..18d4107 100644 --- a/arch/s390/mm/gup.c +++ b/arch/s390/mm/gup.c @@ -266,7 +266,8 @@ int get_user_pages_fast(unsigned long start, int nr_pages, int write, /* Try to get the remaining pages with get_user_pages */ start += nr << PAGE_SHIFT; pages += nr; - ret = get_user_pages_unlocked(start, nr_pages - nr, write, 0, pages); + ret = get_user_pages_unlocked(start, nr_pages - nr, pages, + write ? FOLL_WRITE : 0); /* Have to be a bit careful with return values */ if (nr > 0) ret = (ret < 0) ? 
nr : ret + nr; diff --git a/arch/sh/mm/gup.c b/arch/sh/mm/gup.c index 40fa6c8..063c298 100644 --- a/arch/sh/mm/gup.c +++ b/arch/sh/mm/gup.c @@ -258,7 +258,8 @@ int get_user_pages_fast(unsigned long start, int nr_pages, int write, pages += nr; ret = get_user_pages_unlocked(start, - (end - start) >> PAGE_SHIFT, write, 0, pages); + (end - start) >> PAGE_SHIFT, pages, + write ? FOLL_WRITE : 0); /* Have to be a bit careful with return values */ if (nr > 0) { diff --git a/arch/sparc/mm/gup.c b/arch/sparc/mm/gup.c index 4e06750..cd0e32b 100644 --- a/arch/sparc/mm/gup.c +++ b/arch/sparc/mm/gup.c @@ -238,7 +238,8 @@ int get_user_pages_fast(unsigned long start, int nr_pages, int write, pages += nr; ret = get_user_pages_unlocked(start, - (end - start) >> PAGE_SHIFT, write, 0, pages); + (end - start) >> PAGE_SHIFT, pages, + write ? FOLL_WRITE : 0); /* Have to be a bit careful with return values */ if (nr > 0) { diff --git a/arch/x86/mm/gup.c b/arch/x86/mm/gup.c index b8b6a60..0d4fb3e 100644 --- a/arch/x86/mm/gup.c +++ b/arch/x86/mm/gup.c @@ -435,7 +435,7 @@ int get_user_pages_fast(unsigned long start, int nr_pages, int write, ret = get_user_pages_unlocked(start, (end - start) >> PAGE_SHIFT, - write, 0, pages); + pages, write ? 
FOLL_WRITE : 0); /* Have to be a bit careful with return values */ if (nr > 0) { diff --git a/drivers/media/pci/ivtv/ivtv-udma.c b/drivers/media/pci/ivtv/ivtv-udma.c index 4769469..2c9232e 100644 --- a/drivers/media/pci/ivtv/ivtv-udma.c +++ b/drivers/media/pci/ivtv/ivtv-udma.c @@ -124,8 +124,8 @@ int ivtv_udma_setup(struct ivtv *itv, unsigned long ivtv_dest_addr, } /* Get user pages for DMA Xfer */ - err = get_user_pages_unlocked(user_dma.uaddr, user_dma.page_count, 0, - 1, dma->map); + err = get_user_pages_unlocked(user_dma.uaddr, user_dma.page_count, + dma->map, FOLL_FORCE); if (user_dma.page_count != err) { IVTV_DEBUG_WARN("failed to map user pages, returned %d instead of %d\n", diff --git a/drivers/media/pci/ivtv/ivtv-yuv.c b/drivers/media/pci/ivtv/ivtv-yuv.c index b094054..f7299d3 100644 --- a/drivers/media/pci/ivtv/ivtv-yuv.c +++ b/drivers/media/pci/ivtv/ivtv-yuv.c @@ -76,11 +76,12 @@ static int ivtv_yuv_prep_user_dma(struct ivtv *itv, struct
[PATCH 02/10] mm: remove write/force parameters from __get_user_pages_unlocked()
This patch removes the write and force parameters from __get_user_pages_unlocked() to make the use of FOLL_FORCE explicit in callers as use of this flag can result in surprising behaviour (and hence bugs) within the mm subsystem. Signed-off-by: Lorenzo Stoakes--- include/linux/mm.h | 3 +-- mm/gup.c | 17 + mm/nommu.c | 12 +--- mm/process_vm_access.c | 7 +-- virt/kvm/async_pf.c| 3 ++- virt/kvm/kvm_main.c| 11 --- 6 files changed, 34 insertions(+), 19 deletions(-) diff --git a/include/linux/mm.h b/include/linux/mm.h index e9caec6..2db98b6 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1285,8 +1285,7 @@ long get_user_pages_locked(unsigned long start, unsigned long nr_pages, int write, int force, struct page **pages, int *locked); long __get_user_pages_unlocked(struct task_struct *tsk, struct mm_struct *mm, unsigned long start, unsigned long nr_pages, - int write, int force, struct page **pages, - unsigned int gup_flags); + struct page **pages, unsigned int gup_flags); long get_user_pages_unlocked(unsigned long start, unsigned long nr_pages, int write, int force, struct page **pages); int get_user_pages_fast(unsigned long start, int nr_pages, int write, diff --git a/mm/gup.c b/mm/gup.c index ba83942..3d620dd 100644 --- a/mm/gup.c +++ b/mm/gup.c @@ -865,17 +865,11 @@ EXPORT_SYMBOL(get_user_pages_locked); */ __always_inline long __get_user_pages_unlocked(struct task_struct *tsk, struct mm_struct *mm, unsigned long start, unsigned long nr_pages, - int write, int force, struct page **pages, - unsigned int gup_flags) + struct page **pages, unsigned int gup_flags) { long ret; int locked = 1; - if (write) - gup_flags |= FOLL_WRITE; - if (force) - gup_flags |= FOLL_FORCE; - down_read(>mmap_sem); ret = __get_user_pages_locked(tsk, mm, start, nr_pages, pages, NULL, , false, gup_flags); @@ -905,8 +899,15 @@ EXPORT_SYMBOL(__get_user_pages_unlocked); long get_user_pages_unlocked(unsigned long start, unsigned long nr_pages, int write, int force, struct page **pages) { + 
unsigned int flags = FOLL_TOUCH; + + if (write) + flags |= FOLL_WRITE; + if (force) + flags |= FOLL_FORCE; + return __get_user_pages_unlocked(current, current->mm, start, nr_pages, -write, force, pages, FOLL_TOUCH); +pages, flags); } EXPORT_SYMBOL(get_user_pages_unlocked); diff --git a/mm/nommu.c b/mm/nommu.c index 95daf81..925dcc1 100644 --- a/mm/nommu.c +++ b/mm/nommu.c @@ -185,8 +185,7 @@ EXPORT_SYMBOL(get_user_pages_locked); long __get_user_pages_unlocked(struct task_struct *tsk, struct mm_struct *mm, unsigned long start, unsigned long nr_pages, - int write, int force, struct page **pages, - unsigned int gup_flags) + struct page **pages, unsigned int gup_flags) { long ret; down_read(>mmap_sem); @@ -200,8 +199,15 @@ EXPORT_SYMBOL(__get_user_pages_unlocked); long get_user_pages_unlocked(unsigned long start, unsigned long nr_pages, int write, int force, struct page **pages) { + unsigned int flags = 0; + + if (write) + flags |= FOLL_WRITE; + if (force) + flags |= FOLL_FORCE; + return __get_user_pages_unlocked(current, current->mm, start, nr_pages, -write, force, pages, 0); +pages, flags); } EXPORT_SYMBOL(get_user_pages_unlocked); diff --git a/mm/process_vm_access.c b/mm/process_vm_access.c index 07514d4..be8dc8d 100644 --- a/mm/process_vm_access.c +++ b/mm/process_vm_access.c @@ -88,12 +88,16 @@ static int process_vm_rw_single_vec(unsigned long addr, ssize_t rc = 0; unsigned long max_pages_per_loop = PVM_MAX_KMALLOC_PAGES / sizeof(struct pages *); + unsigned int flags = FOLL_REMOTE; /* Work out address and page range required */ if (len == 0) return 0; nr_pages = (addr + len - 1) / PAGE_SIZE - addr / PAGE_SIZE + 1; + if (vm_write) + flags |= FOLL_WRITE; + while (!rc && nr_pages && iov_iter_count(iter)) { int pages = min(nr_pages, max_pages_per_loop); size_t bytes; @@ -104,8
[PATCH 01/10] mm: remove write/force parameters from __get_user_pages_locked()
This patch removes the write and force parameters from __get_user_pages_locked() to make the use of FOLL_FORCE explicit in callers as use of this flag can result in surprising behaviour (and hence bugs) within the mm subsystem. Signed-off-by: Lorenzo Stoakes--- mm/gup.c | 47 +-- 1 file changed, 33 insertions(+), 14 deletions(-) diff --git a/mm/gup.c b/mm/gup.c index 96b2b2f..ba83942 100644 --- a/mm/gup.c +++ b/mm/gup.c @@ -729,7 +729,6 @@ static __always_inline long __get_user_pages_locked(struct task_struct *tsk, struct mm_struct *mm, unsigned long start, unsigned long nr_pages, - int write, int force, struct page **pages, struct vm_area_struct **vmas, int *locked, bool notify_drop, @@ -747,10 +746,6 @@ static __always_inline long __get_user_pages_locked(struct task_struct *tsk, if (pages) flags |= FOLL_GET; - if (write) - flags |= FOLL_WRITE; - if (force) - flags |= FOLL_FORCE; pages_done = 0; lock_dropped = false; @@ -846,9 +841,15 @@ long get_user_pages_locked(unsigned long start, unsigned long nr_pages, int write, int force, struct page **pages, int *locked) { + unsigned int flags = FOLL_TOUCH; + + if (write) + flags |= FOLL_WRITE; + if (force) + flags |= FOLL_FORCE; + return __get_user_pages_locked(current, current->mm, start, nr_pages, - write, force, pages, NULL, locked, true, - FOLL_TOUCH); + pages, NULL, locked, true, flags); } EXPORT_SYMBOL(get_user_pages_locked); @@ -869,9 +870,15 @@ __always_inline long __get_user_pages_unlocked(struct task_struct *tsk, struct m { long ret; int locked = 1; + + if (write) + gup_flags |= FOLL_WRITE; + if (force) + gup_flags |= FOLL_FORCE; + down_read(>mmap_sem); - ret = __get_user_pages_locked(tsk, mm, start, nr_pages, write, force, - pages, NULL, , false, gup_flags); + ret = __get_user_pages_locked(tsk, mm, start, nr_pages, pages, NULL, + , false, gup_flags); if (locked) up_read(>mmap_sem); return ret; @@ -963,9 +970,15 @@ long get_user_pages_remote(struct task_struct *tsk, struct mm_struct *mm, int write, int force, 
struct page **pages, struct vm_area_struct **vmas) { - return __get_user_pages_locked(tsk, mm, start, nr_pages, write, force, - pages, vmas, NULL, false, - FOLL_TOUCH | FOLL_REMOTE); + unsigned int flags = FOLL_TOUCH | FOLL_REMOTE; + + if (write) + flags |= FOLL_WRITE; + if (force) + flags |= FOLL_FORCE; + + return __get_user_pages_locked(tsk, mm, start, nr_pages, pages, vmas, + NULL, false, flags); } EXPORT_SYMBOL(get_user_pages_remote); @@ -979,9 +992,15 @@ long get_user_pages(unsigned long start, unsigned long nr_pages, int write, int force, struct page **pages, struct vm_area_struct **vmas) { + unsigned int flags = FOLL_TOUCH; + + if (write) + flags |= FOLL_WRITE; + if (force) + flags |= FOLL_FORCE; + return __get_user_pages_locked(current, current->mm, start, nr_pages, - write, force, pages, vmas, NULL, false, - FOLL_TOUCH); + pages, vmas, NULL, false, flags); } EXPORT_SYMBOL(get_user_pages); -- 2.10.0
[PATCH 00/10] mm: adjust get_user_pages* functions to explicitly pass FOLL_* flags
This patch series adjusts functions in the get_user_pages* family such that desired FOLL_* flags are passed as an argument rather than implied by separate write/force parameters. The purpose of this change is to make the use of FOLL_FORCE explicit so it is easier to grep for and clearer to callers that this flag is being used. The use of FOLL_FORCE is an issue as it overrides missing VM_READ/VM_WRITE flags for the VMA whose pages we are reading from/writing to, which can result in surprising behaviour. The patch series came out of the discussion around commit 38e0885, which addressed a BUG_ON() being triggered when a page was faulted in with PROT_NONE set but having been overridden by FOLL_FORCE. do_numa_page() was run on the assumption the page _must_ be one marked for NUMA node migration as an actual PROT_NONE page would have been dealt with prior to this code path, however FOLL_FORCE introduced a situation where this assumption did not hold. See https://marc.info/?l=linux-mm&m=147585445805166 for the patch proposal. 
Lorenzo Stoakes (10): mm: remove write/force parameters from __get_user_pages_locked() mm: remove write/force parameters from __get_user_pages_unlocked() mm: replace get_user_pages_unlocked() write/force parameters with gup_flags mm: replace get_user_pages_locked() write/force parameters with gup_flags mm: replace get_vaddr_frames() write/force parameters with gup_flags mm: replace get_user_pages() write/force parameters with gup_flags mm: replace get_user_pages_remote() write/force parameters with gup_flags mm: replace __access_remote_vm() write parameter with gup_flags mm: replace access_remote_vm() write parameter with gup_flags mm: replace access_process_vm() write parameter with gup_flags arch/alpha/kernel/ptrace.c | 9 ++-- arch/blackfin/kernel/ptrace.c | 5 ++- arch/cris/arch-v32/drivers/cryptocop.c | 4 +- arch/cris/arch-v32/kernel/ptrace.c | 4 +- arch/ia64/kernel/err_inject.c | 2 +- arch/ia64/kernel/ptrace.c | 14 +++--- arch/m32r/kernel/ptrace.c | 15 --- arch/mips/kernel/ptrace32.c| 5 ++- arch/mips/mm/gup.c | 2 +- arch/powerpc/kernel/ptrace32.c | 5 ++- arch/s390/mm/gup.c | 3 +- arch/score/kernel/ptrace.c | 10 +++-- arch/sh/mm/gup.c | 3 +- arch/sparc/kernel/ptrace_64.c | 24 +++ arch/sparc/mm/gup.c| 3 +- arch/x86/kernel/step.c | 3 +- arch/x86/mm/gup.c | 2 +- arch/x86/mm/mpx.c | 5 +-- arch/x86/um/ptrace_32.c| 3 +- arch/x86/um/ptrace_64.c| 3 +- drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c| 7 ++- drivers/gpu/drm/etnaviv/etnaviv_gem.c | 7 ++- drivers/gpu/drm/exynos/exynos_drm_g2d.c| 3 +- drivers/gpu/drm/i915/i915_gem_userptr.c| 6 ++- drivers/gpu/drm/radeon/radeon_ttm.c| 3 +- drivers/gpu/drm/via/via_dmablit.c | 4 +- drivers/infiniband/core/umem.c | 6 ++- drivers/infiniband/core/umem_odp.c | 7 ++- drivers/infiniband/hw/mthca/mthca_memfree.c| 2 +- drivers/infiniband/hw/qib/qib_user_pages.c | 3 +- drivers/infiniband/hw/usnic/usnic_uiom.c | 5 ++- drivers/media/pci/ivtv/ivtv-udma.c | 4 +- drivers/media/pci/ivtv/ivtv-yuv.c | 5 ++- drivers/media/platform/omap/omap_vout.c| 2 +- 
drivers/media/v4l2-core/videobuf-dma-sg.c | 7 ++- drivers/media/v4l2-core/videobuf2-memops.c | 6 ++- drivers/misc/mic/scif/scif_rma.c | 3 +- drivers/misc/sgi-gru/grufault.c| 2 +- drivers/platform/goldfish/goldfish_pipe.c | 3 +- drivers/rapidio/devices/rio_mport_cdev.c | 3 +- drivers/scsi/st.c | 5 +-- .../interface/vchiq_arm/vchiq_2835_arm.c | 3 +- .../vc04_services/interface/vchiq_arm/vchiq_arm.c | 3 +- drivers/video/fbdev/pvr2fb.c | 4 +- drivers/virt/fsl_hypervisor.c | 4 +- fs/exec.c | 9 +++- fs/proc/base.c | 19 +--- include/linux/mm.h | 18 kernel/events/uprobes.c| 6 ++- kernel/ptrace.c| 16 --- mm/frame_vector.c | 9 ++-- mm/gup.c | 50 ++ mm/memory.c| 16 ---
[PATCH] powerpc/boot: fix boot on systems with uncompressed kernel image
Commit 1b7898ee276b ("powerpc/boot: Use the pre-boot decompression API") broke boot on systems with an uncompressed kernel image, namely systems using a cuImage. On such systems the compressed boot image (boot wrapper, uncompressed kernel image, ..) is decompressed by u-boot already, therefore the boot wrapper code sees an uncompressed kernel image. The old decompression code silently assumed an uncompressed kernel image if it found no valid gzip signature, whilst the new code bailed out in this case. Fix this by re-introducing such a fallback if no valid compressed image is found. Fixes: 1b7898ee276b ("Use the pre-boot decompression API") Signed-off-by: Heiner Kallweit --- arch/powerpc/boot/main.c | 18 -- 1 file changed, 16 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/boot/main.c b/arch/powerpc/boot/main.c index f7a184b..57d42d1 100644 --- a/arch/powerpc/boot/main.c +++ b/arch/powerpc/boot/main.c @@ -32,9 +32,16 @@ static struct addr_range prep_kernel(void) void *addr = 0; struct elf_info ei; long len; + int uncompressed_image = 0; - partial_decompress(vmlinuz_addr, vmlinuz_size, + len = partial_decompress(vmlinuz_addr, vmlinuz_size, elfheader, sizeof(elfheader), 0); + /* assume uncompressed data if -1 is returned */ + if (len == -1) { + uncompressed_image = 1; + memcpy(elfheader, vmlinuz_addr, sizeof(elfheader)); + printf("No valid compressed data found, assume uncompressed data\n\r"); + } if (!parse_elf64(elfheader, &ei) && !parse_elf32(elfheader, &ei)) fatal("Error: not a valid PPC32 or PPC64 ELF file!\n\r"); @@ -67,6 +74,13 @@ static struct addr_range prep_kernel(void) "device tree\n\r"); } + if (uncompressed_image) { + memcpy(addr, vmlinuz_addr + ei.elfoffset, ei.loadsize); + printf("0x%lx bytes of uncompressed data copied\n\r", + ei.loadsize); + goto out; + } + /* Finally, decompress the kernel */ printf("Decompressing (0x%p <- 0x%p:0x%p)...\n\r", addr, vmlinuz_addr, vmlinuz_addr+vmlinuz_size); @@ -82,7 +96,7 @@ static struct addr_range prep_kernel(void) len, ei.loadsize); printf("Done! 
Decompressed 0x%lx bytes\n\r", len); - +out: flush_cache(addr, ei.loadsize); return (struct addr_range){addr, ei.memsize}; -- 2.10.0
Re: Commit 1b7898ee276b "powerpc/boot: Use the pre-boot decompression API" breaks boot
Am 12.10.2016 um 06:26 schrieb Oliver O'Halloran: > On Tue, Oct 11, 2016 at 7:06 AM, Heiner Kallweitwrote: >>> IMHO in case of using cuboot no CONFIG_KERNEL_ config option >>> should be set and Makefile + code in arch/powerpc/boot should be able >>> to deal with this situation: >>> - don't copy and build the decompression stuff >>> - use an alternative version of prep_kernel() in main.c which doesn't >>> attempt to decompress the kernel image >>> >>> This should be a cleaner solution than probing the kernel image whether >>> it's compressed or not. >>> >> >> This would be the patch implementing the idea. Advantage is that all >> the unnecessary decompression code isn't built. Works fine for me. > > I don't think this approach is viable. The wrapper code is shared > among the various output image formats some of which *will* contain a > compressed kernel image so we can't simply remove the decompressor > from the wrapper. A random example I found in the makefile was > CONFIG_BAMBOO: > >> image-$(CONFIG_BAMBOO) += treeImage.bamboo cuImage.bamboo > > When building for this platform Kbuild will produce treeboot and a > cuboot image. Unlike uboot, Treeboot doesn't do any decompression so > the wrapper needs to decompress the kernel itself. The probing > solution more or less matches the old behaviour (which we know works) > so I think we should just stick with that. > > - Oliver > Indeed, I also figured that out later. As you say, then let's stick with re-introducing the probing. I'll send the patch for this. Heiner
Re: [PATCH] powerpc/fadump: Fix the race in crash_fadump().
On 10/10/2016 04:22 PM, Michael Ellerman wrote: > Mahesh J Salgaonkar writes: > >> From: Mahesh Salgaonkar >> >> There are chances that multiple CPUs can call crash_fadump() simultaneously >> and would start duplicating same info to vmcoreinfo ELF note section. This >> causes makedumpfile to fail during kdump capture. One example is, >> triggering dumprestart from HMC which sends system reset to all the CPUs at >> once. > ... >> diff --git a/arch/powerpc/kernel/fadump.c b/arch/powerpc/kernel/fadump.c >> index b3a6633..2ed9d1c 100644 >> --- a/arch/powerpc/kernel/fadump.c >> +++ b/arch/powerpc/kernel/fadump.c >> @@ -402,8 +402,14 @@ void crash_fadump(struct pt_regs *regs, const char *str) >> { >> struct fadump_crash_info_header *fdh = NULL; >> >> -if (!fw_dump.dump_registered || !fw_dump.fadumphdr_addr) >> +mutex_lock(_mutex); > > What happens when a crashing CPU can't get the mutex and goes to sleep? Got your point. I think I should use mutex_trylock() here. There are only two reasons a crashing CPU can't get the mutex: 1) another CPU is also crashing, has already taken the mutex and is on its way to trigger fadump, or 2) we are in the middle of fadump register/un-register, in which case we can just return and go to normal panic. Thanks, -Mahesh.
Re: [PATCH] powerpc: cmp -> cmpd for 64-bit
* Segher Boessenkool [2016-10-12 08:26:48]: > On Wed, Oct 12, 2016 at 02:05:19PM +1100, Michael Ellerman wrote: > > Segher Boessenkool writes: > > [snip] > > > > --- a/arch/powerpc/include/asm/cpuidle.h > > > +++ b/arch/powerpc/include/asm/cpuidle.h > > > @@ -26,7 +26,7 @@ extern u64 pnv_first_deep_stop_state; > > > > #define IDLE_STATE_ENTER_SEQ(IDLE_INST) \ > > /* Magic NAP/SLEEP/WINKLE mode enter sequence */\ > > > std r0,0(r1); \ > > > ptesync;\ > > > ld r0,0(r1); \ > > > -1: cmp cr0,r0,r0; \ > > > +1: cmpd cr0,r0,r0; \ > > > bne 1b; \ > > > IDLE_INST; \ > > > b . > > > > What's this one doing, is it a bug? I can't really tell without knowing > > what the magic sequence is meant to do. This one is the recommended idle state entry sequence described in the ISA. We need to ensure the context is fully saved and also create a register dependency using cmp and a loop which will ideally not be taken. This will get the thread (pipeline) ready to start losing state when the idle instruction is executed. ISA 2.07 Section: 3.3.2.1 Entering and Exiting Power-Saving Mode > > It looks like it is making sure the ptesync is done. The ld/cmp/bne > is the usual to make sure the ld is done, and in std/ptesync/ld the ld > won't be done before the ptesync is done. > > The cmp always compares equal, of course, so both cmpw and cmpd would > work fine here. cmpd looks better after ld ;-) Yes :) cmpd or cmpw would provide the same result as far as this code sequence is concerned. I agree that cmpd is more appropriate here. --Vaidy
Re: [PATCH v2 0/7] PCI: layerscape: Cleanups
On 10/12/2016 11:07 AM, Bjorn Helgaas wrote: > I applied these to pci/host-layerscape for v4.9. I hope to ask Linus to > pull them tomorrow, so if you see any issues, let me know soon. > Looks good to me. Thanks. Roy
Re: [PATCH v2 0/7] PCI: layerscape: Cleanups
On Wed, Oct 12, 2016 at 08:57:22AM -0500, Bjorn Helgaas wrote: > - Add local "dev" pointers to reduce repetition of things like > "&pdev->dev". > > - Remove platform drvdata because it appears unused (we called > platform_set_drvdata() but not platform_get_drvdata()). > > - Remove redundant struct members. > > - Pass device-specific struct to internal functions for consistency. > > - Move struct pcie_port setup to probe function for consistency. > > - Remove unused ls_add_pcie_port() platform_device argument. > > Nothing here should change the behavior of the driver. > > Changes from v1: > I dropped the following patch because it was a lot of churn for > questionable benefit: > PCI: layerscape: Name private struct pointer "ls" consistently > > --- > > Bjorn Helgaas (7): > PCI: layerscape: Add local struct device pointers > PCI: layerscape: Remove unused platform data > PCI: layerscape: Remove redundant struct ls_pcie.dbi > PCI: layerscape: Pass device-specific struct to internal functions > PCI: layerscape: Move struct pcie_port setup to probe function > PCI: layerscape: Remove unused ls_add_pcie_port() platform_device arg > PCI: layerscape: Reorder struct ls_pcie > > > drivers/pci/host/pci-layerscape.c | 65 > +++-- > 1 file changed, 33 insertions(+), 32 deletions(-) I applied these to pci/host-layerscape for v4.9. I hope to ask Linus to pull them tomorrow, so if you see any issues, let me know soon.
Re: [PATCH] powerpc/mm: Prevent unlikely crash in copro_calculate_slb()
ping? The patch still applies cleanly on recent trees. Fred Le 17/06/2016 à 18:53, Frederic Barrat a écrit : If a cxl adapter faults on an invalid address for a kernel context, we may enter copro_calculate_slb() with a NULL mm pointer (kernel context) and an effective address which looks like a user address. Which will cause a crash when dereferencing mm. It is clearly an AFU bug, but there's no reason to crash either. So return an error, so that cxl can ack the interrupt with an address error. Signed-off-by: Frederic BarratCc: --- arch/powerpc/mm/copro_fault.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/powerpc/mm/copro_fault.c b/arch/powerpc/mm/copro_fault.c index 6527882..ddfd274 100644 --- a/arch/powerpc/mm/copro_fault.c +++ b/arch/powerpc/mm/copro_fault.c @@ -106,6 +106,8 @@ int copro_calculate_slb(struct mm_struct *mm, u64 ea, struct copro_slb *slb) switch (REGION_ID(ea)) { case USER_REGION_ID: pr_devel("%s: 0x%llx -- USER_REGION_ID\n", __func__, ea); + if (mm == NULL) + return 1; psize = get_slice_psize(mm, ea); ssize = user_segment_size(ea); vsid = get_vsid(mm->context.id, ea, ssize);
Re: [PATCH v15 03/15] selftests/powerpc: Add ptrace tests for EBB
On Fri, Oct 07, 2016 at 08:44:48AM +1100, Michael Ellerman wrote: > wei.guo.si...@gmail.com writes: > > > From: Anshuman Khandual> > > > This patch adds ptrace interface test for EBB/PMU specific > > registers. This also adds some generic ptrace interface > > based helper functions to be used by other patches later > > on in the series. > > This is consistently failing for me on a P8 Tuleta (pvr 004b 0201): > > # ./ptrace-ebb > test: ptrace_ebb_pmu > tags: git_version:v4.8-rc5-176-g89cf1de0ae90 > EBBRR: 100059f8 > EBBHR: 100053cc; expected: 100053cc > BESCR: 8001 > SIAR: 100012d0 > SDAR: 3fff7e4cc000 > SIER: 300; expected: 200 > MMCR2: 0; expected: 0 > MMCR0: 18080; expected: 18080 > failure: ptrace_ebb_pmu > > cheers Michael, Yes, SIER has a different value on bare metal and in a virtual machine due to the different MSR[HV] value. I will correct it. Originally I only tested in virtual BE/LE machines. Currently all test cases (with the fix) pass on one bare-metal P8 machine with an LE OS installed, and I will try to find another bare-metal machine with a BE OS installed to test. Thanks for pointing it out. BR, Simon
[PATCH v2 7/7] PCI: layerscape: Reorder struct ls_pcie
Reorder struct ls_pcie to put generic fields first. No functional change intended. Signed-off-by: Bjorn Helgaas--- drivers/pci/host/pci-layerscape.c |2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/pci/host/pci-layerscape.c b/drivers/pci/host/pci-layerscape.c index 3a86c1a..2cb7315 100644 --- a/drivers/pci/host/pci-layerscape.c +++ b/drivers/pci/host/pci-layerscape.c @@ -45,9 +45,9 @@ struct ls_pcie_drvdata { }; struct ls_pcie { + struct pcie_port pp;/* pp.dbi_base is DT regs */ void __iomem *lut; struct regmap *scfg; - struct pcie_port pp; const struct ls_pcie_drvdata *drvdata; int index; };
[PATCH v2 6/7] PCI: layerscape: Remove unused ls_add_pcie_port() platform_device arg
ls_add_pcie_port() doesn't use the platform_device pointer passed to it, so remove it. No functional change intended. Signed-off-by: Bjorn Helgaas --- drivers/pci/host/pci-layerscape.c |5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/pci/host/pci-layerscape.c b/drivers/pci/host/pci-layerscape.c index 2d77104..3a86c1a 100644 --- a/drivers/pci/host/pci-layerscape.c +++ b/drivers/pci/host/pci-layerscape.c @@ -213,8 +213,7 @@ static const struct of_device_id ls_pcie_of_match[] = { { }, }; -static int __init ls_add_pcie_port(struct ls_pcie *pcie, - struct platform_device *pdev) +static int __init ls_add_pcie_port(struct ls_pcie *pcie) { struct pcie_port *pp = &pcie->pp; struct device *dev = pp->dev; @@ -263,7 +262,7 @@ static int __init ls_pcie_probe(struct platform_device *pdev) if (!ls_pcie_is_bridge(pcie)) return -ENODEV; - ret = ls_add_pcie_port(pcie, pdev); + ret = ls_add_pcie_port(pcie); if (ret < 0) return ret;
[PATCH v2 5/7] PCI: layerscape: Move struct pcie_port setup to probe function
Do the basic pcie_port setup in the probe function for consistency with other drivers. No functional change intended. Signed-off-by: Bjorn Helgaas --- drivers/pci/host/pci-layerscape.c | 10 ++ 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/drivers/pci/host/pci-layerscape.c b/drivers/pci/host/pci-layerscape.c index 2b31296..2d77104 100644 --- a/drivers/pci/host/pci-layerscape.c +++ b/drivers/pci/host/pci-layerscape.c @@ -216,13 +216,10 @@ static const struct of_device_id ls_pcie_of_match[] = { static int __init ls_add_pcie_port(struct ls_pcie *pcie, struct platform_device *pdev) { - struct device *dev = &pdev->dev; struct pcie_port *pp = &pcie->pp; + struct device *dev = pp->dev; int ret; - pp->dev = dev; - pp->ops = pcie->drvdata->ops; - ret = dw_pcie_host_init(pp); if (ret) { dev_err(dev, "failed to initialize host\n"); @@ -237,6 +234,7 @@ static int __init ls_pcie_probe(struct platform_device *pdev) struct device *dev = &pdev->dev; const struct of_device_id *match; struct ls_pcie *pcie; + struct pcie_port *pp; struct resource *dbi_base; int ret; @@ -248,6 +246,10 @@ static int __init ls_pcie_probe(struct platform_device *pdev) if (!pcie) return -ENOMEM; + pp = &pcie->pp; + pp->dev = dev; + pp->ops = pcie->drvdata->ops; + dbi_base = platform_get_resource_byname(pdev, IORESOURCE_MEM, "regs"); pcie->pp.dbi_base = devm_ioremap_resource(dev, dbi_base); if (IS_ERR(pcie->pp.dbi_base)) {
[PATCH v2 4/7] PCI: layerscape: Pass device-specific struct to internal functions
Only interfaces used from outside the driver, e.g., those called by the DesignWare core, need to accept pointers to the generic struct pcie_port. Internal interfaces can accept pointers to the device-specific struct, which makes them more straightforward. No functional change intended. Signed-off-by: Bjorn Helgaas --- drivers/pci/host/pci-layerscape.c |6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/pci/host/pci-layerscape.c b/drivers/pci/host/pci-layerscape.c index bdafe55..2b31296 100644 --- a/drivers/pci/host/pci-layerscape.c +++ b/drivers/pci/host/pci-layerscape.c @@ -213,12 +213,12 @@ static const struct of_device_id ls_pcie_of_match[] = { { }, }; -static int __init ls_add_pcie_port(struct pcie_port *pp, +static int __init ls_add_pcie_port(struct ls_pcie *pcie, struct platform_device *pdev) { struct device *dev = &pdev->dev; + struct pcie_port *pp = &pcie->pp; int ret; - struct ls_pcie *pcie = to_ls_pcie(pp); pp->dev = dev; pp->ops = pcie->drvdata->ops; @@ -261,7 +261,7 @@ static int __init ls_pcie_probe(struct platform_device *pdev) if (!ls_pcie_is_bridge(pcie)) return -ENODEV; - ret = ls_add_pcie_port(&pcie->pp, pdev); + ret = ls_add_pcie_port(pcie, pdev); if (ret < 0) return ret;
[PATCH v2 3/7] PCI: layerscape: Remove redundant struct ls_pcie.dbi
Remove the struct ls_pcie.dbi member, which is a duplicate of the generic pp.dbi_base member. No functional change intended. Signed-off-by: Bjorn Helgaas--- drivers/pci/host/pci-layerscape.c | 24 +++- 1 file changed, 11 insertions(+), 13 deletions(-) diff --git a/drivers/pci/host/pci-layerscape.c b/drivers/pci/host/pci-layerscape.c index ebed415..bdafe55 100644 --- a/drivers/pci/host/pci-layerscape.c +++ b/drivers/pci/host/pci-layerscape.c @@ -45,7 +45,6 @@ struct ls_pcie_drvdata { }; struct ls_pcie { - void __iomem *dbi; void __iomem *lut; struct regmap *scfg; struct pcie_port pp; @@ -59,7 +58,7 @@ static bool ls_pcie_is_bridge(struct ls_pcie *pcie) { u32 header_type; - header_type = ioread8(pcie->dbi + PCI_HEADER_TYPE); + header_type = ioread8(pcie->pp.dbi_base + PCI_HEADER_TYPE); header_type &= 0x7f; return header_type == PCI_HEADER_TYPE_BRIDGE; @@ -68,13 +67,13 @@ static bool ls_pcie_is_bridge(struct ls_pcie *pcie) /* Clear multi-function bit */ static void ls_pcie_clear_multifunction(struct ls_pcie *pcie) { - iowrite8(PCI_HEADER_TYPE_BRIDGE, pcie->dbi + PCI_HEADER_TYPE); + iowrite8(PCI_HEADER_TYPE_BRIDGE, pcie->pp.dbi_base + PCI_HEADER_TYPE); } /* Fix class value */ static void ls_pcie_fix_class(struct ls_pcie *pcie) { - iowrite16(PCI_CLASS_BRIDGE_PCI, pcie->dbi + PCI_CLASS_DEVICE); + iowrite16(PCI_CLASS_BRIDGE_PCI, pcie->pp.dbi_base + PCI_CLASS_DEVICE); } /* Drop MSG TLP except for Vendor MSG */ @@ -82,9 +81,9 @@ static void ls_pcie_drop_msg_tlp(struct ls_pcie *pcie) { u32 val; - val = ioread32(pcie->dbi + PCIE_STRFMR1); + val = ioread32(pcie->pp.dbi_base + PCIE_STRFMR1); val &= 0xDFFF; - iowrite32(val, pcie->dbi + PCIE_STRFMR1); + iowrite32(val, pcie->pp.dbi_base + PCIE_STRFMR1); } static int ls1021_pcie_link_up(struct pcie_port *pp) @@ -149,11 +148,11 @@ static void ls_pcie_host_init(struct pcie_port *pp) { struct ls_pcie *pcie = to_ls_pcie(pp); - iowrite32(1, pcie->dbi + PCIE_DBI_RO_WR_EN); + iowrite32(1, pcie->pp.dbi_base + PCIE_DBI_RO_WR_EN); 
ls_pcie_fix_class(pcie); ls_pcie_clear_multifunction(pcie); ls_pcie_drop_msg_tlp(pcie); - iowrite32(0, pcie->dbi + PCIE_DBI_RO_WR_EN); + iowrite32(0, pcie->pp.dbi_base + PCIE_DBI_RO_WR_EN); } static int ls_pcie_msi_host_init(struct pcie_port *pp, @@ -222,7 +221,6 @@ static int __init ls_add_pcie_port(struct pcie_port *pp, struct ls_pcie *pcie = to_ls_pcie(pp); pp->dev = dev; - pp->dbi_base = pcie->dbi; pp->ops = pcie->drvdata->ops; ret = dw_pcie_host_init(pp); @@ -251,14 +249,14 @@ static int __init ls_pcie_probe(struct platform_device *pdev) return -ENOMEM; dbi_base = platform_get_resource_byname(pdev, IORESOURCE_MEM, "regs"); - pcie->dbi = devm_ioremap_resource(dev, dbi_base); - if (IS_ERR(pcie->dbi)) { + pcie->pp.dbi_base = devm_ioremap_resource(dev, dbi_base); + if (IS_ERR(pcie->pp.dbi_base)) { dev_err(dev, "missing *regs* space\n"); - return PTR_ERR(pcie->dbi); + return PTR_ERR(pcie->pp.dbi_base); } pcie->drvdata = match->data; - pcie->lut = pcie->dbi + pcie->drvdata->lut_offset; + pcie->lut = pcie->pp.dbi_base + pcie->drvdata->lut_offset; if (!ls_pcie_is_bridge(pcie)) return -ENODEV;
[PATCH v2 2/7] PCI: layerscape: Remove unused platform data
The layerscape driver never uses the platform drvdata pointer, so don't bother setting it. No functional change intended. Signed-off-by: Bjorn Helgaas --- drivers/pci/host/pci-layerscape.c |2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/pci/host/pci-layerscape.c b/drivers/pci/host/pci-layerscape.c index 08b511e..ebed415 100644 --- a/drivers/pci/host/pci-layerscape.c +++ b/drivers/pci/host/pci-layerscape.c @@ -267,8 +267,6 @@ static int __init ls_pcie_probe(struct platform_device *pdev) if (ret < 0) return ret; - platform_set_drvdata(pdev, pcie); - return 0; }
[PATCH v2 1/7] PCI: layerscape: Add local struct device pointers
Use a local "struct device *dev" for brevity and consistency with other drivers. No functional change intended. Signed-off-by: Bjorn Helgaas--- drivers/pci/host/pci-layerscape.c | 26 +++--- 1 file changed, 15 insertions(+), 11 deletions(-) diff --git a/drivers/pci/host/pci-layerscape.c b/drivers/pci/host/pci-layerscape.c index 114ba81..08b511e 100644 --- a/drivers/pci/host/pci-layerscape.c +++ b/drivers/pci/host/pci-layerscape.c @@ -106,18 +106,19 @@ static int ls1021_pcie_link_up(struct pcie_port *pp) static void ls1021_pcie_host_init(struct pcie_port *pp) { + struct device *dev = pp->dev; struct ls_pcie *pcie = to_ls_pcie(pp); u32 index[2]; - pcie->scfg = syscon_regmap_lookup_by_phandle(pp->dev->of_node, + pcie->scfg = syscon_regmap_lookup_by_phandle(dev->of_node, "fsl,pcie-scfg"); if (IS_ERR(pcie->scfg)) { - dev_err(pp->dev, "No syscfg phandle specified\n"); + dev_err(dev, "No syscfg phandle specified\n"); pcie->scfg = NULL; return; } - if (of_property_read_u32_array(pp->dev->of_node, + if (of_property_read_u32_array(dev->of_node, "fsl,pcie-scfg", index, 2)) { pcie->scfg = NULL; return; @@ -158,8 +159,9 @@ static void ls_pcie_host_init(struct pcie_port *pp) static int ls_pcie_msi_host_init(struct pcie_port *pp, struct msi_controller *chip) { + struct device *dev = pp->dev; + struct device_node *np = dev->of_node; struct device_node *msi_node; - struct device_node *np = pp->dev->of_node; /* * The MSI domain is set by the generic of_msi_configure(). 
This @@ -169,7 +171,7 @@ static int ls_pcie_msi_host_init(struct pcie_port *pp, */ msi_node = of_parse_phandle(np, "msi-parent", 0); if (!msi_node) { - dev_err(pp->dev, "failed to find msi-parent\n"); + dev_err(dev, "failed to find msi-parent\n"); return -EINVAL; } @@ -215,16 +217,17 @@ static const struct of_device_id ls_pcie_of_match[] = { static int __init ls_add_pcie_port(struct pcie_port *pp, struct platform_device *pdev) { + struct device *dev = >dev; int ret; struct ls_pcie *pcie = to_ls_pcie(pp); - pp->dev = >dev; + pp->dev = dev; pp->dbi_base = pcie->dbi; pp->ops = pcie->drvdata->ops; ret = dw_pcie_host_init(pp); if (ret) { - dev_err(pp->dev, "failed to initialize host\n"); + dev_err(dev, "failed to initialize host\n"); return ret; } @@ -233,23 +236,24 @@ static int __init ls_add_pcie_port(struct pcie_port *pp, static int __init ls_pcie_probe(struct platform_device *pdev) { + struct device *dev = >dev; const struct of_device_id *match; struct ls_pcie *pcie; struct resource *dbi_base; int ret; - match = of_match_device(ls_pcie_of_match, >dev); + match = of_match_device(ls_pcie_of_match, dev); if (!match) return -ENODEV; - pcie = devm_kzalloc(>dev, sizeof(*pcie), GFP_KERNEL); + pcie = devm_kzalloc(dev, sizeof(*pcie), GFP_KERNEL); if (!pcie) return -ENOMEM; dbi_base = platform_get_resource_byname(pdev, IORESOURCE_MEM, "regs"); - pcie->dbi = devm_ioremap_resource(>dev, dbi_base); + pcie->dbi = devm_ioremap_resource(dev, dbi_base); if (IS_ERR(pcie->dbi)) { - dev_err(>dev, "missing *regs* space\n"); + dev_err(dev, "missing *regs* space\n"); return PTR_ERR(pcie->dbi); }
[PATCH v2 0/7] PCI: layerscape: Cleanups
- Add local "dev" pointers to reduce repetition of things like "&pdev->dev". - Remove platform drvdata because it appears unused (we called platform_set_drvdata() but not platform_get_drvdata()). - Remove redundant struct members. - Pass device-specific struct to internal functions for consistency. - Move struct pcie_port setup to probe function for consistency. - Remove unused ls_add_pcie_port() platform_device argument. Nothing here should change the behavior of the driver. Changes from v1: I dropped the following patch because it was a lot of churn for questionable benefit: PCI: layerscape: Name private struct pointer "ls" consistently --- Bjorn Helgaas (7): PCI: layerscape: Add local struct device pointers PCI: layerscape: Remove unused platform data PCI: layerscape: Remove redundant struct ls_pcie.dbi PCI: layerscape: Pass device-specific struct to internal functions PCI: layerscape: Move struct pcie_port setup to probe function PCI: layerscape: Remove unused ls_add_pcie_port() platform_device arg PCI: layerscape: Reorder struct ls_pcie drivers/pci/host/pci-layerscape.c | 65 +++-- 1 file changed, 33 insertions(+), 32 deletions(-)
Re: [PATCH] powerpc: cmp -> cmpd for 64-bit
On Wed, Oct 12, 2016 at 02:05:19PM +1100, Michael Ellerman wrote: > Segher Boessenkool writes: > > > PowerPC's "cmp" instruction has four operands. Normally people write > > "cmpw" or "cmpd" for the second cmp operand 0 or 1. But, frequently > > people forget, and write "cmp" with just three operands. > > > > With older binutils this is silently accepted as if this was "cmpw", > > while often "cmpd" is wanted. With newer binutils GAS will complain > > about this for 64-bit code. For 32-bit code it still silently assumes > > "cmpw" is what is meant. > > Thanks. > > Anton already sent a fix for the two vdso ones, which were real bugs, > and that's now in Linus' tree. Ah cool. You'll just need the one then (and many more for book4e, but I cannot really handle that, other people can do that a lot better). > > --- a/arch/powerpc/include/asm/cpuidle.h > > +++ b/arch/powerpc/include/asm/cpuidle.h > > @@ -26,7 +26,7 @@ extern u64 pnv_first_deep_stop_state; > > #define IDLE_STATE_ENTER_SEQ(IDLE_INST) \ > /* Magic NAP/SLEEP/WINKLE mode enter sequence */\ > > std r0,0(r1); \ > > ptesync;\ > > ld r0,0(r1); \ > > -1: cmp cr0,r0,r0; \ > > +1: cmpd cr0,r0,r0; \ > > bne 1b; \ > > IDLE_INST; \ > > b . > > What's this one doing, is it a bug? I can't really tell without knowing > what the magic sequence is meant to do. It looks like it is making sure the ptesync is done. The ld/cmp/bne is the usual to make sure the ld is done, and in std/ptesync/ld the ld won't be done before the ptesync is done. The cmp always compares equal, of course, so both cmpw and cmpd would work fine here. cmpd looks better after ld ;-) Segher
Re: [PATCH v3 08/11] powerpc/tracing: fix compat syscall handling
On 12.10.2016 11:59, Michael Ellerman wrote: > I went to test this and noticed the exit and enter events appear to be reversed in time? (your series on top of 24532f768121) Thanks for testing the patch - I've found a bug that has sneaked in while cleaning up the patches before submission ... I'll fix it in the next iteration. Marcin
Re: [PATCH v3 03/11] tracing/syscalls: add compat syscall metadata
On 12.10.2016 10:50, Michael Ellerman wrote: <...> It's annoying that we have to duplicate all that just to do a + 1. How about this as a precursor? > <...> Thanks for the suggestion - unless anyone sees a reason to keep the current solution I'll change it. Marcin
[PATCH v2 13/16] scsi: fc: use bsg_job_done
fc_bsg_jobdone() and bsg_job_done() are 1:1 copies now so use the bsg-lib one instead of the FC private implementation. Signed-off-by: Johannes Thumshirn--- drivers/s390/scsi/zfcp_fc.c | 2 +- drivers/scsi/bfa/bfad_bsg.c | 4 ++-- drivers/scsi/ibmvscsi/ibmvfc.c | 2 +- drivers/scsi/libfc/fc_lport.c| 4 ++-- drivers/scsi/lpfc/lpfc_bsg.c | 38 +- drivers/scsi/qla2xxx/qla_bsg.c | 44 drivers/scsi/scsi_transport_fc.c | 41 +++-- include/scsi/scsi_transport_fc.h | 2 -- 8 files changed, 50 insertions(+), 87 deletions(-) diff --git a/drivers/s390/scsi/zfcp_fc.c b/drivers/s390/scsi/zfcp_fc.c index b1b4129..a0f9c82 100644 --- a/drivers/s390/scsi/zfcp_fc.c +++ b/drivers/s390/scsi/zfcp_fc.c @@ -893,7 +893,7 @@ static void zfcp_fc_ct_els_job_handler(void *data) jr->reply_payload_rcv_len = job->reply_payload.payload_len; jr->reply_data.ctels_reply.status = FC_CTELS_STATUS_OK; jr->result = zfcp_ct_els->status ? -EIO : 0; - fc_bsg_jobdone(job, jr->result, jr->reply_payload_rcv_len); + bsg_job_done(job, jr->result, jr->reply_payload_rcv_len); } static struct zfcp_fc_wka_port *zfcp_fc_job_wka_port(struct bsg_job *job) diff --git a/drivers/scsi/bfa/bfad_bsg.c b/drivers/scsi/bfa/bfad_bsg.c index cdc25e6..a9a0016 100644 --- a/drivers/scsi/bfa/bfad_bsg.c +++ b/drivers/scsi/bfa/bfad_bsg.c @@ -3179,7 +3179,7 @@ bfad_im_bsg_vendor_request(struct bsg_job *job) bsg_reply->reply_payload_rcv_len = job->reply_payload.payload_len; bsg_reply->result = rc; - fc_bsg_jobdone(job, bsg_reply->result, + bsg_job_done(job, bsg_reply->result, bsg_reply->reply_payload_rcv_len); return rc; error: @@ -3555,7 +3555,7 @@ out: bsg_reply->result = rc; if (rc == BFA_STATUS_OK) - fc_bsg_jobdone(job, bsg_reply->result, + bsg_job_done(job, bsg_reply->result, bsg_reply->reply_payload_rcv_len); return rc; diff --git a/drivers/scsi/ibmvscsi/ibmvfc.c b/drivers/scsi/ibmvscsi/ibmvfc.c index 9fd8975..85aa8ab 100644 --- a/drivers/scsi/ibmvscsi/ibmvfc.c +++ b/drivers/scsi/ibmvscsi/ibmvfc.c @@ -1940,7 +1940,7 @@ static int 
ibmvfc_bsg_request(struct bsg_job *job) ibmvfc_free_event(evt); spin_unlock_irqrestore(vhost->host->host_lock, flags); bsg_reply->result = rc; - fc_bsg_jobdone(job, bsg_reply->result, + bsg_job_done(job, bsg_reply->result, bsg_reply->reply_payload_rcv_len); rc = 0; out: diff --git a/drivers/scsi/libfc/fc_lport.c b/drivers/scsi/libfc/fc_lport.c index 58a3ccb..40d9038 100644 --- a/drivers/scsi/libfc/fc_lport.c +++ b/drivers/scsi/libfc/fc_lport.c @@ -1911,7 +1911,7 @@ static void fc_lport_bsg_resp(struct fc_seq *sp, struct fc_frame *fp, bsg_reply->result = (PTR_ERR(fp) == -FC_EX_CLOSED) ? -ECONNABORTED : -ETIMEDOUT; job->reply_len = sizeof(uint32_t); - fc_bsg_jobdone(job, bsg_reply->result, + bsg_job_done(job, bsg_reply->result, bsg_reply->reply_payload_rcv_len); kfree(info); return; @@ -1946,7 +1946,7 @@ static void fc_lport_bsg_resp(struct fc_seq *sp, struct fc_frame *fp, bsg_reply->reply_payload_rcv_len = job->reply_payload.payload_len; bsg_reply->result = 0; - fc_bsg_jobdone(job, bsg_reply->result, + bsg_job_done(job, bsg_reply->result, bsg_reply->reply_payload_rcv_len); kfree(info); } diff --git a/drivers/scsi/lpfc/lpfc_bsg.c b/drivers/scsi/lpfc/lpfc_bsg.c index ca21f25..a862437 100644 --- a/drivers/scsi/lpfc/lpfc_bsg.c +++ b/drivers/scsi/lpfc/lpfc_bsg.c @@ -371,7 +371,7 @@ lpfc_bsg_send_mgmt_cmd_cmp(struct lpfc_hba *phba, if (job) { bsg_reply->result = rc; - fc_bsg_jobdone(job, bsg_reply->result, + bsg_job_done(job, bsg_reply->result, bsg_reply->reply_payload_rcv_len); } return; @@ -645,7 +645,7 @@ lpfc_bsg_rport_els_cmp(struct lpfc_hba *phba, if (job) { bsg_reply->result = rc; - fc_bsg_jobdone(job, bsg_reply->result, + bsg_job_done(job, bsg_reply->result, bsg_reply->reply_payload_rcv_len); } return; @@ -1138,7 +1138,7 @@ lpfc_bsg_ct_unsol_event(struct lpfc_hba *phba, struct lpfc_sli_ring *pring, job->dd_data = NULL; /* complete the job back to userspace */ spin_unlock_irqrestore(>ct_ev_lock, flags); - fc_bsg_jobdone(job, bsg_reply->result, +
[PATCH v2 10/16] scsi: change FC drivers to use 'struct bsg_job'
Change FC drivers to use 'struct bsg_job' from bsg-lib.h instead of 'struct fc_bsg_job' from scsi_transport_fc.h and remove 'struct fc_bsg_job'. Signed-off-by: Johannes Thumshirn--- drivers/s390/scsi/zfcp_ext.h | 4 +-- drivers/s390/scsi/zfcp_fc.c | 15 drivers/scsi/bfa/bfad_bsg.c | 10 +++--- drivers/scsi/bfa/bfad_im.h | 4 +-- drivers/scsi/ibmvscsi/ibmvfc.c | 9 ++--- drivers/scsi/libfc/fc_lport.c| 10 +++--- drivers/scsi/lpfc/lpfc_bsg.c | 74 drivers/scsi/lpfc/lpfc_crtn.h| 4 +-- drivers/scsi/qla2xxx/qla_bsg.c | 61 + drivers/scsi/qla2xxx/qla_def.h | 2 +- drivers/scsi/qla2xxx/qla_gbl.h | 4 +-- drivers/scsi/qla2xxx/qla_iocb.c | 8 ++--- drivers/scsi/qla2xxx/qla_isr.c | 6 ++-- drivers/scsi/qla2xxx/qla_mr.c| 5 +-- drivers/scsi/scsi_transport_fc.c | 20 +-- include/scsi/libfc.h | 2 +- include/scsi/scsi_transport_fc.h | 63 ++ 17 files changed, 138 insertions(+), 163 deletions(-) diff --git a/drivers/s390/scsi/zfcp_ext.h b/drivers/s390/scsi/zfcp_ext.h index 5b50065..ab163be 100644 --- a/drivers/s390/scsi/zfcp_ext.h +++ b/drivers/s390/scsi/zfcp_ext.h @@ -82,8 +82,8 @@ extern void zfcp_fc_link_test_work(struct work_struct *); extern void zfcp_fc_wka_ports_force_offline(struct zfcp_fc_wka_ports *); extern int zfcp_fc_gs_setup(struct zfcp_adapter *); extern void zfcp_fc_gs_destroy(struct zfcp_adapter *); -extern int zfcp_fc_exec_bsg_job(struct fc_bsg_job *); -extern int zfcp_fc_timeout_bsg_job(struct fc_bsg_job *); +extern int zfcp_fc_exec_bsg_job(struct bsg_job *); +extern int zfcp_fc_timeout_bsg_job(struct bsg_job *); extern void zfcp_fc_sym_name_update(struct work_struct *); extern unsigned int zfcp_fc_port_scan_backoff(void); extern void zfcp_fc_conditional_port_scan(struct zfcp_adapter *); diff --git a/drivers/s390/scsi/zfcp_fc.c b/drivers/s390/scsi/zfcp_fc.c index 1977a66..b1b4129 100644 --- a/drivers/s390/scsi/zfcp_fc.c +++ b/drivers/s390/scsi/zfcp_fc.c @@ -13,6 +13,7 @@ #include #include #include +#include #include #include #include "zfcp_ext.h" @@ -885,7 +886,7 @@ out_free: 
static void zfcp_fc_ct_els_job_handler(void *data) { - struct fc_bsg_job *job = data; + struct bsg_job *job = data; struct zfcp_fsf_ct_els *zfcp_ct_els = job->dd_data; struct fc_bsg_reply *jr = job->reply; @@ -895,7 +896,7 @@ static void zfcp_fc_ct_els_job_handler(void *data) fc_bsg_jobdone(job, jr->result, jr->reply_payload_rcv_len); } -static struct zfcp_fc_wka_port *zfcp_fc_job_wka_port(struct fc_bsg_job *job) +static struct zfcp_fc_wka_port *zfcp_fc_job_wka_port(struct bsg_job *job) { u32 preamble_word1; u8 gs_type; @@ -925,7 +926,7 @@ static struct zfcp_fc_wka_port *zfcp_fc_job_wka_port(struct fc_bsg_job *job) static void zfcp_fc_ct_job_handler(void *data) { - struct fc_bsg_job *job = data; + struct bsg_job *job = data; struct zfcp_fc_wka_port *wka_port; wka_port = zfcp_fc_job_wka_port(job); @@ -934,7 +935,7 @@ static void zfcp_fc_ct_job_handler(void *data) zfcp_fc_ct_els_job_handler(data); } -static int zfcp_fc_exec_els_job(struct fc_bsg_job *job, +static int zfcp_fc_exec_els_job(struct bsg_job *job, struct zfcp_adapter *adapter) { struct zfcp_fsf_ct_els *els = job->dd_data; @@ -957,7 +958,7 @@ static int zfcp_fc_exec_els_job(struct fc_bsg_job *job, return zfcp_fsf_send_els(adapter, d_id, els, job->req->timeout / HZ); } -static int zfcp_fc_exec_ct_job(struct fc_bsg_job *job, +static int zfcp_fc_exec_ct_job(struct bsg_job *job, struct zfcp_adapter *adapter) { int ret; @@ -980,7 +981,7 @@ static int zfcp_fc_exec_ct_job(struct fc_bsg_job *job, return ret; } -int zfcp_fc_exec_bsg_job(struct fc_bsg_job *job) +int zfcp_fc_exec_bsg_job(struct bsg_job *job) { struct Scsi_Host *shost; struct zfcp_adapter *adapter; @@ -1010,7 +1011,7 @@ int zfcp_fc_exec_bsg_job(struct fc_bsg_job *job) } } -int zfcp_fc_timeout_bsg_job(struct fc_bsg_job *job) +int zfcp_fc_timeout_bsg_job(struct bsg_job *job) { /* hardware tracks timeout, reset bsg timeout to not interfere */ return -EAGAIN; diff --git a/drivers/scsi/bfa/bfad_bsg.c b/drivers/scsi/bfa/bfad_bsg.c index d3094270..cdc25e6 
100644 --- a/drivers/scsi/bfa/bfad_bsg.c +++ b/drivers/scsi/bfa/bfad_bsg.c @@ -3130,7 +3130,7 @@ bfad_iocmd_handler(struct bfad_s *bfad, unsigned int cmd, void *iocmd, } static int -bfad_im_bsg_vendor_request(struct fc_bsg_job *job) +bfad_im_bsg_vendor_request(struct bsg_job *job) { struct fc_bsg_request *bsg_request = job->request; struct fc_bsg_reply *bsg_reply = job->reply; @@ -3314,7 +3314,7 @@ bfad_fcxp_free_mem(struct bfad_s *bfad, struct bfad_buf_info
[PATCH v2 02/16] scsi: don't use fc_bsg_job::request and fc_bsg_job::reply directly
Don't use fc_bsg_job::request and fc_bsg_job::reply directly, but use helper variables bsg_request and bsg_reply. This will be helpful when transitioning to bsg-lib. Signed-off-by: Johannes Thumshirn--- drivers/s390/scsi/zfcp_fc.c | 9 +- drivers/scsi/bfa/bfad_bsg.c | 40 +++--- drivers/scsi/ibmvscsi/ibmvfc.c | 22 ++-- drivers/scsi/libfc/fc_lport.c| 23 ++-- drivers/scsi/lpfc/lpfc_bsg.c | 194 +--- drivers/scsi/qla2xxx/qla_bsg.c | 264 ++- drivers/scsi/qla2xxx/qla_iocb.c | 5 +- drivers/scsi/qla2xxx/qla_isr.c | 46 --- drivers/scsi/qla2xxx/qla_mr.c| 10 +- drivers/scsi/scsi_transport_fc.c | 37 +++--- 10 files changed, 387 insertions(+), 263 deletions(-) diff --git a/drivers/s390/scsi/zfcp_fc.c b/drivers/s390/scsi/zfcp_fc.c index 237688a..4c4023f 100644 --- a/drivers/s390/scsi/zfcp_fc.c +++ b/drivers/s390/scsi/zfcp_fc.c @@ -900,8 +900,9 @@ static struct zfcp_fc_wka_port *zfcp_fc_job_wka_port(struct fc_bsg_job *job) u32 preamble_word1; u8 gs_type; struct zfcp_adapter *adapter; + struct fc_bsg_request *bsg_request = job->request; - preamble_word1 = job->request->rqst_data.r_ct.preamble_word1; + preamble_word1 = bsg_request->rqst_data.r_ct.preamble_word1; gs_type = (preamble_word1 & 0xff00) >> 24; adapter = (struct zfcp_adapter *) job->shost->hostdata[0]; @@ -938,6 +939,7 @@ static int zfcp_fc_exec_els_job(struct fc_bsg_job *job, { struct zfcp_fsf_ct_els *els = job->dd_data; struct fc_rport *rport = job->rport; + struct fc_bsg_request *bsg_request = job->request; struct zfcp_port *port; u32 d_id; @@ -949,7 +951,7 @@ static int zfcp_fc_exec_els_job(struct fc_bsg_job *job, d_id = port->d_id; put_device(>dev); } else - d_id = ntoh24(job->request->rqst_data.h_els.port_id); + d_id = ntoh24(bsg_request->rqst_data.h_els.port_id); els->handler = zfcp_fc_ct_els_job_handler; return zfcp_fsf_send_els(adapter, d_id, els, job->req->timeout / HZ); @@ -983,6 +985,7 @@ int zfcp_fc_exec_bsg_job(struct fc_bsg_job *job) struct Scsi_Host *shost; struct zfcp_adapter *adapter; struct 
zfcp_fsf_ct_els *ct_els = job->dd_data; + struct fc_bsg_request *bsg_request = job->request; shost = job->rport ? rport_to_shost(job->rport) : job->shost; adapter = (struct zfcp_adapter *)shost->hostdata[0]; @@ -994,7 +997,7 @@ int zfcp_fc_exec_bsg_job(struct fc_bsg_job *job) ct_els->resp = job->reply_payload.sg_list; ct_els->handler_data = job; - switch (job->request->msgcode) { + switch (bsg_request->msgcode) { case FC_BSG_RPT_ELS: case FC_BSG_HST_ELS_NOLOGIN: return zfcp_fc_exec_els_job(job, adapter); diff --git a/drivers/scsi/bfa/bfad_bsg.c b/drivers/scsi/bfa/bfad_bsg.c index d1ad020..48366d8 100644 --- a/drivers/scsi/bfa/bfad_bsg.c +++ b/drivers/scsi/bfa/bfad_bsg.c @@ -3132,7 +3132,9 @@ bfad_iocmd_handler(struct bfad_s *bfad, unsigned int cmd, void *iocmd, static int bfad_im_bsg_vendor_request(struct fc_bsg_job *job) { - uint32_t vendor_cmd = job->request->rqst_data.h_vendor.vendor_cmd[0]; + struct fc_bsg_request *bsg_request = job->request; + struct fc_bsg_reply *bsg_reply = job->reply; + uint32_t vendor_cmd = bsg_request->rqst_data.h_vendor.vendor_cmd[0]; struct bfad_im_port_s *im_port = (struct bfad_im_port_s *) job->shost->hostdata[0]; struct bfad_s *bfad = im_port->bfad; @@ -3175,8 +3177,8 @@ bfad_im_bsg_vendor_request(struct fc_bsg_job *job) /* Fill the BSG job reply data */ job->reply_len = job->reply_payload.payload_len; - job->reply->reply_payload_rcv_len = job->reply_payload.payload_len; - job->reply->result = rc; + bsg_reply->reply_payload_rcv_len = job->reply_payload.payload_len; + bsg_reply->result = rc; job->job_done(job); return rc; @@ -3184,9 +3186,9 @@ error: /* free the command buffer */ kfree(payload_kbuf); out: - job->reply->result = rc; + bsg_reply->result = rc; job->reply_len = sizeof(uint32_t); - job->reply->reply_payload_rcv_len = 0; + bsg_reply->reply_payload_rcv_len = 0; return rc; } @@ -3362,18 +3364,20 @@ bfad_im_bsg_els_ct_request(struct fc_bsg_job *job) struct bfad_fcxp*drv_fcxp; struct bfa_fcs_lport_s *fcs_port; struct 
bfa_fcs_rport_s *fcs_rport; - uint32_t command_type = job->request->msgcode; + struct fc_bsg_request *bsg_request = bsg_request; + struct fc_bsg_reply *bsg_reply = job->reply; + uint32_t command_type = bsg_request->msgcode; unsigned long flags; struct bfad_buf_info *rsp_buf_info; void *req_kbuf = NULL, *rsp_kbuf =
[PATCH v2 05/16] scsi: fc: provide fc_bsg_to_shost() helper
Provide fc_bsg_to_shost() helper that will become handy when we're moving from struct fc_bsg_job to a plain struct bsg_job. Also use this little helper in the LLDDs. Signed-off-by: Johannes Thumshirn--- drivers/s390/scsi/zfcp_fc.c | 4 +-- drivers/scsi/bfa/bfad_bsg.c | 6 ++--- drivers/scsi/ibmvscsi/ibmvfc.c | 4 +-- drivers/scsi/libfc/fc_lport.c| 2 +- drivers/scsi/lpfc/lpfc_bsg.c | 32 drivers/scsi/qla2xxx/qla_bsg.c | 54 drivers/scsi/scsi_transport_fc.c | 2 +- include/scsi/scsi_transport_fc.h | 5 8 files changed, 56 insertions(+), 53 deletions(-) diff --git a/drivers/s390/scsi/zfcp_fc.c b/drivers/s390/scsi/zfcp_fc.c index 87f6330..813c286 100644 --- a/drivers/s390/scsi/zfcp_fc.c +++ b/drivers/s390/scsi/zfcp_fc.c @@ -905,7 +905,7 @@ static struct zfcp_fc_wka_port *zfcp_fc_job_wka_port(struct fc_bsg_job *job) preamble_word1 = bsg_request->rqst_data.r_ct.preamble_word1; gs_type = (preamble_word1 & 0xff00) >> 24; - adapter = (struct zfcp_adapter *) job->shost->hostdata[0]; + adapter = shost_priv(fc_bsg_to_shost(job)); switch (gs_type) { case FC_FST_ALIAS: @@ -987,7 +987,7 @@ int zfcp_fc_exec_bsg_job(struct fc_bsg_job *job) struct zfcp_fsf_ct_els *ct_els = job->dd_data; struct fc_bsg_request *bsg_request = job->request; - shost = job->rport ? rport_to_shost(job->rport) : job->shost; + shost = job->rport ? 
rport_to_shost(job->rport) : fc_bsg_to_shost(job); adapter = (struct zfcp_adapter *)shost->hostdata[0]; if (!(atomic_read(>status) & ZFCP_STATUS_COMMON_OPEN)) diff --git a/drivers/scsi/bfa/bfad_bsg.c b/drivers/scsi/bfa/bfad_bsg.c index e49a6c8..d3094270 100644 --- a/drivers/scsi/bfa/bfad_bsg.c +++ b/drivers/scsi/bfa/bfad_bsg.c @@ -3135,8 +3135,7 @@ bfad_im_bsg_vendor_request(struct fc_bsg_job *job) struct fc_bsg_request *bsg_request = job->request; struct fc_bsg_reply *bsg_reply = job->reply; uint32_t vendor_cmd = bsg_request->rqst_data.h_vendor.vendor_cmd[0]; - struct bfad_im_port_s *im_port = - (struct bfad_im_port_s *) job->shost->hostdata[0]; + struct bfad_im_port_s *im_port = shost_priv(fc_bsg_to_shost(job)); struct bfad_s *bfad = im_port->bfad; struct request_queue *request_q = job->req->q; void *payload_kbuf; @@ -3358,8 +3357,7 @@ int bfad_im_bsg_els_ct_request(struct fc_bsg_job *job) { struct bfa_bsg_data *bsg_data; - struct bfad_im_port_s *im_port = - (struct bfad_im_port_s *) job->shost->hostdata[0]; + struct bfad_im_port_s *im_port = shost_priv(fc_bsg_to_shost(job)); struct bfad_s *bfad = im_port->bfad; bfa_bsg_fcpt_t *bsg_fcpt; struct bfad_fcxp*drv_fcxp; diff --git a/drivers/scsi/ibmvscsi/ibmvfc.c b/drivers/scsi/ibmvscsi/ibmvfc.c index 1001d4a..f7b50af 100644 --- a/drivers/scsi/ibmvscsi/ibmvfc.c +++ b/drivers/scsi/ibmvscsi/ibmvfc.c @@ -1702,7 +1702,7 @@ static void ibmvfc_bsg_timeout_done(struct ibmvfc_event *evt) **/ static int ibmvfc_bsg_timeout(struct fc_bsg_job *job) { - struct ibmvfc_host *vhost = shost_priv(job->shost); + struct ibmvfc_host *vhost = shost_priv(fc_bsg_to_shost(job)); unsigned long port_id = (unsigned long)job->dd_data; struct ibmvfc_event *evt; struct ibmvfc_tmf *tmf; @@ -1815,7 +1815,7 @@ unlock_out: **/ static int ibmvfc_bsg_request(struct fc_bsg_job *job) { - struct ibmvfc_host *vhost = shost_priv(job->shost); + struct ibmvfc_host *vhost = shost_priv(fc_bsg_to_shost(job)); struct fc_rport *rport = job->rport; struct 
ibmvfc_passthru_mad *mad; struct ibmvfc_event *evt; diff --git a/drivers/scsi/libfc/fc_lport.c b/drivers/scsi/libfc/fc_lport.c index 4bed7ec..c60fdb9 100644 --- a/drivers/scsi/libfc/fc_lport.c +++ b/drivers/scsi/libfc/fc_lport.c @@ -2087,7 +2087,7 @@ int fc_lport_bsg_request(struct fc_bsg_job *job) struct fc_bsg_request *bsg_request = job->request; struct fc_bsg_reply *bsg_reply = job->reply; struct request *rsp = job->req->next_rq; - struct Scsi_Host *shost = job->shost; + struct Scsi_Host *shost = fc_bsg_to_shost(job); struct fc_lport *lport = shost_priv(shost); struct fc_rport *rport; struct fc_rport_priv *rdata; diff --git a/drivers/scsi/lpfc/lpfc_bsg.c b/drivers/scsi/lpfc/lpfc_bsg.c index 447a7af..bfcc37d 100644 --- a/drivers/scsi/lpfc/lpfc_bsg.c +++ b/drivers/scsi/lpfc/lpfc_bsg.c @@ -384,7 +384,7 @@ lpfc_bsg_send_mgmt_cmd_cmp(struct lpfc_hba *phba, static int lpfc_bsg_send_mgmt_cmd(struct fc_bsg_job *job) { - struct lpfc_vport *vport = (struct lpfc_vport *)job->shost->hostdata; + struct lpfc_vport *vport = shost_priv(fc_bsg_to_shost(job)); struct lpfc_hba *phba = vport->phba; struct lpfc_rport_data *rdata =
[PATCH v2 03/16] scsi: fc: Export fc_bsg_jobdone and use it in FC drivers
Export fc_bsg_jobdone so drivers can use it directly instead of doing the round-trip via struct fc_bsg_job::job_done() and use it in the LLDDs. As we've converted all LLDDs over to use fc_bsg_jobdone() directly, we can remove the function pointer from struct fc_bsg_job as well. Signed-off-by: Johannes Thumshirn--- drivers/s390/scsi/zfcp_fc.c | 2 +- drivers/scsi/bfa/bfad_bsg.c | 4 ++-- drivers/scsi/ibmvscsi/ibmvfc.c | 2 +- drivers/scsi/libfc/fc_lport.c| 4 ++-- drivers/scsi/lpfc/lpfc_bsg.c | 38 +- drivers/scsi/qla2xxx/qla_bsg.c | 44 drivers/scsi/scsi_transport_fc.c | 5 ++--- include/scsi/scsi_transport_fc.h | 2 +- 8 files changed, 50 insertions(+), 51 deletions(-) diff --git a/drivers/s390/scsi/zfcp_fc.c b/drivers/s390/scsi/zfcp_fc.c index 4c4023f..40d8f06 100644 --- a/drivers/s390/scsi/zfcp_fc.c +++ b/drivers/s390/scsi/zfcp_fc.c @@ -892,7 +892,7 @@ static void zfcp_fc_ct_els_job_handler(void *data) jr->reply_payload_rcv_len = job->reply_payload.payload_len; jr->reply_data.ctels_reply.status = FC_CTELS_STATUS_OK; jr->result = zfcp_ct_els->status ? 
-EIO : 0; - job->job_done(job); + fc_bsg_jobdone(job); } static struct zfcp_fc_wka_port *zfcp_fc_job_wka_port(struct fc_bsg_job *job) diff --git a/drivers/scsi/bfa/bfad_bsg.c b/drivers/scsi/bfa/bfad_bsg.c index 48366d8..25889b9 100644 --- a/drivers/scsi/bfa/bfad_bsg.c +++ b/drivers/scsi/bfa/bfad_bsg.c @@ -3180,7 +3180,7 @@ bfad_im_bsg_vendor_request(struct fc_bsg_job *job) bsg_reply->reply_payload_rcv_len = job->reply_payload.payload_len; bsg_reply->result = rc; - job->job_done(job); + fc_bsg_jobdone(job); return rc; error: /* free the command buffer */ @@ -3556,7 +3556,7 @@ out: bsg_reply->result = rc; if (rc == BFA_STATUS_OK) - job->job_done(job); + fc_bsg_jobdone(job); return rc; } diff --git a/drivers/scsi/ibmvscsi/ibmvfc.c b/drivers/scsi/ibmvscsi/ibmvfc.c index 8b55279..21c9d28 100644 --- a/drivers/scsi/ibmvscsi/ibmvfc.c +++ b/drivers/scsi/ibmvscsi/ibmvfc.c @@ -1939,7 +1939,7 @@ static int ibmvfc_bsg_request(struct fc_bsg_job *job) ibmvfc_free_event(evt); spin_unlock_irqrestore(vhost->host->host_lock, flags); bsg_reply->result = rc; - job->job_done(job); + fc_bsg_jobdone(job); rc = 0; out: dma_unmap_sg(vhost->dev, job->request_payload.sg_list, diff --git a/drivers/scsi/libfc/fc_lport.c b/drivers/scsi/libfc/fc_lport.c index a1c12e7..8811fe0 100644 --- a/drivers/scsi/libfc/fc_lport.c +++ b/drivers/scsi/libfc/fc_lport.c @@ -1912,7 +1912,7 @@ static void fc_lport_bsg_resp(struct fc_seq *sp, struct fc_frame *fp, -ECONNABORTED : -ETIMEDOUT; job->reply_len = sizeof(uint32_t); job->state_flags |= FC_RQST_STATE_DONE; - job->job_done(job); + fc_bsg_jobdone(job); kfree(info); return; } @@ -1947,7 +1947,7 @@ static void fc_lport_bsg_resp(struct fc_seq *sp, struct fc_frame *fp, job->reply_payload.payload_len; bsg_reply->result = 0; job->state_flags |= FC_RQST_STATE_DONE; - job->job_done(job); + fc_bsg_jobdone(job); kfree(info); } fc_frame_free(fp); diff --git a/drivers/scsi/lpfc/lpfc_bsg.c b/drivers/scsi/lpfc/lpfc_bsg.c index 27b5930..1db9cca 100644 --- 
a/drivers/scsi/lpfc/lpfc_bsg.c +++ b/drivers/scsi/lpfc/lpfc_bsg.c @@ -371,7 +371,7 @@ lpfc_bsg_send_mgmt_cmd_cmp(struct lpfc_hba *phba, if (job) { bsg_reply->result = rc; - job->job_done(job); + fc_bsg_jobdone(job); } return; } @@ -644,7 +644,7 @@ lpfc_bsg_rport_els_cmp(struct lpfc_hba *phba, if (job) { bsg_reply->result = rc; - job->job_done(job); + fc_bsg_jobdone(job); } return; } @@ -1136,7 +1136,7 @@ lpfc_bsg_ct_unsol_event(struct lpfc_hba *phba, struct lpfc_sli_ring *pring, job->dd_data = NULL; /* complete the job back to userspace */ spin_unlock_irqrestore(>ct_ev_lock, flags); - job->job_done(job); + fc_bsg_jobdone(job); spin_lock_irqsave(>ct_ev_lock, flags); } } @@ -1361,7 +1361,7 @@ lpfc_bsg_hba_get_event(struct fc_bsg_job *job) spin_unlock_irqrestore(>ct_ev_lock, flags); job->dd_data = NULL; bsg_reply->result = 0; - job->job_done(job); + fc_bsg_jobdone(job); return 0; job_error: @@ -1458,7 +1458,7 @@ lpfc_issue_ct_rsp_cmp(struct lpfc_hba *phba, if (job) {
[PATCH v2 04/16] scsi: Unify interfaces of fc_bsg_jobdone and bsg_job_done
Unify the interfaces of fc_bsg_jobdone and bsg_job_done. This will reduce the diff when moving from 'struct fc_bsg_job' to a plain 'struct bsg_job' later on. Signed-off-by: Johannes Thumshirn--- drivers/s390/scsi/zfcp_fc.c | 2 +- drivers/scsi/bfa/bfad_bsg.c | 6 ++-- drivers/scsi/ibmvscsi/ibmvfc.c | 3 +- drivers/scsi/libfc/fc_lport.c| 6 ++-- drivers/scsi/lpfc/lpfc_bsg.c | 68 +++- drivers/scsi/qla2xxx/qla_bsg.c | 66 +- drivers/scsi/scsi_transport_fc.c | 22 +++-- include/scsi/scsi_transport_fc.h | 3 +- 8 files changed, 116 insertions(+), 60 deletions(-) diff --git a/drivers/s390/scsi/zfcp_fc.c b/drivers/s390/scsi/zfcp_fc.c index 40d8f06..87f6330 100644 --- a/drivers/s390/scsi/zfcp_fc.c +++ b/drivers/s390/scsi/zfcp_fc.c @@ -892,7 +892,7 @@ static void zfcp_fc_ct_els_job_handler(void *data) jr->reply_payload_rcv_len = job->reply_payload.payload_len; jr->reply_data.ctels_reply.status = FC_CTELS_STATUS_OK; jr->result = zfcp_ct_els->status ? -EIO : 0; - fc_bsg_jobdone(job); + fc_bsg_jobdone(job, jr->result, jr->reply_payload_rcv_len); } static struct zfcp_fc_wka_port *zfcp_fc_job_wka_port(struct fc_bsg_job *job) diff --git a/drivers/scsi/bfa/bfad_bsg.c b/drivers/scsi/bfa/bfad_bsg.c index 25889b9..e49a6c8 100644 --- a/drivers/scsi/bfa/bfad_bsg.c +++ b/drivers/scsi/bfa/bfad_bsg.c @@ -3180,7 +3180,8 @@ bfad_im_bsg_vendor_request(struct fc_bsg_job *job) bsg_reply->reply_payload_rcv_len = job->reply_payload.payload_len; bsg_reply->result = rc; - fc_bsg_jobdone(job); + fc_bsg_jobdone(job, bsg_reply->result, + bsg_reply->reply_payload_rcv_len); return rc; error: /* free the command buffer */ @@ -3556,7 +3557,8 @@ out: bsg_reply->result = rc; if (rc == BFA_STATUS_OK) - fc_bsg_jobdone(job); + fc_bsg_jobdone(job, bsg_reply->result, + bsg_reply->reply_payload_rcv_len); return rc; } diff --git a/drivers/scsi/ibmvscsi/ibmvfc.c b/drivers/scsi/ibmvscsi/ibmvfc.c index 21c9d28..1001d4a 100644 --- a/drivers/scsi/ibmvscsi/ibmvfc.c +++ b/drivers/scsi/ibmvscsi/ibmvfc.c @@ -1939,7 +1939,8 @@ 
static int ibmvfc_bsg_request(struct fc_bsg_job *job) ibmvfc_free_event(evt); spin_unlock_irqrestore(vhost->host->host_lock, flags); bsg_reply->result = rc; - fc_bsg_jobdone(job); + fc_bsg_jobdone(job, bsg_reply->result, + bsg_reply->reply_payload_rcv_len); rc = 0; out: dma_unmap_sg(vhost->dev, job->request_payload.sg_list, diff --git a/drivers/scsi/libfc/fc_lport.c b/drivers/scsi/libfc/fc_lport.c index 8811fe0..4bed7ec 100644 --- a/drivers/scsi/libfc/fc_lport.c +++ b/drivers/scsi/libfc/fc_lport.c @@ -1912,7 +1912,8 @@ static void fc_lport_bsg_resp(struct fc_seq *sp, struct fc_frame *fp, -ECONNABORTED : -ETIMEDOUT; job->reply_len = sizeof(uint32_t); job->state_flags |= FC_RQST_STATE_DONE; - fc_bsg_jobdone(job); + fc_bsg_jobdone(job, bsg_reply->result, + bsg_reply->reply_payload_rcv_len); kfree(info); return; } @@ -1947,7 +1948,8 @@ static void fc_lport_bsg_resp(struct fc_seq *sp, struct fc_frame *fp, job->reply_payload.payload_len; bsg_reply->result = 0; job->state_flags |= FC_RQST_STATE_DONE; - fc_bsg_jobdone(job); + fc_bsg_jobdone(job, bsg_reply->result, + bsg_reply->reply_payload_rcv_len); kfree(info); } fc_frame_free(fp); diff --git a/drivers/scsi/lpfc/lpfc_bsg.c b/drivers/scsi/lpfc/lpfc_bsg.c index 1db9cca..447a7af 100644 --- a/drivers/scsi/lpfc/lpfc_bsg.c +++ b/drivers/scsi/lpfc/lpfc_bsg.c @@ -371,7 +371,8 @@ lpfc_bsg_send_mgmt_cmd_cmp(struct lpfc_hba *phba, if (job) { bsg_reply->result = rc; - fc_bsg_jobdone(job); + fc_bsg_jobdone(job, bsg_reply->result, + bsg_reply->reply_payload_rcv_len); } return; } @@ -644,7 +645,8 @@ lpfc_bsg_rport_els_cmp(struct lpfc_hba *phba, if (job) { bsg_reply->result = rc; - fc_bsg_jobdone(job); + fc_bsg_jobdone(job, bsg_reply->result, + bsg_reply->reply_payload_rcv_len); } return; } @@ -1136,7 +1138,8 @@ lpfc_bsg_ct_unsol_event(struct lpfc_hba *phba, struct lpfc_sli_ring *pring, job->dd_data = NULL; /* complete the job back to userspace */ spin_unlock_irqrestore(>ct_ev_lock, flags); - fc_bsg_jobdone(job); +
[PATCH v2 06/16] scsi: fc: provide fc_bsg_to_rport() helper
Provide fc_bsg_to_rport() helper that will become handy when we're moving from struct fc_bsg_job to a plain struct bsg_job. Also move all LLDDs to use the new helper. Signed-off-by: Johannes Thumshirn--- drivers/s390/scsi/zfcp_fc.c | 5 +++-- drivers/scsi/ibmvscsi/ibmvfc.c | 2 +- drivers/scsi/libfc/fc_lport.c| 4 ++-- drivers/scsi/lpfc/lpfc_bsg.c | 4 ++-- drivers/scsi/qla2xxx/qla_bsg.c | 4 ++-- drivers/scsi/scsi_transport_fc.c | 3 ++- include/scsi/scsi_transport_fc.h | 5 + 7 files changed, 17 insertions(+), 10 deletions(-) diff --git a/drivers/s390/scsi/zfcp_fc.c b/drivers/s390/scsi/zfcp_fc.c index 813c286..1977a66 100644 --- a/drivers/s390/scsi/zfcp_fc.c +++ b/drivers/s390/scsi/zfcp_fc.c @@ -938,7 +938,7 @@ static int zfcp_fc_exec_els_job(struct fc_bsg_job *job, struct zfcp_adapter *adapter) { struct zfcp_fsf_ct_els *els = job->dd_data; - struct fc_rport *rport = job->rport; + struct fc_rport *rport = fc_bsg_to_rport(job); struct fc_bsg_request *bsg_request = job->request; struct zfcp_port *port; u32 d_id; @@ -986,8 +986,9 @@ int zfcp_fc_exec_bsg_job(struct fc_bsg_job *job) struct zfcp_adapter *adapter; struct zfcp_fsf_ct_els *ct_els = job->dd_data; struct fc_bsg_request *bsg_request = job->request; + struct fc_rport *rport = fc_bsg_to_rport(job); - shost = job->rport ? rport_to_shost(job->rport) : fc_bsg_to_shost(job); + shost = rport ? 
rport_to_shost(rport) : fc_bsg_to_shost(job); adapter = (struct zfcp_adapter *)shost->hostdata[0]; if (!(atomic_read(>status) & ZFCP_STATUS_COMMON_OPEN)) diff --git a/drivers/scsi/ibmvscsi/ibmvfc.c b/drivers/scsi/ibmvscsi/ibmvfc.c index f7b50af..35114d9 100644 --- a/drivers/scsi/ibmvscsi/ibmvfc.c +++ b/drivers/scsi/ibmvscsi/ibmvfc.c @@ -1816,7 +1816,7 @@ unlock_out: static int ibmvfc_bsg_request(struct fc_bsg_job *job) { struct ibmvfc_host *vhost = shost_priv(fc_bsg_to_shost(job)); - struct fc_rport *rport = job->rport; + struct fc_rport *rport = fc_bsg_to_rport(job); struct ibmvfc_passthru_mad *mad; struct ibmvfc_event *evt; union ibmvfc_iu rsp_iu; diff --git a/drivers/scsi/libfc/fc_lport.c b/drivers/scsi/libfc/fc_lport.c index c60fdb9..156708a 100644 --- a/drivers/scsi/libfc/fc_lport.c +++ b/drivers/scsi/libfc/fc_lport.c @@ -2102,7 +2102,7 @@ int fc_lport_bsg_request(struct fc_bsg_job *job) switch (bsg_request->msgcode) { case FC_BSG_RPT_ELS: - rport = job->rport; + rport = fc_bsg_to_rport(job); if (!rport) break; @@ -2112,7 +2112,7 @@ int fc_lport_bsg_request(struct fc_bsg_job *job) break; case FC_BSG_RPT_CT: - rport = job->rport; + rport = fc_bsg_to_rport(job); if (!rport) break; diff --git a/drivers/scsi/lpfc/lpfc_bsg.c b/drivers/scsi/lpfc/lpfc_bsg.c index bfcc37d..dae7cc3 100644 --- a/drivers/scsi/lpfc/lpfc_bsg.c +++ b/drivers/scsi/lpfc/lpfc_bsg.c @@ -386,7 +386,7 @@ lpfc_bsg_send_mgmt_cmd(struct fc_bsg_job *job) { struct lpfc_vport *vport = shost_priv(fc_bsg_to_shost(job)); struct lpfc_hba *phba = vport->phba; - struct lpfc_rport_data *rdata = job->rport->dd_data; + struct lpfc_rport_data *rdata = fc_bsg_to_rport(job)->dd_data; struct lpfc_nodelist *ndlp = rdata->pnode; struct fc_bsg_reply *bsg_reply = job->reply; struct ulp_bde64 *bpl = NULL; @@ -660,7 +660,7 @@ lpfc_bsg_rport_els(struct fc_bsg_job *job) { struct lpfc_vport *vport = shost_priv(fc_bsg_to_shost(job)); struct lpfc_hba *phba = vport->phba; - struct lpfc_rport_data *rdata = job->rport->dd_data; 
+ struct lpfc_rport_data *rdata = fc_bsg_to_rport(job)->dd_data; struct lpfc_nodelist *ndlp = rdata->pnode; struct fc_bsg_request *bsg_request = job->request; struct fc_bsg_reply *bsg_reply = job->reply; diff --git a/drivers/scsi/qla2xxx/qla_bsg.c b/drivers/scsi/qla2xxx/qla_bsg.c index 109b852..917eafe 100644 --- a/drivers/scsi/qla2xxx/qla_bsg.c +++ b/drivers/scsi/qla2xxx/qla_bsg.c @@ -264,7 +264,7 @@ qla2x00_process_els(struct fc_bsg_job *bsg_job) uint16_t nextlid = 0; if (bsg_request->msgcode == FC_BSG_RPT_ELS) { - rport = bsg_job->rport; + rport = fc_bsg_to_rport(bsg_job); fcport = *(fc_port_t **) rport->dd_data; host = rport_to_shost(rport); vha = shost_priv(host); @@ -2485,7 +2485,7 @@ qla24xx_bsg_request(struct fc_bsg_job *bsg_job) bsg_reply->reply_payload_rcv_len = 0; if (bsg_request->msgcode == FC_BSG_RPT_ELS) { - rport = bsg_job->rport; + rport = fc_bsg_to_rport(bsg_job); host = rport_to_shost(rport); vha =
Re: [mm] c4344e8035: WARNING: CPU: 0 PID: 101 at mm/memory.c:303 __tlb_remove_page_size+0x25/0x99
On 10/12, Aneesh Kumar K.V wrote: >kernel test robot <xiaolong...@intel.com> writes: > >> FYI, we noticed the following commit: >> >> https://github.com/0day-ci/linux >> Aneesh-Kumar-K-V/mm-Use-the-correct-page-size-when-removing-the-page/20161012-013446 >> commit c4344e80359420d7574b3b90fddf53311f1d24e6 ("mm: Remove the page size >> change check in tlb_remove_page") >> >> in testcase: boot >> >> on test machine: qemu-system-i386 -enable-kvm -cpu Haswell,+smep,+smap -m >> 360M >> >> caused below changes: >> >> >> ++++ >> || eff764128d | c4344e8035 | >> ++++ >> | boot_successes | 59 | 0 | >> | boot_failures | 0 | 43 | >> | WARNING:at_mm/memory.c:#__tlb_remove_page_size | 0 | 43 | >> | calltrace:SyS_execve | 0 | 43 | >> | calltrace:run_init_process | 0 | 21 | >> ++++ >> >> >> >> [4.096204] Write protecting the kernel text: 3148k >> [4.096911] Write protecting the kernel read-only data: 1444k >> [4.120357] [ cut here ] >> [4.121078] WARNING: CPU: 0 PID: 101 at mm/memory.c:303 >> __tlb_remove_page_size+0x25/0x99 >> [4.122380] Modules linked in: >> [4.122788] CPU: 0 PID: 101 Comm: run-parts Not tainted >> 4.8.0-mm1-00315-gc4344e8 #5 >> [4.123956] bd145dc4 b111e5e6 bd145de0 b10320dc 012f b10974d1 >> bd145e70 c4954170 >> [4.125277] c4954170 bd145df4 b103215f 0009 >> bd145e04 b10974d1 >> [4.126424] c4954170 bd145e70 bd145e14 b10263ca bd145e70 bd47bafc >> bd145e40 b109767a >> [4.127622] Call Trace: > >Thanks for the report. The below change should fix this. 
> >commit 18c929e7cf672da617dc218c6265366bf78b1644 >Author: Aneesh Kumar K.V <aneesh.ku...@linux.vnet.ibm.com> >Date: Wed Oct 12 08:40:41 2016 +0530 > >update mmu gather page size before flushing page table cache > >diff --git a/mm/memory.c b/mm/memory.c >index 26d1ba8c87e6..7e7eccb82a2b 100644 >--- a/mm/memory.c >+++ b/mm/memory.c >@@ -526,7 +526,11 @@ void free_pgd_range(struct mmu_gather *tlb, > end -= PMD_SIZE; > if (addr > end - 1) > return; >- >+ /* >+ * We add page table cache pages with PAGE_SIZE, >+ * (see pte_free_tlb()), flush the tlb if we need >+ */ >+ tlb_remove_check_page_size_change(tlb, PAGE_SIZE); > pgd = pgd_offset(tlb->mm, addr); > do { > next = pgd_addr_end(addr, end); > Just applied this fix on top of commit c4344e8035 and confirmed that reported warning is gone with this fix. Tested-by: Xiaolong Ye <xiaolong...@intel.com> = compiler/kconfig/rootfs/sleep/tbox_group/testcase: gcc-6/i386-randconfig-s1-201641/quantal-core-i386.cgz/1/vm-vp-quantal-i386/boot commit: c4344e80359420d7574b3b90fddf53311f1d24e6 384db818365c90b91d8bad80be188765e801cf58 ("update mmu gather page size before flushing page table cache") c4344e80359420d7 384db818365c90b91d8bad80be -- fail:runs %reproductionfail:runs | | | 24:24-100%:5 dmesg.WARNING:at_mm/memory.c:#__tlb_remove_page_size Thanks, Xiaolong
[mm] c4344e8035: WARNING: CPU: 0 PID: 101 at mm/memory.c:303 __tlb_remove_page_size+0x25/0x99
FYI, we noticed the following commit: https://github.com/0day-ci/linux Aneesh-Kumar-K-V/mm-Use-the-correct-page-size-when-removing-the-page/20161012-013446 commit c4344e80359420d7574b3b90fddf53311f1d24e6 ("mm: Remove the page size change check in tlb_remove_page") in testcase: boot on test machine: qemu-system-i386 -enable-kvm -cpu Haswell,+smep,+smap -m 360M caused below changes: ++++ || eff764128d | c4344e8035 | ++++ | boot_successes | 59 | 0 | | boot_failures | 0 | 43 | | WARNING:at_mm/memory.c:#__tlb_remove_page_size | 0 | 43 | | calltrace:SyS_execve | 0 | 43 | | calltrace:run_init_process | 0 | 21 | ++++ [4.096204] Write protecting the kernel text: 3148k [4.096911] Write protecting the kernel read-only data: 1444k [4.120357] [ cut here ] [4.121078] WARNING: CPU: 0 PID: 101 at mm/memory.c:303 __tlb_remove_page_size+0x25/0x99 [4.122380] Modules linked in: [4.122788] CPU: 0 PID: 101 Comm: run-parts Not tainted 4.8.0-mm1-00315-gc4344e8 #5 [4.123956] bd145dc4 b111e5e6 bd145de0 b10320dc 012f b10974d1 bd145e70 c4954170 [4.125277] c4954170 bd145df4 b103215f 0009 bd145e04 b10974d1 [4.126424] c4954170 bd145e70 bd145e14 b10263ca bd145e70 bd47bafc bd145e40 b109767a [4.127622] Call Trace: [4.128255] [ cut here ] [4.128261] WARNING: CPU: 0 PID: 103 at mm/memory.c:303 __tlb_remove_page_size+0x25/0x99 [4.128261] Modules linked in: [4.128264] CPU: 0 PID: 103 Comm: sh Not tainted 4.8.0-mm1-00315-gc4344e8 #5 [4.128268] bd143dc4 b111e5e6 bd143de0 b10320dc 012f b10974d1 bd143e70 c494cd00 [4.128271] c494cd00 bd143df4 b103215f 0009 bd143e04 b10974d1 [4.128274] c494cd00 bd143e70 bd143e14 b10263ca bd143e70 bd47dafc bd143e40 b109767a [4.128275] Call Trace: [4.128281] [] dump_stack+0x16/0x18 [4.128284] [] __warn+0xa5/0xbc [4.128286] [] ? 
__tlb_remove_page_size+0x25/0x99 [4.128288] [] warn_slowpath_null+0x11/0x16 [4.128290] [] __tlb_remove_page_size+0x25/0x99 [4.128293] [] ___pte_free_tlb+0x57/0x66 [4.128295] [] free_pgd_range+0x135/0x1d0 [4.128298] [] setup_arg_pages+0x219/0x29a [4.128302] [] load_elf_binary+0x2ad/0x94a [4.128305] [] ? _copy_from_user+0x49/0x5c [4.128307] [] search_binary_handler+0x106/0x159 [4.128309] [] do_execveat_common+0x3bf/0x4dc [4.128311] [] do_execve+0x14/0x16 [4.128313] [] SyS_execve+0x16/0x18 [4.128316] [] do_fast_syscall_32+0x8f/0xce [4.128320] [] sysenter_past_esp+0x47/0x75 [4.128322] ---[ end trace 816334aebb0eaffe ]--- [4.132981] [ cut here ] Thanks, Kernel Test Robot # # Automatically generated file; DO NOT EDIT. # Linux/i386 4.8.0-mm1 Kernel Configuration # # CONFIG_64BIT is not set CONFIG_X86_32=y CONFIG_X86=y CONFIG_INSTRUCTION_DECODER=y CONFIG_OUTPUT_FORMAT="elf32-i386" CONFIG_ARCH_DEFCONFIG="arch/x86/configs/i386_defconfig" CONFIG_LOCKDEP_SUPPORT=y CONFIG_STACKTRACE_SUPPORT=y CONFIG_MMU=y CONFIG_ARCH_MMAP_RND_BITS_MIN=8 CONFIG_ARCH_MMAP_RND_BITS_MAX=16 CONFIG_ARCH_MMAP_RND_COMPAT_BITS_MIN=8 CONFIG_ARCH_MMAP_RND_COMPAT_BITS_MAX=16 CONFIG_NEED_SG_DMA_LENGTH=y CONFIG_GENERIC_ISA_DMA=y CONFIG_GENERIC_BUG=y CONFIG_GENERIC_HWEIGHT=y CONFIG_ARCH_MAY_HAVE_PC_FDC=y CONFIG_RWSEM_XCHGADD_ALGORITHM=y CONFIG_GENERIC_CALIBRATE_DELAY=y CONFIG_ARCH_HAS_CPU_RELAX=y CONFIG_ARCH_HAS_CACHE_LINE_SIZE=y CONFIG_HAVE_SETUP_PER_CPU_AREA=y CONFIG_NEED_PER_CPU_EMBED_FIRST_CHUNK=y CONFIG_NEED_PER_CPU_PAGE_FIRST_CHUNK=y CONFIG_ARCH_HIBERNATION_POSSIBLE=y CONFIG_ARCH_SUSPEND_POSSIBLE=y CONFIG_ARCH_WANT_HUGE_PMD_SHARE=y CONFIG_ARCH_WANT_GENERAL_HUGETLB=y CONFIG_ARCH_SUPPORTS_OPTIMIZED_INLINING=y CONFIG_ARCH_SUPPORTS_DEBUG_PAGEALLOC=y CONFIG_ARCH_SUPPORTS_UPROBES=y CONFIG_FIX_EARLYCON_MEM=y CONFIG_DEBUG_RODATA=y CONFIG_PGTABLE_LEVELS=2 CONFIG_DEFCONFIG_LIST="/lib/modules/$UNAME_RELEASE/.config" CONFIG_IRQ_WORK=y CONFIG_BUILDTIME_EXTABLE_SORT=y CONFIG_THREAD_INFO_IN_TASK=y # # General setup # 
CONFIG_BROKEN_ON_SMP=y CONFIG_INIT_ENV_ARG_LIMIT=32 CONFIG_CROSS_COMPILE="" # CONFIG_COMPILE_TEST is not set CONFIG_LOCALVERSION="" CONFIG_LOCALVERSION_AUTO=y CONFIG_HAVE_KERNEL_GZIP=y CONFIG_HAVE_KERNEL_BZIP2=y CONFIG_HAVE_KERNEL_LZMA=y CONFIG_HAVE_KERNEL_XZ=y CONFIG_HAVE_KERNEL_LZO=y CONFIG_HAVE_KERNEL_LZ4=y # CONFIG_KERNEL_GZIP is not set # CONFIG_KERNEL_BZIP2 is not set # CONFIG_KERNEL_LZMA
Re: [PATCH v3 08/11] powerpc/tracing: fix compat syscall handling
Marcin Nowakowski writes: > Adapt the code to make use of new syscall handling interface > > Signed-off-by: Marcin Nowakowski > Cc: Steven Rostedt > Cc: Ingo Molnar > Cc: Benjamin Herrenschmidt > Cc: Paul Mackerras > Cc: Michael Ellerman > Cc: linuxppc-dev@lists.ozlabs.org > --- > arch/powerpc/include/asm/ftrace.h | 11 +++ > arch/powerpc/kernel/ftrace.c | 4 I went to test this and noticed the exit and enter events appear to be reversed in time? (your series on top of 24532f768121) ls-4221 [003] 83.766113: compat_sys_rt_sigprocmask -> 0x2 ls-4221 [003] 83.766137: compat_sys_rt_sigprocmask(how: 2, nset: 1010db30, oset: 0, sigsetsize: 8) ls-4221 [003] 83.766175: compat_sys_rt_sigaction -> 0x14 ls-4221 [003] 83.766175: compat_sys_rt_sigaction(sig: 14, act: ffbd33c4, oact: ffbd3338, sigsetsize: 8) ls-4221 [003] 83.766177: compat_sys_rt_sigaction -> 0x15 ls-4221 [003] 83.766177: compat_sys_rt_sigaction(sig: 15, act: ffbd33c4, oact: ffbd3338, sigsetsize: 8) ls-4221 [003] 83.766178: compat_sys_rt_sigaction -> 0x16 ls-4221 [003] 83.766178: compat_sys_rt_sigaction(sig: 16, act: ffbd33d4, oact: ffbd3348, sigsetsize: 8) ls-4221 [003] 83.766179: sys_setpgid -> 0x107d ls-4221 [003] 83.766179: sys_setpgid(pid: 107d, pgid: 107d) ls-4221 [003] 83.766180: compat_sys_rt_sigprocmask -> 0x0 ls-4221 [003] 83.766181: compat_sys_rt_sigprocmask(how: 0, nset: ffbd34b0, oset: ffbd3530, sigsetsize: 8) ls-4221 [003] 83.766186: compat_sys_ioctl -> 0xff ls-4221 [003] 83.766187: compat_sys_ioctl(fd: ff, cmd: 80047476, arg32: ffbd3488) ls-4221 [003] 83.766188: compat_sys_rt_sigprocmask -> 0x2 ls-4221 [003] 83.766189: compat_sys_rt_sigprocmask(how: 2, nset: ffbd3530, oset: 0, sigsetsize: 8) ls-4221 [003] 83.766189: sys_close -> 0x4 ls-4221 [003] 83.766190: sys_close(fd: 4) ls-4221 [003] 83.766191: sys_read -> 0x3 ls-4221 [003] 83.766191: sys_read(fd: 3, buf: ffbd35dc, count: 1) ls-4221 [003] 83.766235: sys_close -> 0x3 ls-4221 [003] 83.766235: sys_close(fd: 3) cheers
Re: [PATCH v3 03/11] tracing/syscalls: add compat syscall metadata
Marcin Nowakowski writes: > Now that compat syscalls are properly distinguished from native calls, > we can add metadata for compat syscalls as well. > All the macros used to generate the metadata are the same as for > standard syscalls, but with a compat_ prefix to distinguish them easily. > > Signed-off-by: Marcin Nowakowski > Cc: Steven Rostedt > Cc: Ingo Molnar > Cc: Benjamin Herrenschmidt > Cc: Paul Mackerras > Cc: Michael Ellerman > Cc: linuxppc-dev@lists.ozlabs.org > --- > arch/powerpc/include/asm/ftrace.h | 15 +--- > include/linux/compat.h| 74 > +++ > kernel/trace/trace_syscalls.c | 8 +++-- > 3 files changed, 90 insertions(+), 7 deletions(-) > > diff --git a/arch/powerpc/include/asm/ftrace.h > b/arch/powerpc/include/asm/ftrace.h > index 686c5f7..9697a73 100644 > --- a/arch/powerpc/include/asm/ftrace.h > +++ b/arch/powerpc/include/asm/ftrace.h > @@ -73,12 +73,17 @@ struct dyn_arch_ftrace { > static inline bool arch_syscall_match_sym_name(const char *sym, const char > *name) > { > /* > - * Compare the symbol name with the system call name. Skip the .sys or > .SyS > - * prefix from the symbol name and the sys prefix from the system call > name and > - * just match the rest. This is only needed on ppc64 since symbol names > on > - * 32bit do not start with a period so the generic function will work. > + * Compare the symbol name with the system call name. Skip the .sys, > + * .SyS or .compat_sys prefix from the symbol name and the sys prefix > + * from the system call name and just match the rest. This is only > + * needed on ppc64 since symbol names on 32bit do not start with a > + * period so the generic function will work. >*/ > - return !strcmp(sym + 4, name + 3); > + int prefix_len = 3; > + > + if (!strncasecmp(name, "compat_", 7)) > + prefix_len = 10; > + return !strcmp(sym + prefix_len + 1, name + prefix_len); > } It's annoying that we have to duplicate all that just to do a + 1. How about this as a precursor? 
cheers diff --git a/Documentation/trace/ftrace-design.txt b/Documentation/trace/ftrace-design.txt index dd5f916b351d..bd65f2adeb09 100644 --- a/Documentation/trace/ftrace-design.txt +++ b/Documentation/trace/ftrace-design.txt @@ -226,10 +226,6 @@ You need very few things to get the syscalls tracing in an arch. - If the system call table on this arch is more complicated than a simple array of addresses of the system calls, implement an arch_syscall_addr to return the address of a given system call. -- If the symbol names of the system calls do not match the function names on - this arch, define ARCH_HAS_SYSCALL_MATCH_SYM_NAME in asm/ftrace.h and - implement arch_syscall_match_sym_name with the appropriate logic to return - true if the function name corresponds with the symbol name. - Tag this arch as HAVE_SYSCALL_TRACEPOINTS. diff --git a/arch/powerpc/include/asm/ftrace.h b/arch/powerpc/include/asm/ftrace.h index 686c5f70eb84..dc48f5b2878d 100644 --- a/arch/powerpc/include/asm/ftrace.h +++ b/arch/powerpc/include/asm/ftrace.h @@ -60,6 +60,12 @@ struct dyn_arch_ftrace { struct module *mod; }; #endif /* CONFIG_DYNAMIC_FTRACE */ + +#ifdef PPC64_ELF_ABI_v1 +/* On ppc64 ABIv1 (BE) we have to skip the leading '.' in the symbol name */ +#define ARCH_SYM_NAME_SKIP_CHARS 1 +#endif + #endif /* __ASSEMBLY__ */ #ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS @@ -67,20 +73,4 @@ struct dyn_arch_ftrace { #endif #endif -#if defined(CONFIG_FTRACE_SYSCALLS) && !defined(__ASSEMBLY__) -#ifdef PPC64_ELF_ABI_v1 -#define ARCH_HAS_SYSCALL_MATCH_SYM_NAME -static inline bool arch_syscall_match_sym_name(const char *sym, const char *name) -{ - /* -* Compare the symbol name with the system call name. Skip the .sys or .SyS -* prefix from the symbol name and the sys prefix from the system call name and -* just match the rest. This is only needed on ppc64 since symbol names on -* 32bit do not start with a period so the generic function will work. 
-*/ - return !strcmp(sym + 4, name + 3); -} -#endif -#endif /* CONFIG_FTRACE_SYSCALLS && !__ASSEMBLY__ */ - #endif /* _ASM_POWERPC_FTRACE */ diff --git a/kernel/trace/trace_syscalls.c b/kernel/trace/trace_syscalls.c index b2b6efc083a4..91a7315dbe43 100644 --- a/kernel/trace/trace_syscalls.c +++ b/kernel/trace/trace_syscalls.c @@ -31,8 +31,11 @@ extern struct syscall_metadata *__stop_syscalls_metadata[]; static struct syscall_metadata **syscalls_metadata; -#ifndef ARCH_HAS_SYSCALL_MATCH_SYM_NAME -static inline bool arch_syscall_match_sym_name(const char *sym, const char *name) +#ifndef ARCH_SYM_NAME_SKIP_CHARS +#define ARCH_SYM_NAME_SKIP_CHARS 0 +#endif + +static
Re: [PATCH] powerpc/64: option to force run-at-load to test relocation
On 12/10/16 17:57, Nicholas Piggin wrote: > This adds a config option that can help exercise the case when > the kernel is not running at PAGE_OFFSET. > > Signed-off-by: Nicholas Piggin > --- > arch/powerpc/Kconfig | 9 + > arch/powerpc/kernel/head_64.S | 4 > arch/powerpc/kernel/setup-common.c | 3 +++ > 3 files changed, 16 insertions(+) > > diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig > index 65fba4c..5d43cb8 100644 > --- a/arch/powerpc/Kconfig > +++ b/arch/powerpc/Kconfig > @@ -478,6 +478,15 @@ config RELOCATABLE > setting can still be useful to bootwrappers that need to know the > load address of the kernel (eg. u-boot/mkimage). > > +config RELOCATABLE_TEST > + bool "Test relocatable kernel" > + depends on (PPC64 && RELOCATABLE) > + default n > + help > + This runs the relocatable kernel at the address it was initially > + loaded at, which tends to be non-zero and therefore test the > + relocation code. > + > config CRASH_DUMP > bool "Build a kdump crash kernel" > depends on PPC64 || 6xx || FSL_BOOKE || (44x && !SMP) > diff --git a/arch/powerpc/kernel/head_64.S b/arch/powerpc/kernel/head_64.S > index 79da0641..bc9ceac 100644 > --- a/arch/powerpc/kernel/head_64.S > +++ b/arch/powerpc/kernel/head_64.S > @@ -111,8 +111,12 @@ __secondary_hold_acknowledge: > .globl __run_at_load > __run_at_load: > DEFINE_FIXED_SYMBOL(__run_at_load) > +#ifdef CONFIG_RELOCATABLE_TEST > + .long 0x1 /* Test relocation, do not relocate to 0 */ > +#else > .long 0x72756e30 /* "run0" -- relocate to 0 by default */ > #endif > +#endif Could we do something like config RELOCATION_VALUE default 0x72756e30 default 1 if CONFIG_RELOCATABLE_TEST and then get .long CONFIG_RELOCATION_VALUE > > . 
= 0x60 > /* > diff --git a/arch/powerpc/kernel/setup-common.c > b/arch/powerpc/kernel/setup-common.c > index dba265c..18e0f19 100644 > --- a/arch/powerpc/kernel/setup-common.c > +++ b/arch/powerpc/kernel/setup-common.c > @@ -795,6 +795,9 @@ static __init void print_system_info(void) > pr_info("mmu_features = 0x%08x\n", cur_cpu_spec->mmu_features); > #ifdef CONFIG_PPC64 > pr_info("firmware_features = 0x%016lx\n", powerpc_firmware_features); > + > + if (get_paca()->kernelbase != PAGE_OFFSET) > + pr_info("kernelbase= 0x%llx\n", get_paca()->kernelbase); > #endif > Do we need this? We get physical_offset if we are relocated. > #ifdef CONFIG_PPC_STD_MMU_64 > Balbir Singh.
[PATCH] powerpc: link error on orphan sections
Add --orphan-handling=error to final link flags. This ensures we have to handle all sections. This would have caught subtle breakage such as 7de3b27bac47da9de08409df1d69664acbb72197 at build-time. Also bring some wayward sections into the fold: - .text.hot and .text.unlikely are compiler generated sections. - .sfpr is a linker generated section for register save functions. - .sdata2, .dynsbss, .plt are used by PPC32 - We previously did not specify DWARF_DEBUG or STABS_DEBUG - DWARF_DEBUG did not include DWARF3 .debug_ranges - A number of sections are unused. I don't know if I've exactly got everything right here, particularly with ppc32, so would appreciate people casting their eye over it. Signed-off-by: Nicholas Piggin--- arch/powerpc/Makefile | 2 +- arch/powerpc/kernel/vmlinux.lds.S | 16 ++-- include/asm-generic/vmlinux.lds.h | 3 +++ 3 files changed, 18 insertions(+), 3 deletions(-) diff --git a/arch/powerpc/Makefile b/arch/powerpc/Makefile index 50d020a..a3f2784 100644 --- a/arch/powerpc/Makefile +++ b/arch/powerpc/Makefile @@ -90,7 +90,7 @@ endif LDFLAGS_vmlinux-y := -Bstatic LDFLAGS_vmlinux-$(CONFIG_RELOCATABLE) := -pie -LDFLAGS_vmlinux:= $(LDFLAGS_vmlinux-y) +LDFLAGS_vmlinux:= $(LDFLAGS_vmlinux-y) --orphan-handling=error ifeq ($(CONFIG_PPC64),y) ifeq ($(call cc-option-yn,-mcmodel=medium),y) diff --git a/arch/powerpc/kernel/vmlinux.lds.S b/arch/powerpc/kernel/vmlinux.lds.S index 8295f51..9f4d85e 100644 --- a/arch/powerpc/kernel/vmlinux.lds.S +++ b/arch/powerpc/kernel/vmlinux.lds.S @@ -97,7 +97,7 @@ SECTIONS .text : AT(ADDR(.text) - LOAD_OFFSET) { ALIGN_FUNCTION(); /* careful! 
__ftr_alt_* sections need to be close to .text */ - *(.text .fixup __ftr_alt_* .ref.text) + *(.text.hot .text .text.fixup .text.unlikely .fixup __ftr_alt_* .ref.text .sfpr) \ SCHED_TEXT CPUIDLE_TEXT LOCK_TEXT @@ -256,7 +256,9 @@ SECTIONS .data : AT(ADDR(.data) - LOAD_OFFSET) { DATA_DATA *(.sdata) + *(.sdata2) *(.got.plt) *(.got) + *(.plt) } #else .data : AT(ADDR(.data) - LOAD_OFFSET) { @@ -317,6 +319,16 @@ SECTIONS _end = . ; PROVIDE32 (end = .); - /* Sections to be discarded. */ + STABS_DEBUG + + DWARF_DEBUG + DISCARDS + /DISCARD/ : { + *(*.EMB.apuinfo) + *(.glink .iplt .plt .rela* .comment) + *(.gnu.version*) + *(.gnu.attributes) + *(.eh_frame) + } } diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h index 3e42bcd..264ebb3 100644 --- a/include/asm-generic/vmlinux.lds.h +++ b/include/asm-generic/vmlinux.lds.h @@ -581,6 +581,7 @@ #define SBSS(sbss_align) \ . = ALIGN(sbss_align); \ .sbss : AT(ADDR(.sbss) - LOAD_OFFSET) { \ + *(.dynsbss) \ *(.sbss)\ *(.scommon) \ } @@ -627,6 +628,8 @@ .debug_str 0 : { *(.debug_str) } \ .debug_loc 0 : { *(.debug_loc) } \ .debug_macinfo 0 : { *(.debug_macinfo) } \ + /* DWARF 3 */ \ + .debug_ranges 0 : { *(.debug_ranges) }\ /* SGI/MIPS DWARF 2 extensions */ \ .debug_weaknames 0 : { *(.debug_weaknames) }\ .debug_funcnames 0 : { *(.debug_funcnames) }\ -- 2.9.3
[PATCH] powerpc/64: option to force run-at-load to test relocation
This adds a config option that can help exercise the case when the kernel is not running at PAGE_OFFSET. Signed-off-by: Nicholas Piggin--- arch/powerpc/Kconfig | 9 + arch/powerpc/kernel/head_64.S | 4 arch/powerpc/kernel/setup-common.c | 3 +++ 3 files changed, 16 insertions(+) diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index 65fba4c..5d43cb8 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -478,6 +478,15 @@ config RELOCATABLE setting can still be useful to bootwrappers that need to know the load address of the kernel (eg. u-boot/mkimage). +config RELOCATABLE_TEST + bool "Test relocatable kernel" + depends on (PPC64 && RELOCATABLE) + default n + help + This runs the relocatable kernel at the address it was initially + loaded at, which tends to be non-zero and therefore test the + relocation code. + config CRASH_DUMP bool "Build a kdump crash kernel" depends on PPC64 || 6xx || FSL_BOOKE || (44x && !SMP) diff --git a/arch/powerpc/kernel/head_64.S b/arch/powerpc/kernel/head_64.S index 79da0641..bc9ceac 100644 --- a/arch/powerpc/kernel/head_64.S +++ b/arch/powerpc/kernel/head_64.S @@ -111,8 +111,12 @@ __secondary_hold_acknowledge: .globl __run_at_load __run_at_load: DEFINE_FIXED_SYMBOL(__run_at_load) +#ifdef CONFIG_RELOCATABLE_TEST + .long 0x1 /* Test relocation, do not relocate to 0 */ +#else .long 0x72756e30 /* "run0" -- relocate to 0 by default */ #endif +#endif . = 0x60 /* diff --git a/arch/powerpc/kernel/setup-common.c b/arch/powerpc/kernel/setup-common.c index dba265c..18e0f19 100644 --- a/arch/powerpc/kernel/setup-common.c +++ b/arch/powerpc/kernel/setup-common.c @@ -795,6 +795,9 @@ static __init void print_system_info(void) pr_info("mmu_features = 0x%08x\n", cur_cpu_spec->mmu_features); #ifdef CONFIG_PPC64 pr_info("firmware_features = 0x%016lx\n", powerpc_firmware_features); + + if (get_paca()->kernelbase != PAGE_OFFSET) + pr_info("kernelbase= 0x%llx\n", get_paca()->kernelbase); #endif #ifdef CONFIG_PPC_STD_MMU_64 -- 2.9.3