date:20161012

[PATCH 3/3] powerpc: build-time sort exception table

2016-10-12 Thread Nicholas Piggin

Signed-off-by: Nicholas Piggin 
---
 arch/powerpc/Kconfig  | 1 +
 arch/powerpc/include/asm/module.h | 4 
 scripts/sortextable.c | 2 ++
 3 files changed, 3 insertions(+), 4 deletions(-)

diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index 5d43cb8..b49062b 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -80,6 +80,7 @@ config ARCH_HAS_DMA_SET_COHERENT_MASK
 config PPC
bool
default y
+   select BUILDTIME_EXTABLE_SORT
select ARCH_MIGHT_HAVE_PC_PARPORT
select ARCH_MIGHT_HAVE_PC_SERIO
select BINFMT_ELF
diff --git a/arch/powerpc/include/asm/module.h 
b/arch/powerpc/include/asm/module.h
index cd4ffd8..cc12c61 100644
--- a/arch/powerpc/include/asm/module.h
+++ b/arch/powerpc/include/asm/module.h
@@ -90,10 +90,6 @@ static inline int module_finalize_ftrace(struct module *mod, 
const Elf_Shdr *sec
 }
 #endif
 
-struct exception_table_entry;
-void sort_ex_table(struct exception_table_entry *start,
-  struct exception_table_entry *finish);
-
 #if defined(CONFIG_MODVERSIONS) && defined(CONFIG_PPC64)
 #define ARCH_RELOCATES_KCRCTAB
 #define reloc_start PHYSICAL_START
diff --git a/scripts/sortextable.c b/scripts/sortextable.c
index f453b7c..365a907 100644
--- a/scripts/sortextable.c
+++ b/scripts/sortextable.c
@@ -316,6 +316,8 @@ do_file(char const *const fname)
case EM_S390:
case EM_AARCH64:
case EM_PARISC:
+   case EM_PPC:
+   case EM_PPC64:
custom_sort = sort_relative_table;
break;
case EM_ARCOMPACT:
-- 
2.9.3

[PATCH 2/3] powerpc: relative exception tables

2016-10-12 Thread Nicholas Piggin

This halves the exception table size on 64-bit builds, and it
allows build-time sorting of exception tables to work on
relocated kernels.

Signed-off-by: Nicholas Piggin 
---
 arch/powerpc/include/asm/linkage.h| 20 -
 arch/powerpc/include/asm/uaccess.h| 27 ++-
 arch/powerpc/kernel/kprobes.c |  2 +-
 arch/powerpc/kernel/traps.c   |  2 +-
 arch/powerpc/mm/fault.c   |  2 +-
 arch/powerpc/platforms/embedded6xx/holly.c|  2 +-
 arch/powerpc/platforms/embedded6xx/mpc7448_hpc2.c |  2 +-
 arch/powerpc/sysdev/fsl_rio.c |  2 +-
 8 files changed, 33 insertions(+), 26 deletions(-)

diff --git a/arch/powerpc/include/asm/linkage.h 
b/arch/powerpc/include/asm/linkage.h
index fcb9e0d..6898bf5 100644
--- a/arch/powerpc/include/asm/linkage.h
+++ b/arch/powerpc/include/asm/linkage.h
@@ -16,20 +16,20 @@
 /*
  * Helper macro for exception table entries
  */
-#define EX_TABLE(_fault, _target)  \
-   ".section __ex_table,\"a\"\n"   \
-   PPC_LONG_ALIGN "\n" \
-   PPC_LONG #_fault "\n"   \
-   PPC_LONG #_target "\n"  \
+#define EX_TABLE(_fault, _target)  \
+   ".section __ex_table,\"a\"\n"   \
+   ".balign 4\n"   \
+   ".long (" #_fault  ") - . \n"   \
+   ".long (" #_target ") - . \n"   \
".previous\n"
 
 #else /* __ASSEMBLY__ */
 
-#define EX_TABLE(_fault, _target)  \
-   .section __ex_table,"a" ;   \
-   PPC_LONG_ALIGN ;\
-   PPC_LONG _fault ;   \
-   PPC_LONG _target ;  \
+#define EX_TABLE(_fault, _target)  \
+   .section __ex_table,"a" ;   \
+   .balign 4;  \
+   .long (_fault) - . ;\
+   .long (_target) - . ;   \
.previous
 
 #endif /* __ASSEMBLY__ */
diff --git a/arch/powerpc/include/asm/uaccess.h 
b/arch/powerpc/include/asm/uaccess.h
index caff75e..f485a01 100644
--- a/arch/powerpc/include/asm/uaccess.h
+++ b/arch/powerpc/include/asm/uaccess.h
@@ -63,23 +63,30 @@
 __access_ok((__force unsigned long)(addr), (size), get_fs()))
 
 /*
- * The exception table consists of pairs of addresses: the first is the
- * address of an instruction that is allowed to fault, and the second is
+ * The exception table consists of pairs of relative addresses: the first is
+ * the address of an instruction that is allowed to fault, and the second is
  * the address at which the program should continue.  No registers are
- * modified, so it is entirely up to the continuation code to figure out
- * what to do.
+ * modified, so it is entirely up to the continuation code to figure out what
+ * to do.
  *
- * All the routines below use bits of fixup code that are out of line
- * with the main instruction path.  This means when everything is well,
- * we don't even have to jump over them.  Further, they do not intrude
- * on our cache or tlb entries.
+ * All the routines below use bits of fixup code that are out of line with the
+ * main instruction path.  This means when everything is well, we don't even
+ * have to jump over them.  Further, they do not intrude on our cache or tlb
+ * entries.
  */
 
+#define ARCH_HAS_RELATIVE_EXTABLE
+
 struct exception_table_entry {
-   unsigned long insn;
-   unsigned long fixup;
+   int insn;
+   int fixup;
 };
 
+static inline unsigned long extable_fixup(const struct exception_table_entry 
*x)
+{
+   return (unsigned long)&x->fixup + x->fixup;
+}
+
 /*
  * These are the main single-value transfer routines.  They automatically
  * use the right size if we just have the right pointer type.
diff --git a/arch/powerpc/kernel/kprobes.c b/arch/powerpc/kernel/kprobes.c
index e785cc9..9479d8e 100644
--- a/arch/powerpc/kernel/kprobes.c
+++ b/arch/powerpc/kernel/kprobes.c
@@ -449,7 +449,7 @@ int __kprobes kprobe_fault_handler(struct pt_regs *regs, 
int trapnr)
 * zero, try to fix up.
 */
if ((entry = search_exception_tables(regs->nip)) != NULL) {
-   regs->nip = entry->fixup;
+   regs->nip = extable_fixup(entry);
return 1;
}
 
diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c
index a1f8f56..ec5fd09 100644
--- a/arch/powerpc/kernel/traps.c
+++ b/arch/powerpc/kernel/traps.c
@@ -366,7 +366,7 @@ static inline int check_io_access(struct pt_regs *regs)
   (*nip & 0x100)? "OUT to": "IN from",
   regs->gpr[rb] - _IO_BASE, nip);
regs->msr |= MSR_RI;
-   regs->nip = entry->fixup;
+

[PATCH 1/3] powerpc: EX_TABLE macro for exception tables

2016-10-12 Thread Nicholas Piggin

This macro is taken from s390, and allows more flexibility in
changing exception table format.

Signed-off-by: Nicholas Piggin 
---
 arch/powerpc/include/asm/futex.h  |  14 +-
 arch/powerpc/include/asm/io.h |  18 +-
 arch/powerpc/include/asm/linkage.h|  22 +++
 arch/powerpc/include/asm/uaccess.h|  24 +--
 arch/powerpc/include/asm/word-at-a-time.h |   5 +-
 arch/powerpc/lib/checksum_32.S|  47 +++---
 arch/powerpc/lib/checksum_64.S|  20 +--
 arch/powerpc/lib/copy_32.S|  56 +++---
 arch/powerpc/lib/copyuser_64.S| 271 +++---
 arch/powerpc/lib/copyuser_power7.S|  21 +--
 arch/powerpc/lib/ldstfp.S |  25 ++-
 arch/powerpc/lib/sstep.c  |  15 +-
 arch/powerpc/lib/string.S |  11 +-
 arch/powerpc/lib/string_64.S  |  16 +-
 arch/powerpc/sysdev/fsl_rio.c |   5 +-
 arch/powerpc/sysdev/tsi108_pci.c  |   5 +-
 16 files changed, 248 insertions(+), 327 deletions(-)

diff --git a/arch/powerpc/include/asm/futex.h b/arch/powerpc/include/asm/futex.h
index 2a9cf84..eaada6c 100644
--- a/arch/powerpc/include/asm/futex.h
+++ b/arch/powerpc/include/asm/futex.h
@@ -23,10 +23,8 @@
 "4:li  %1,%3\n" \
"b  3b\n" \
".previous\n" \
-   ".section __ex_table,\"a\"\n" \
-   ".align 3\n" \
-   PPC_LONG "1b,4b,2b,4b\n" \
-   ".previous" \
+   EX_TABLE(1b, 4b) \
+   EX_TABLE(2b, 4b) \
: "=&r" (oldval), "=&r" (ret) \
: "b" (uaddr), "i" (-EFAULT), "r" (oparg) \
: "cr0", "memory")
@@ -104,11 +102,9 @@ futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr,
 "3:.section .fixup,\"ax\"\n\
 4: li  %0,%6\n\
b   3b\n\
-   .previous\n\
-   .section __ex_table,\"a\"\n\
-   .align 3\n\
-   " PPC_LONG "1b,4b,2b,4b\n\
-   .previous" \
+   .previous\n"
+   EX_TABLE(1b, 4b)
+   EX_TABLE(2b, 4b)
 : "+r" (ret), "=&r" (prev), "+m" (*uaddr)
 : "r" (uaddr), "r" (oldval), "r" (newval), "i" (-EFAULT)
 : "cc", "memory");
diff --git a/arch/powerpc/include/asm/io.h b/arch/powerpc/include/asm/io.h
index f6fda84..5219a19 100644
--- a/arch/powerpc/include/asm/io.h
+++ b/arch/powerpc/include/asm/io.h
@@ -458,13 +458,10 @@ static inline unsigned int name(unsigned int port)
\
"5: li  %0,-1\n"\
"   b   4b\n"   \
".previous\n"   \
-   ".section __ex_table,\"a\"\n"   \
-   "   .align  2\n"\
-   "   .long   0b,5b\n"\
-   "   .long   1b,5b\n"\
-   "   .long   2b,5b\n"\
-   "   .long   3b,5b\n"\
-   ".previous" \
+   EX_TABLE(0b, 5b)\
+   EX_TABLE(1b, 5b)\
+   EX_TABLE(2b, 5b)\
+   EX_TABLE(3b, 5b)\
: "=&r" (x) \
: "r" (port + _IO_BASE) \
: "memory");\
@@ -479,11 +476,8 @@ static inline void name(unsigned int val, unsigned int 
port) \
"0:" op " %0,0,%1\n"\
"1: sync\n" \
"2:\n"  \
-   ".section __ex_table,\"a\"\n"   \
-   "   .align  2\n"\
-   "   .long   0b,2b\n"\
-   "   .long   1b,2b\n"\
-   ".previous" \
+   EX_TABLE(0b, 2b)\
+   EX_TABLE(1b, 2b)\
: : "r" (val), "r" (port + _IO_BASE)\
: "memory");\
 }
diff --git a/arch/powerpc/include/asm/linkage.h 
b/arch/powerpc/include/asm/linkage.h
index 0cf5e21..fcb9e0d 100644
--- a/arch/powerpc/include/asm/linkage.h
+++ b/arch/powerpc/include/asm/linkage.h
@@ -12,4 +12,26 @@
 "\t.globl ." #alias "\n\t.set ." #alias ", ." #name)
 #endif
 
+#ifndef __ASSEMBLY__
+/*
+ * Helper macro for exception table entries
+ */
+#define EX_TABLE(_fault, _target)  \
+   ".section __ex_table,\"a\"\n"   \
+   PPC_LONG_ALIGN "\n" \
+   PPC_LONG #_fault "\n"   \
+   PPC_LONG #_target "\n"  \
+   ".previous\n"
+
+#else /* __ASSEMBLY__ */
+
+#define EX_TABLE(_fault, _target)  \
+   .section __ex_table,"a" ;   \
+   PPC_LONG_ALIGN ;\
+   PPC_LONG _fault ;   \
+

[PATCH 0/3][RFC] powerpc: relative exception tables, and build-time sort

2016-10-12 Thread Nicholas Piggin

This implements relative exception tables for powerpc, and converts it
to use build-time sorting. I've tested 64s only so far, but 32-bit seems
to build. Will obviously require some more testing and reviews.

Thanks,
Nick

Nicholas Piggin (3):
  powerpc: EX_TABLE macro for exception tables
  powerpc: relative exception tables
  powerpc: build-time sort exception table

 arch/powerpc/Kconfig  |   1 +
 arch/powerpc/include/asm/futex.h  |  14 +-
 arch/powerpc/include/asm/io.h |  18 +-
 arch/powerpc/include/asm/linkage.h|  22 ++
 arch/powerpc/include/asm/module.h |   4 -
 arch/powerpc/include/asm/uaccess.h|  51 ++--
 arch/powerpc/include/asm/word-at-a-time.h |   5 +-
 arch/powerpc/kernel/kprobes.c |   2 +-
 arch/powerpc/kernel/traps.c   |   2 +-
 arch/powerpc/lib/checksum_32.S|  47 ++--
 arch/powerpc/lib/checksum_64.S|  20 +-
 arch/powerpc/lib/copy_32.S|  56 ++---
 arch/powerpc/lib/copyuser_64.S| 271 +++---
 arch/powerpc/lib/copyuser_power7.S|  21 +-
 arch/powerpc/lib/ldstfp.S |  25 +-
 arch/powerpc/lib/sstep.c  |  15 +-
 arch/powerpc/lib/string.S |  11 +-
 arch/powerpc/lib/string_64.S  |  16 +-
 arch/powerpc/mm/fault.c   |   2 +-
 arch/powerpc/platforms/embedded6xx/holly.c|   2 +-
 arch/powerpc/platforms/embedded6xx/mpc7448_hpc2.c |   2 +-
 arch/powerpc/sysdev/fsl_rio.c |   7 +-
 arch/powerpc/sysdev/tsi108_pci.c  |   5 +-
 scripts/sortextable.c |   2 +
 24 files changed, 274 insertions(+), 347 deletions(-)

-- 
2.9.3

[PATCH 0/3][RFC] powerpc: relative exception tables, and build-time sort

2016-10-12 Thread Nicholas Piggin

This implements relative exception tables for powerpc, and converts it
to use build-time sorting. I've tested 64s only so far, but 32-bit seems
to build. Will obviously require some more testing and reviews.

Thanks,
Nick

Nicholas Piggin (3):
  powerpc: EX_TABLE macro for exception tables
  powerpc: relative exception tables
  powerpc: build-time sort exception table

 arch/powerpc/Kconfig  |   1 +
 arch/powerpc/include/asm/futex.h  |  14 +-
 arch/powerpc/include/asm/io.h |  18 +-
 arch/powerpc/include/asm/linkage.h|  22 ++
 arch/powerpc/include/asm/module.h |   4 -
 arch/powerpc/include/asm/uaccess.h|  51 ++--
 arch/powerpc/include/asm/word-at-a-time.h |   5 +-
 arch/powerpc/kernel/kprobes.c |   2 +-
 arch/powerpc/kernel/traps.c   |   2 +-
 arch/powerpc/lib/checksum_32.S|  47 ++--
 arch/powerpc/lib/checksum_64.S|  20 +-
 arch/powerpc/lib/copy_32.S|  56 ++---
 arch/powerpc/lib/copyuser_64.S| 271 +++---
 arch/powerpc/lib/copyuser_power7.S|  21 +-
 arch/powerpc/lib/ldstfp.S |  25 +-
 arch/powerpc/lib/sstep.c  |  15 +-
 arch/powerpc/lib/string.S |  11 +-
 arch/powerpc/lib/string_64.S  |  16 +-
 arch/powerpc/mm/fault.c   |   2 +-
 arch/powerpc/platforms/embedded6xx/holly.c|   2 +-
 arch/powerpc/platforms/embedded6xx/mpc7448_hpc2.c |   2 +-
 arch/powerpc/sysdev/fsl_rio.c |   7 +-
 arch/powerpc/sysdev/tsi108_pci.c  |   5 +-
 scripts/sortextable.c |   2 +
 24 files changed, 274 insertions(+), 347 deletions(-)

-- 
2.9.3

[PATCH 0/3][RFC] powerpc: relative exception tables, and build-time sort

2016-10-12 Thread Nicholas Piggin

This implements relative exception tables for powerpc, and converts it
to use build-time sorting. I've tested 64s only so far, but 32-bit seems
to build. Will obviously require some more testing and reviews.

Thanks,
Nick

Nicholas Piggin (3):
  powerpc: EX_TABLE macro for exception tables
  powerpc: relative exception tables
  powerpc: build-time sort exception table

 arch/powerpc/Kconfig  |   1 +
 arch/powerpc/include/asm/futex.h  |  14 +-
 arch/powerpc/include/asm/io.h |  18 +-
 arch/powerpc/include/asm/linkage.h|  22 ++
 arch/powerpc/include/asm/module.h |   4 -
 arch/powerpc/include/asm/uaccess.h|  51 ++--
 arch/powerpc/include/asm/word-at-a-time.h |   5 +-
 arch/powerpc/kernel/kprobes.c |   2 +-
 arch/powerpc/kernel/traps.c   |   2 +-
 arch/powerpc/lib/checksum_32.S|  47 ++--
 arch/powerpc/lib/checksum_64.S|  20 +-
 arch/powerpc/lib/copy_32.S|  56 ++---
 arch/powerpc/lib/copyuser_64.S| 271 +++---
 arch/powerpc/lib/copyuser_power7.S|  21 +-
 arch/powerpc/lib/ldstfp.S |  25 +-
 arch/powerpc/lib/sstep.c  |  15 +-
 arch/powerpc/lib/string.S |  11 +-
 arch/powerpc/lib/string_64.S  |  16 +-
 arch/powerpc/mm/fault.c   |   2 +-
 arch/powerpc/platforms/embedded6xx/holly.c|   2 +-
 arch/powerpc/platforms/embedded6xx/mpc7448_hpc2.c |   2 +-
 arch/powerpc/sysdev/fsl_rio.c |   7 +-
 arch/powerpc/sysdev/tsi108_pci.c  |   5 +-
 scripts/sortextable.c |   2 +
 24 files changed, 274 insertions(+), 347 deletions(-)

-- 
2.9.3

[PATCH] powerpc/mm: Drop dump_numa_memory_topology()

2016-10-12 Thread Michael Ellerman

At boot we dump the NUMA memory topology in dump_numa_memory_topology(),
at KERN_DEBUG level, resulting in output like:

  Node 0 Memory: 0x0-0x1
  Node 1 Memory: 0x1-0x2

Which is nice enough, but immediately after that we iterate over each
node and call setup_node_data(), which also prints out the node ranges,
at KERN_INFO, giving eg:

  numa: Initmem setup node 0 [mem 0x-0x]
  numa: Initmem setup node 1 [mem 0x1-0x1]

So drop dump_numa_memory_topology() as superfluous chatter.

Signed-off-by: Michael Ellerman 
---
 arch/powerpc/mm/numa.c | 36 
 1 file changed, 36 deletions(-)

diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c
index 75b9cd6150cc..db5fc2b54c5a 100644
--- a/arch/powerpc/mm/numa.c
+++ b/arch/powerpc/mm/numa.c
@@ -871,40 +871,6 @@ void __init dump_numa_cpu_topology(void)
}
 }
 
-static void __init dump_numa_memory_topology(void)
-{
-   unsigned int node;
-   unsigned int count;
-
-   if (min_common_depth == -1 || !numa_enabled)
-   return;
-
-   for_each_online_node(node) {
-   unsigned long i;
-
-   printk(KERN_DEBUG "Node %d Memory:", node);
-
-   count = 0;
-
-   for (i = 0; i < memblock_end_of_DRAM();
-i += (1 << SECTION_SIZE_BITS)) {
-   if (early_pfn_to_nid(i >> PAGE_SHIFT) == node) {
-   if (count == 0)
-   printk(" 0x%lx", i);
-   ++count;
-   } else {
-   if (count > 0)
-   printk("-0x%lx", i);
-   count = 0;
-   }
-   }
-
-   if (count > 0)
-   printk("-0x%lx", i);
-   printk("\n");
-   }
-}
-
 /* Initialize NODE_DATA for a node on the local memory */
 static void __init setup_node_data(int nid, u64 start_pfn, u64 end_pfn)
 {
@@ -947,8 +913,6 @@ void __init initmem_init(void)
 
if (parse_numa_properties())
setup_nonnuma();
-   else
-   dump_numa_memory_topology();
 
memblock_dump_all();
 
-- 
2.7.4

Re: [PATCH kernel v2 2/2] powerpc/mm/iommu, vfio/spapr: Put pages on VFIO container shutdown

2016-10-12 Thread David Gibson

On Wed, Oct 12, 2016 at 03:58:28PM +1100, Alexey Kardashevskiy wrote:
> At the moment the userspace tool is expected to request pinning of
> the entire guest RAM when VFIO IOMMU SPAPR v2 driver is present.
> When the userspace process finishes, all the pinned pages need to
> be put; this is done as a part of the userspace memory context (MM)
> destruction which happens on the very last mmdrop().
> 
> This approach has a problem that a MM of the userspace process
> may live longer than the userspace process itself as kernel threads
> use userspace process MMs which was running on a CPU where
> the kernel thread was scheduled to. If this happened, the MM remains
> referenced until this exact kernel thread wakes up again
> and releases the very last reference to the MM, on an idle system this
> can take even hours.
> 
> This moves preregistered regions tracking from MM to VFIO; instead of
> using mm_iommu_table_group_mem_t::used, tce_container::prereg_list is
> added so each container releases regions which it has pre-registered.
> 
> This changes the userspace interface to return EBUSY if a memory
> region is already registered in a container. However it should not
> have any practical effect as the only userspace tool available now
> does register memory region once per container anyway.
> 
> As tce_iommu_register_pages/tce_iommu_unregister_pages are called
> under container->lock, this does not need additional locking.
> 
> Signed-off-by: Alexey Kardashevskiy 
> Reviewed-by: Nicholas Piggin 
> ---
> Changes:
> v2:
> * updated commit log
> ---
>  arch/powerpc/include/asm/mmu_context.h |  1 -
>  arch/powerpc/mm/mmu_context_book3s64.c |  4 ---
>  arch/powerpc/mm/mmu_context_iommu.c| 11 
>  drivers/vfio/vfio_iommu_spapr_tce.c| 51 
> +-
>  4 files changed, 50 insertions(+), 17 deletions(-)
> 
> diff --git a/arch/powerpc/include/asm/mmu_context.h 
> b/arch/powerpc/include/asm/mmu_context.h
> index b9e3f0a..a6e18b5 100644
> --- a/arch/powerpc/include/asm/mmu_context.h
> +++ b/arch/powerpc/include/asm/mmu_context.h
> @@ -26,7 +26,6 @@ extern long mm_iommu_get(struct mm_struct *mm,
>  extern long mm_iommu_put(struct mm_struct *mm,
>   struct mm_iommu_table_group_mem_t *mem);
>  extern void mm_iommu_init(struct mm_struct *mm);
> -extern void mm_iommu_cleanup(struct mm_struct *mm);
>  extern struct mm_iommu_table_group_mem_t *mm_iommu_lookup(struct mm_struct 
> *mm,
>   unsigned long ua, unsigned long size);
>  extern struct mm_iommu_table_group_mem_t *mm_iommu_find(struct mm_struct *mm,
> diff --git a/arch/powerpc/mm/mmu_context_book3s64.c 
> b/arch/powerpc/mm/mmu_context_book3s64.c
> index ad82735..1a07969 100644
> --- a/arch/powerpc/mm/mmu_context_book3s64.c
> +++ b/arch/powerpc/mm/mmu_context_book3s64.c
> @@ -159,10 +159,6 @@ static inline void destroy_pagetable_page(struct 
> mm_struct *mm)
>  
>  void destroy_context(struct mm_struct *mm)
>  {
> -#ifdef CONFIG_SPAPR_TCE_IOMMU
> - mm_iommu_cleanup(mm);
> -#endif
> -
>  #ifdef CONFIG_PPC_ICSWX
>   drop_cop(mm->context.acop, mm);
>   kfree(mm->context.cop_lockp);
> diff --git a/arch/powerpc/mm/mmu_context_iommu.c 
> b/arch/powerpc/mm/mmu_context_iommu.c
> index 4c6db09..104bad0 100644
> --- a/arch/powerpc/mm/mmu_context_iommu.c
> +++ b/arch/powerpc/mm/mmu_context_iommu.c
> @@ -365,14 +365,3 @@ void mm_iommu_init(struct mm_struct *mm)
>  {
>   INIT_LIST_HEAD_RCU(&mm->context.iommu_group_mem_list);
>  }
> -
> -void mm_iommu_cleanup(struct mm_struct *mm)
> -{
> - struct mm_iommu_table_group_mem_t *mem, *tmp;
> -
> - list_for_each_entry_safe(mem, tmp, &mm->context.iommu_group_mem_list,
> - next) {
> - list_del_rcu(&mem->next);
> - mm_iommu_do_free(mem);
> - }
> -}
> diff --git a/drivers/vfio/vfio_iommu_spapr_tce.c 
> b/drivers/vfio/vfio_iommu_spapr_tce.c
> index 3d2a65c..c8e9796 100644
> --- a/drivers/vfio/vfio_iommu_spapr_tce.c
> +++ b/drivers/vfio/vfio_iommu_spapr_tce.c
> @@ -89,6 +89,15 @@ struct tce_iommu_group {
>  };
>  
>  /*
> + * A container needs to remember which preregistered region  it has
> + * referenced to do proper cleanup at the userspace process exit.
> + */
> +struct tce_iommu_prereg {
> + struct list_head next;
> + struct mm_iommu_table_group_mem_t *mem;
> +};
> +
> +/*
>   * The container descriptor supports only a single group per container.
>   * Required by the API as the container is not supplied with the IOMMU group
>   * at the moment of initialization.
> @@ -101,12 +110,26 @@ struct tce_container {
>   struct mm_struct *mm;
>   struct iommu_table *tables[IOMMU_TABLE_GROUP_MAX_TABLES];
>   struct list_head group_list;
> + struct list_head prereg_list;
>  };
>  
> +static long tce_iommu_prereg_free(struct tce_container *container,
> + struct tce_iommu_prereg *tcemem)
> +{
> + long ret;
> +
> + list_del(&tcemem->next);
> + ret =

Re: [PATCH kernel v2 1/2] powerpc/iommu: Stop using @current in mm_iommu_xxx

2016-10-12 Thread David Gibson

On Wed, Oct 12, 2016 at 03:58:27PM +1100, Alexey Kardashevskiy wrote:
> In some situations the userspace memory context may live longer than
> the userspace process itself so if we need to do proper memory context
> cleanup, we better cache @mm and use it later when the process is gone
> (@current or @current->mm are NULL).
> 
> This changes mm_iommu_xxx API to receive mm_struct instead of using one
> from @current.
> 
> This references and caches MM once per container so we do not depend
> on @current pointing to a valid task descriptor anymore.
> 
> This is needed by the following patch to do proper cleanup in time.
> This depends on "powerpc/powernv/ioda: Fix endianness when reading TCEs"
> to do proper cleanup via tce_iommu_clear() patch.
> 
> To keep API consistent, this replaces mm_context_t with mm_struct;
> we stick to mm_struct as mm_iommu_adjust_locked_vm() helper needs
> access to &mm->mmap_sem.
> 
> This should cause no behavioral change.
> 
> Signed-off-by: Alexey Kardashevskiy 
> Reviewed-by: Nicholas Piggin 
> Acked-by: Balbir Singh 
> ---
> Changes:
> v2:
> * added BUG_ON(container->mm && (container->mm != current->mm)) in
> tce_iommu_register_pages()
> * added note about containers referencing MM
> ---
>  arch/powerpc/include/asm/mmu_context.h | 20 +++--
>  arch/powerpc/kernel/setup-common.c |  2 +-
>  arch/powerpc/mm/mmu_context_book3s64.c |  4 +--
>  arch/powerpc/mm/mmu_context_iommu.c| 55 
> ++
>  drivers/vfio/vfio_iommu_spapr_tce.c| 41 -
>  5 files changed, 63 insertions(+), 59 deletions(-)
> 
> diff --git a/arch/powerpc/include/asm/mmu_context.h 
> b/arch/powerpc/include/asm/mmu_context.h
> index 5c45114..b9e3f0a 100644
> --- a/arch/powerpc/include/asm/mmu_context.h
> +++ b/arch/powerpc/include/asm/mmu_context.h
> @@ -19,16 +19,18 @@ extern void destroy_context(struct mm_struct *mm);
>  struct mm_iommu_table_group_mem_t;
>  
>  extern int isolate_lru_page(struct page *page);  /* from internal.h */
> -extern bool mm_iommu_preregistered(void);
> -extern long mm_iommu_get(unsigned long ua, unsigned long entries,
> +extern bool mm_iommu_preregistered(struct mm_struct *mm);
> +extern long mm_iommu_get(struct mm_struct *mm,
> + unsigned long ua, unsigned long entries,
>   struct mm_iommu_table_group_mem_t **pmem);
> -extern long mm_iommu_put(struct mm_iommu_table_group_mem_t *mem);
> -extern void mm_iommu_init(mm_context_t *ctx);
> -extern void mm_iommu_cleanup(mm_context_t *ctx);
> -extern struct mm_iommu_table_group_mem_t *mm_iommu_lookup(unsigned long ua,
> - unsigned long size);
> -extern struct mm_iommu_table_group_mem_t *mm_iommu_find(unsigned long ua,
> - unsigned long entries);
> +extern long mm_iommu_put(struct mm_struct *mm,
> + struct mm_iommu_table_group_mem_t *mem);
> +extern void mm_iommu_init(struct mm_struct *mm);
> +extern void mm_iommu_cleanup(struct mm_struct *mm);
> +extern struct mm_iommu_table_group_mem_t *mm_iommu_lookup(struct mm_struct 
> *mm,
> + unsigned long ua, unsigned long size);
> +extern struct mm_iommu_table_group_mem_t *mm_iommu_find(struct mm_struct *mm,
> + unsigned long ua, unsigned long entries);
>  extern long mm_iommu_ua_to_hpa(struct mm_iommu_table_group_mem_t *mem,
>   unsigned long ua, unsigned long *hpa);
>  extern long mm_iommu_mapped_inc(struct mm_iommu_table_group_mem_t *mem);
> diff --git a/arch/powerpc/kernel/setup-common.c 
> b/arch/powerpc/kernel/setup-common.c
> index dba265c..942cf49 100644
> --- a/arch/powerpc/kernel/setup-common.c
> +++ b/arch/powerpc/kernel/setup-common.c
> @@ -906,7 +906,7 @@ void __init setup_arch(char **cmdline_p)
>   init_mm.context.pte_frag = NULL;
>  #endif
>  #ifdef CONFIG_SPAPR_TCE_IOMMU
> - mm_iommu_init(&init_mm.context);
> + mm_iommu_init(&init_mm);
>  #endif
>   irqstack_early_init();
>   exc_lvl_early_init();
> diff --git a/arch/powerpc/mm/mmu_context_book3s64.c 
> b/arch/powerpc/mm/mmu_context_book3s64.c
> index b114f8b..ad82735 100644
> --- a/arch/powerpc/mm/mmu_context_book3s64.c
> +++ b/arch/powerpc/mm/mmu_context_book3s64.c
> @@ -115,7 +115,7 @@ int init_new_context(struct task_struct *tsk, struct 
> mm_struct *mm)
>   mm->context.pte_frag = NULL;
>  #endif
>  #ifdef CONFIG_SPAPR_TCE_IOMMU
> - mm_iommu_init(&mm->context);
> + mm_iommu_init(mm);
>  #endif
>   return 0;
>  }
> @@ -160,7 +160,7 @@ static inline void destroy_pagetable_page(struct 
> mm_struct *mm)
>  void destroy_context(struct mm_struct *mm)
>  {
>  #ifdef CONFIG_SPAPR_TCE_IOMMU
> - mm_iommu_cleanup(&mm->context);
> + mm_iommu_cleanup(mm);
>  #endif
>  
>  #ifdef CONFIG_PPC_ICSWX
> diff --git a/arch/powerpc/mm/mmu_context_iommu.c 
> b/arch/powerpc/mm/mmu_context_iommu.c
> index e0f1c33..4c6db09 100644
> --- a/arch/powerpc/mm/mmu_context_iommu.c
> +++

[PATCH] powerpc/64s: reduce exception alignment

2016-10-12 Thread Nicholas Piggin

Exception handlers are aligned to 128 bytes (L1 cache) on 64s, which is
overkill. It can reduce the icache footprint of any individual exception
path. However taken as a whole, the expansion in icache footprint seems
likely to be counter-productive and cause more total misses.

Create IFETCH_ALIGN_SHIFT/BYTES, which should give optimal ifetch
alignment with much more reasonable alignment. This saves 1792 bytes
from head_64.o text with an allmodconfig build.

Other subarchitectures should define appropriate IFETCH_ALIGN_SHIFT
values if this becomes more widely used.

Cc: Anton Blanchard 
Signed-off-by: Nicholas Piggin 
---
 arch/powerpc/include/asm/cache.h | 3 +++
 arch/powerpc/include/asm/head-64.h   | 8 
 arch/powerpc/kernel/exceptions-64s.S | 2 +-
 3 files changed, 8 insertions(+), 5 deletions(-)

diff --git a/arch/powerpc/include/asm/cache.h b/arch/powerpc/include/asm/cache.h
index ffbafbf..7657aa8 100644
--- a/arch/powerpc/include/asm/cache.h
+++ b/arch/powerpc/include/asm/cache.h
@@ -20,12 +20,15 @@
 #endif
 #else /* CONFIG_PPC64 */
 #define L1_CACHE_SHIFT 7
+#define IFETCH_ALIGN_SHIFT 4 /* POWER8,9 */
 #endif
 
 #defineL1_CACHE_BYTES  (1 << L1_CACHE_SHIFT)
 
 #defineSMP_CACHE_BYTES L1_CACHE_BYTES
 
+#define IFETCH_ALIGN_BYTES (1 << IFETCH_ALIGN_SHIFT)
+
 #if defined(__powerpc64__) && !defined(__ASSEMBLY__)
 struct ppc64_caches {
u32 dsize;  /* L1 d-cache size */
diff --git a/arch/powerpc/include/asm/head-64.h 
b/arch/powerpc/include/asm/head-64.h
index ab90c2f..fca7033 100644
--- a/arch/powerpc/include/asm/head-64.h
+++ b/arch/powerpc/include/asm/head-64.h
@@ -95,12 +95,12 @@ end_##sname:
 
 #define __FIXED_SECTION_ENTRY_BEGIN(sname, name, __align)  \
USE_FIXED_SECTION(sname);   \
-   .align __align; \
+   .balign __align;\
.global name;   \
 name:
 
 #define FIXED_SECTION_ENTRY_BEGIN(sname, name) \
-   __FIXED_SECTION_ENTRY_BEGIN(sname, name, 0)
+   __FIXED_SECTION_ENTRY_BEGIN(sname, name, IFETCH_ALIGN_BYTES)
 
 #define FIXED_SECTION_ENTRY_BEGIN_LOCATION(sname, name, start) \
USE_FIXED_SECTION(sname);   \
@@ -203,9 +203,9 @@ end_##sname:
 #define EXC_VIRT_END(name, start, end) \
FIXED_SECTION_ENTRY_END_LOCATION(virt_vectors, 
exc_virt_##start##_##name, end)
 
-#define EXC_COMMON_BEGIN(name) \
+#define EXC_COMMON_BEGIN(name) \
USE_TEXT_SECTION(); \
-   .align  7;  \
+   .balign IFETCH_ALIGN_BYTES; \
.global name;   \
DEFINE_FIXED_SYMBOL(name);  \
 name:
diff --git a/arch/powerpc/kernel/exceptions-64s.S 
b/arch/powerpc/kernel/exceptions-64s.S
index e680e84..4af87e4 100644
--- a/arch/powerpc/kernel/exceptions-64s.S
+++ b/arch/powerpc/kernel/exceptions-64s.S
@@ -1403,7 +1403,7 @@ USE_TEXT_SECTION()
 /*
  * Hash table stuff
  */
-   .align  7
+   .balign IFETCH_ALIGN_BYTES
 do_hash_page:
 #ifdef CONFIG_PPC_STD_MMU_64
andis.  r0,r4,0xa410/* weird error? */
-- 
2.9.3

[RFC][PATCH] kernel relocation for KVM exceptions

2016-10-12 Thread Nicholas Piggin

Hi Paul,

I wonder what you think about this approach for applying relocation to KVM
exceptions? It's not yet tested and I haven't attempted PR, but I'll keep
at it if you think it's the right direction.

The relocation branch requires ctr, but we can get away without more scratch
storage by putting trap and cr in one register.

On the other hand, that's going to make the calling convention diverge even
more for 32-bit, so perhaps it's being overly complex and you'd rather add
another scratch save for CONFIG_RELOCATABLE? Other ideas?

Thanks,
Nick


---
 arch/powerpc/include/asm/exception-64s.h | 63 ++--
 arch/powerpc/kernel/exceptions-64s.S |  4 +-
 arch/powerpc/kvm/book3s_hv_rmhandlers.S  | 18 +
 3 files changed, 65 insertions(+), 20 deletions(-)

diff --git a/arch/powerpc/include/asm/exception-64s.h 
b/arch/powerpc/include/asm/exception-64s.h
index 84d49b1..466870f 100644
--- a/arch/powerpc/include/asm/exception-64s.h
+++ b/arch/powerpc/include/asm/exception-64s.h
@@ -97,6 +97,11 @@
ld  reg,PACAKBASE(r13); \
ori reg,reg,(ABS_ADDR(label))@l;
 
+#define __LOAD_FAR_HANDLER(reg, label) \
+   ld  reg,PACAKBASE(r13); \
+   ori reg,reg,(ABS_ADDR(label))@l;\
+   addis   reg,reg,(ABS_ADDR(label))@h;
+
 /* Exception register prefixes */
 #define EXC_HV H
 #define EXC_STD
@@ -218,12 +223,43 @@ END_FTR_SECTION_NESTED(ftr,ftr,943)
mtctr   reg;\
bctr
 
+/*
+ * KVM requires a far (>64K) branch, and to set the exit number in r12
+ * when branching from an exception
+ */
+#define BRANCH_TO_KVM_EXIT(reg, label) \
+   mfctr   reg;\
+   std reg,HSTATE_SCRATCH2(r13);   \
+   __LOAD_FAR_HANDLER(reg, label); \
+   mtctr   reg;\
+   bctr
+
+#define BRANCH_TO_KVM(reg, label)  \
+   __LOAD_FAR_HANDLER(reg, label); \
+   mtctr   reg;\
+   bctr
+
+#define BRANCH_LINK_TO_KVM(reg, label) \
+   __LOAD_FAR_HANDLER(reg, label); \
+   mtctr   reg;\
+   bctrl
+
 #else
 #define BRANCH_TO_COMMON(reg, label)   \
b   label
 
+#define BRANCH_TO_KVM(reg, label)  \
+   b   label
+
+#define BRANCH_TO_KVM_EXIT(reg, label) \
+   b   label
+
+#define BRANCH_LINK_TO_KVM(reg, label) \
+   b   label
+
 #endif
 
+
 #define __KVM_HANDLER_PROLOG(area, n)  \
BEGIN_FTR_SECTION_NESTED(947)   \
ld  r10,area+EX_CFAR(r13);  \
@@ -234,30 +270,35 @@ END_FTR_SECTION_NESTED(ftr,ftr,943)
std r10,HSTATE_PPR(r13);\
END_FTR_SECTION_NESTED(CPU_FTR_HAS_PPR,CPU_FTR_HAS_PPR,948);\
ld  r10,area+EX_R10(r13);   \
-   stw r9,HSTATE_SCRATCH1(r13);\
-   ld  r9,area+EX_R9(r13); \
std r12,HSTATE_SCRATCH0(r13);   \
+   li  r12,(n);\
+   sldi r12,r12,32; \
+   ori r12,r12,r9; \
+   ld  r9,area+EX_R9(r13); \
+   std r9,HSTATE_SCRATCH1(r13);\
 
 #define __KVM_HANDLER(area, h, n)  \
__KVM_HANDLER_PROLOG(area, n)   \
-   li  r12,n;  \
-   b   kvmppc_interrupt
+   BRANCH_TO_KVM_EXIT(r9, kvmppc_interrupt)
 
 #define __KVM_HANDLER_SKIP(area, h, n) \
cmpwi   r10,KVM_GUEST_MODE_SKIP;\
-   ld  r10,area+EX_R10(r13);   \
beq 89f;\
-   stw r9,HSTATE_SCRATCH1(r13);\
BEGIN_FTR_SECTION_NESTED(948)   \
-   ld  r9,area+EX_PPR(r13);\
-   std r9,HSTATE_PPR(r13);

Re: [PATCH v2] cxl: Prevent adapter reset if an active context exists

2016-10-12 Thread Andrew Donnellan


On 12/10/16 15:17, Vaibhav Jain wrote:

This patch prevents resetting the cxl adapter via sysfs in presence of
one or more active cxl_context on it. This protects against an
unrecoverable error caused by PSL owning a dirty cache line even after
reset and host tries to touch the same cache line. In case a force reset
of the card is required irrespective of any active contexts, the int
value -1 can be stored in the 'reset' sysfs attribute of the card.

The patch introduces a new atomic_t member named contexts_num inside
struct cxl that holds the number of active contexts attached to the card,
which is checked against '0' before proceeding with the reset. To
protect against a race condition where a context is activated just after
the reset check is performed, contexts_num is atomically set to '-1'
after the reset check to indicate that no more contexts can be activated
on the card.

Before activating a context we atomically test whether contexts_num is
non-negative and, if so, increment its value by one. If the value of
contexts_num is negative, the card is about to be reset and context
activation fails with an error at that point.

Signed-off-by: Vaibhav Jain 


All the changes look good to me.

Reviewed-by: Andrew Donnellan 


diff --git a/Documentation/ABI/testing/sysfs-class-cxl 
b/Documentation/ABI/testing/sysfs-class-cxl
index 4ba0a2a..dae2b38 100644
--- a/Documentation/ABI/testing/sysfs-class-cxl
+++ b/Documentation/ABI/testing/sysfs-class-cxl
@@ -220,8 +220,11 @@ What:   /sys/class/cxl//reset
 Date:   October 2014
 Contact:linuxppc-dev@lists.ozlabs.org
 Description:write only
-Writing 1 will issue a PERST to card which may cause the card
-to reload the FPGA depending on load_image_on_perst.
+Writing 1 will issue a PERST to card provided there are no
+   contexts active on any one of the card AFUs. This may cause
+   the card to reload the FPGA depending on load_image_on_perst.
+   Writing -1 will do a force PERST irrespective of any active
+   contexts on the card AFUs.


Ugh, spaces vs tabs bites again :(

--
Andrew Donnellan  OzLabs, ADL Canberra
andrew.donnel...@au1.ibm.com  IBM Australia Limited

Re: [PATCH] powerpc/64: option to force run-at-load to test relocation

2016-10-12 Thread Nicholas Piggin

On Wed, 12 Oct 2016 18:35:21 +1100
Balbir Singh  wrote:

> On 12/10/16 17:57, Nicholas Piggin wrote:
> > This adds a config option that can help exercise the case when
> > the kernel is not running at PAGE_OFFSET.
> > 
> > Signed-off-by: Nicholas Piggin 
> > ---
> >  arch/powerpc/Kconfig   | 9 +
> >  arch/powerpc/kernel/head_64.S  | 4 
> >  arch/powerpc/kernel/setup-common.c | 3 +++
> >  3 files changed, 16 insertions(+)
> > 
> > diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
> > index 65fba4c..5d43cb8 100644
> > --- a/arch/powerpc/Kconfig
> > +++ b/arch/powerpc/Kconfig
> > @@ -478,6 +478,15 @@ config RELOCATABLE
> >   setting can still be useful to bootwrappers that need to know the
> >   load address of the kernel (eg. u-boot/mkimage).
> >  
> > +config RELOCATABLE_TEST
> > +   bool "Test relocatable kernel"
> > +   depends on (PPC64 && RELOCATABLE)
> > +   default n
> > +   help
> > + This runs the relocatable kernel at the address it was initially
> > + loaded at, which tends to be non-zero and therefore test the
> > + relocation code.
> > +
> >  config CRASH_DUMP
> > bool "Build a kdump crash kernel"
> > depends on PPC64 || 6xx || FSL_BOOKE || (44x && !SMP)
> > diff --git a/arch/powerpc/kernel/head_64.S b/arch/powerpc/kernel/head_64.S
> > index 79da0641..bc9ceac 100644
> > --- a/arch/powerpc/kernel/head_64.S
> > +++ b/arch/powerpc/kernel/head_64.S
> > @@ -111,8 +111,12 @@ __secondary_hold_acknowledge:
> > .globl  __run_at_load
> >  __run_at_load:
> >  DEFINE_FIXED_SYMBOL(__run_at_load)
> > +#ifdef CONFIG_RELOCATABLE_TEST
> > +   .long   0x1 /* Test relocation, do not relocate to 0 */
> > +#else
> > .long   0x72756e30  /* "run0" -- relocate to 0 by default */
> >  #endif
> > +#endif  
> 
> Could we do something like
> 
> config RELOCATION_VALUE
>   default 0x72756e30
>   default 1  if CONFIG_RELOCATABLE_TEST
> 
> and then get
> 
>   .long CONFIG_RELOCATION_VALUE

Normally I'm up for reducing ifdefs in S and c files, but in this case
I'm not sure. I like being able to see the two possible values in the
source. I don't really mind though. If you or Michael feel strongly, I'm
happy to change it.


> > . = 0x60
> >  /*
> > diff --git a/arch/powerpc/kernel/setup-common.c 
> > b/arch/powerpc/kernel/setup-common.c
> > index dba265c..18e0f19 100644
> > --- a/arch/powerpc/kernel/setup-common.c
> > +++ b/arch/powerpc/kernel/setup-common.c
> > @@ -795,6 +795,9 @@ static __init void print_system_info(void)
> > pr_info("mmu_features  = 0x%08x\n", cur_cpu_spec->mmu_features);
> >  #ifdef CONFIG_PPC64
> > pr_info("firmware_features = 0x%016lx\n", powerpc_firmware_features);
> > +
> > +   if (get_paca()->kernelbase != PAGE_OFFSET)
> > +   pr_info("kernelbase= 0x%llx\n", get_paca()->kernelbase);
> >  #endif
> >
> 
> Do we need this? We get physical_offset if we are relocated.

You're right, that hunk can go.

Thanks,
Nick

[PATCH] powerpc/64s: relocation, register save fixes for system reset interrupt

2016-10-12 Thread Nicholas Piggin

This patch does a couple of things. First of all, powernv immediately
explodes when running a relocated kernel, because the system reset
exception for handling sleeps does not do correct relocated branches.

Secondly, the sleep handling code trashes the condition and cfar
registers, which we would like to preserve for debugging purposes (in
the non-sleep exception case).

This patch changes the exception to use the standard format that saves
registers before any tests or branches are made. It adds the test for
idle-wakeup as an "extra" to break out of the normal exception path.
Then it branches to a relocated idle handler that calls the various
idle handling functions.

After this patch, the POWER8 CPU simulator now boots a powernv kernel that
is running at a non-zero address.

Cc: Balbir Singh 
Cc: Shreyas B. Prabhu 
Cc: Gautham R. Shenoy 
Signed-off-by: Nicholas Piggin 
---
 arch/powerpc/include/asm/exception-64s.h | 16 ++
 arch/powerpc/kernel/exceptions-64s.S | 50 ++--
 2 files changed, 45 insertions(+), 21 deletions(-)

diff --git a/arch/powerpc/include/asm/exception-64s.h 
b/arch/powerpc/include/asm/exception-64s.h
index 2e4e7d8..84d49b1 100644
--- a/arch/powerpc/include/asm/exception-64s.h
+++ b/arch/powerpc/include/asm/exception-64s.h
@@ -93,6 +93,10 @@
ld  reg,PACAKBASE(r13); /* get high part of  */   \
ori reg,reg,(FIXED_SYMBOL_ABS_ADDR(label))@l;
 
+#define __LOAD_HANDLER(reg, label) \
+   ld  reg,PACAKBASE(r13); \
+   ori reg,reg,(ABS_ADDR(label))@l;
+
 /* Exception register prefixes */
 #define EXC_HV H
 #define EXC_STD
@@ -208,6 +212,18 @@ END_FTR_SECTION_NESTED(ftr,ftr,943)
 #define kvmppc_interrupt kvmppc_interrupt_pr
 #endif
 
+#ifdef CONFIG_RELOCATABLE
+#define BRANCH_TO_COMMON(reg, label)   \
+   __LOAD_HANDLER(reg, label); \
+   mtctr   reg;\
+   bctr
+
+#else
+#define BRANCH_TO_COMMON(reg, label)   \
+   b   label
+
+#endif
+
 #define __KVM_HANDLER_PROLOG(area, n)  \
BEGIN_FTR_SECTION_NESTED(947)   \
ld  r10,area+EX_CFAR(r13);  \
diff --git a/arch/powerpc/kernel/exceptions-64s.S 
b/arch/powerpc/kernel/exceptions-64s.S
index 08992f8..e680e84 100644
--- a/arch/powerpc/kernel/exceptions-64s.S
+++ b/arch/powerpc/kernel/exceptions-64s.S
@@ -95,19 +95,35 @@ __start_interrupts:
 /* No virt vectors corresponding with 0x0..0x100 */
 EXC_VIRT_NONE(0x4000, 0x4100)
 
-EXC_REAL_BEGIN(system_reset, 0x100, 0x200)
-   SET_SCRATCH0(r13)
+
 #ifdef CONFIG_PPC_P7_NAP
-BEGIN_FTR_SECTION
-   /* Running native on arch 2.06 or later, check if we are
-* waking up from nap/sleep/winkle.
+   /*
+* If running native on arch 2.06 or later, check if we are waking up
+* from nap/sleep/winkle, and branch to idle handler.
 */
-   mfspr   r13,SPRN_SRR1
-   rlwinm. r13,r13,47-31,30,31
-   beq 9f
+#define IDLETEST(n)\
+   BEGIN_FTR_SECTION ; \
+   mfspr   r10,SPRN_SRR1 ; \
+   rlwinm. r10,r10,47-31,30,31 ;   \
+   beq-1f ;\
+   cmpwi   cr3,r10,2 ; \
+   BRANCH_TO_COMMON(r10, system_reset_idle_common) ;   \
+1: \
+   END_FTR_SECTION_IFSET(CPU_FTR_HVMODE | CPU_FTR_ARCH_206)
+#else
+#define IDLETEST NOTEST
+#endif
 
-   cmpwi   cr3,r13,2
-   GET_PACA(r13)
+EXC_REAL_BEGIN(system_reset, 0x100, 0x200)
+   SET_SCRATCH0(r13)
+   EXCEPTION_PROLOG_PSERIES(PACA_EXGEN, system_reset_common, EXC_STD,
+IDLETEST, 0x100)
+
+EXC_REAL_END(system_reset, 0x100, 0x200)
+EXC_VIRT_NONE(0x4100, 0x4200)
+
+#ifdef CONFIG_PPC_P7_NAP
+EXC_COMMON_BEGIN(system_reset_idle_common)
bl  pnv_restore_hyp_resource
 
li  r0,PNV_THREAD_RUNNING
@@ -130,14 +146,8 @@ BEGIN_FTR_SECTION
blt cr3,2f
b   pnv_wakeup_loss
 2: b   pnv_wakeup_noloss
+#endif
 
-9:
-END_FTR_SECTION_IFSET(CPU_FTR_HVMODE | CPU_FTR_ARCH_206)
-#endif /* CONFIG_PPC_P7_NAP */
-   EXCEPTION_PROLOG_PSERIES(PACA_EXGEN, system_reset_common, EXC_STD,
-NOTEST, 0x100)
-EXC_REAL_END(system_reset, 0x100, 0x200)
-EXC_VIRT_NONE(0x4100, 0x4200)
 EXC_COMMON(system_reset_common, 0x100, system_reset_exception)
 
 #ifdef

[PATCH] powerpc: make _ASM_NOKPROBE_SYMBOL a noop when KPROBES not defined

2016-10-12 Thread Nicholas Piggin

Signed-off-by: Nicholas Piggin 
---
The linker orphan sections error patch caught this

 arch/powerpc/include/asm/ppc_asm.h | 4 
 1 file changed, 4 insertions(+)

diff --git a/arch/powerpc/include/asm/ppc_asm.h 
b/arch/powerpc/include/asm/ppc_asm.h
index c73750b..ceec199 100644
--- a/arch/powerpc/include/asm/ppc_asm.h
+++ b/arch/powerpc/include/asm/ppc_asm.h
@@ -265,10 +265,14 @@ GLUE(.,name):
  * latter is for those that incdentially must be excluded from probing
  * and allows them to be linked at more optimal location within text.
  */
+#ifdef CONFIG_KPROBES
 #define _ASM_NOKPROBE_SYMBOL(entry)\
.pushsection "_kprobe_blacklist","aw";  \
PPC_LONG (entry) ;  \
.popsection
+#else
+#define _ASM_NOKPROBE_SYMBOL(entry)
+#endif
 
 #define FUNC_START(name)   _GLOBAL(name)
 #define FUNC_END(name)
-- 
2.9.3

[PATCH 10/10] mm: replace access_process_vm() write parameter with gup_flags

2016-10-12 Thread Lorenzo Stoakes

This patch removes the write parameter from access_process_vm() and replaces it
with a gup_flags parameter as use of this function previously _implied_
FOLL_FORCE, whereas after this patch callers explicitly pass this flag.

We make this explicit as use of FOLL_FORCE can result in surprising behaviour
(and hence bugs) within the mm subsystem.

Signed-off-by: Lorenzo Stoakes 
---
 arch/alpha/kernel/ptrace.c |  9 ++---
 arch/blackfin/kernel/ptrace.c  |  5 +++--
 arch/cris/arch-v32/kernel/ptrace.c |  4 ++--
 arch/ia64/kernel/ptrace.c  | 14 +-
 arch/m32r/kernel/ptrace.c  | 15 ++-
 arch/mips/kernel/ptrace32.c|  5 +++--
 arch/powerpc/kernel/ptrace32.c |  5 +++--
 arch/score/kernel/ptrace.c | 10 ++
 arch/sparc/kernel/ptrace_64.c  | 24 
 arch/x86/kernel/step.c |  3 ++-
 arch/x86/um/ptrace_32.c|  3 ++-
 arch/x86/um/ptrace_64.c|  3 ++-
 include/linux/mm.h |  3 ++-
 kernel/ptrace.c| 16 ++--
 mm/memory.c|  8 ++--
 mm/nommu.c |  6 +++---
 mm/util.c  |  5 +++--
 17 files changed, 84 insertions(+), 54 deletions(-)

diff --git a/arch/alpha/kernel/ptrace.c b/arch/alpha/kernel/ptrace.c
index d9ee817..940dfb4 100644
--- a/arch/alpha/kernel/ptrace.c
+++ b/arch/alpha/kernel/ptrace.c
@@ -157,14 +157,16 @@ put_reg(struct task_struct *task, unsigned long regno, 
unsigned long data)
 static inline int
 read_int(struct task_struct *task, unsigned long addr, int * data)
 {
-   int copied = access_process_vm(task, addr, data, sizeof(int), 0);
+   int copied = access_process_vm(task, addr, data, sizeof(int),
+   FOLL_FORCE);
return (copied == sizeof(int)) ? 0 : -EIO;
 }
 
 static inline int
 write_int(struct task_struct *task, unsigned long addr, int data)
 {
-   int copied = access_process_vm(task, addr, , sizeof(int), 1);
+   int copied = access_process_vm(task, addr, , sizeof(int),
+   FOLL_FORCE | FOLL_WRITE);
return (copied == sizeof(int)) ? 0 : -EIO;
 }
 
@@ -281,7 +283,8 @@ long arch_ptrace(struct task_struct *child, long request,
/* When I and D space are separate, these will need to be fixed.  */
case PTRACE_PEEKTEXT: /* read word at location addr. */
case PTRACE_PEEKDATA:
-   copied = access_process_vm(child, addr, , sizeof(tmp), 0);
+   copied = access_process_vm(child, addr, , sizeof(tmp),
+   FOLL_FORCE);
ret = -EIO;
if (copied != sizeof(tmp))
break;
diff --git a/arch/blackfin/kernel/ptrace.c b/arch/blackfin/kernel/ptrace.c
index 8b8fe67..8d79286 100644
--- a/arch/blackfin/kernel/ptrace.c
+++ b/arch/blackfin/kernel/ptrace.c
@@ -271,7 +271,7 @@ long arch_ptrace(struct task_struct *child, long request,
case BFIN_MEM_ACCESS_CORE:
case BFIN_MEM_ACCESS_CORE_ONLY:
copied = access_process_vm(child, addr, ,
-  to_copy, 0);
+  to_copy, FOLL_FORCE);
if (copied)
break;
 
@@ -324,7 +324,8 @@ long arch_ptrace(struct task_struct *child, long request,
case BFIN_MEM_ACCESS_CORE:
case BFIN_MEM_ACCESS_CORE_ONLY:
copied = access_process_vm(child, addr, ,
-  to_copy, 1);
+  to_copy,
+  FOLL_FORCE | 
FOLL_WRITE);
break;
case BFIN_MEM_ACCESS_DMA:
if (safe_dma_memcpy(paddr, , to_copy))
diff --git a/arch/cris/arch-v32/kernel/ptrace.c 
b/arch/cris/arch-v32/kernel/ptrace.c
index f085229..f0df654 100644
--- a/arch/cris/arch-v32/kernel/ptrace.c
+++ b/arch/cris/arch-v32/kernel/ptrace.c
@@ -147,7 +147,7 @@ long arch_ptrace(struct task_struct *child, long request,
/* The trampoline page is globally mapped, no 
page table to traverse.*/
tmp = *(unsigned long*)addr;
} else {
-   copied = access_process_vm(child, addr, , 
sizeof(tmp), 0);
+   copied = access_process_vm(child, addr, , 
sizeof(tmp), FOLL_FORCE);
 
if (copied != sizeof(tmp))
break;
@@ -279,7 +279,7 @@ static int insn_size(struct task_struct *child, unsigned 
long pc)
   int opsize = 0;
 
   /* Read the

[PATCH 09/10] mm: replace access_remote_vm() write parameter with gup_flags

2016-10-12 Thread Lorenzo Stoakes

This patch removes the write parameter from access_remote_vm() and replaces it
with a gup_flags parameter as use of this function previously _implied_
FOLL_FORCE, whereas after this patch callers explicitly pass this flag.

We make this explicit as use of FOLL_FORCE can result in surprising behaviour
(and hence bugs) within the mm subsystem.

Signed-off-by: Lorenzo Stoakes 
---
 fs/proc/base.c | 19 +--
 include/linux/mm.h |  2 +-
 mm/memory.c| 11 +++
 mm/nommu.c |  7 +++
 4 files changed, 20 insertions(+), 19 deletions(-)

diff --git a/fs/proc/base.c b/fs/proc/base.c
index c2964d8..8e65446 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -252,7 +252,7 @@ static ssize_t proc_pid_cmdline_read(struct file *file, 
char __user *buf,
 * Inherently racy -- command line shares address space
 * with code and data.
 */
-   rv = access_remote_vm(mm, arg_end - 1, , 1, 0);
+   rv = access_remote_vm(mm, arg_end - 1, , 1, FOLL_FORCE);
if (rv <= 0)
goto out_free_page;
 
@@ -270,7 +270,8 @@ static ssize_t proc_pid_cmdline_read(struct file *file, 
char __user *buf,
int nr_read;
 
_count = min3(count, len, PAGE_SIZE);
-   nr_read = access_remote_vm(mm, p, page, _count, 0);
+   nr_read = access_remote_vm(mm, p, page, _count,
+   FOLL_FORCE);
if (nr_read < 0)
rv = nr_read;
if (nr_read <= 0)
@@ -305,7 +306,8 @@ static ssize_t proc_pid_cmdline_read(struct file *file, 
char __user *buf,
bool final;
 
_count = min3(count, len, PAGE_SIZE);
-   nr_read = access_remote_vm(mm, p, page, _count, 0);
+   nr_read = access_remote_vm(mm, p, page, _count,
+   FOLL_FORCE);
if (nr_read < 0)
rv = nr_read;
if (nr_read <= 0)
@@ -354,7 +356,8 @@ static ssize_t proc_pid_cmdline_read(struct file *file, 
char __user *buf,
bool final;
 
_count = min3(count, len, PAGE_SIZE);
-   nr_read = access_remote_vm(mm, p, page, _count, 0);
+   nr_read = access_remote_vm(mm, p, page, _count,
+   FOLL_FORCE);
if (nr_read < 0)
rv = nr_read;
if (nr_read <= 0)
@@ -832,6 +835,7 @@ static ssize_t mem_rw(struct file *file, char __user *buf,
unsigned long addr = *ppos;
ssize_t copied;
char *page;
+   unsigned int flags = FOLL_FORCE;
 
if (!mm)
return 0;
@@ -844,6 +848,9 @@ static ssize_t mem_rw(struct file *file, char __user *buf,
if (!atomic_inc_not_zero(>mm_users))
goto free;
 
+   if (write)
+   flags |= FOLL_WRITE;
+
while (count > 0) {
int this_len = min_t(int, count, PAGE_SIZE);
 
@@ -852,7 +859,7 @@ static ssize_t mem_rw(struct file *file, char __user *buf,
break;
}
 
-   this_len = access_remote_vm(mm, addr, page, this_len, write);
+   this_len = access_remote_vm(mm, addr, page, this_len, flags);
if (!this_len) {
if (!copied)
copied = -EIO;
@@ -965,7 +972,7 @@ static ssize_t environ_read(struct file *file, char __user 
*buf,
this_len = min(max_len, this_len);
 
retval = access_remote_vm(mm, (env_start + src),
-   page, this_len, 0);
+   page, this_len, FOLL_FORCE);
 
if (retval <= 0) {
ret = retval;
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 2a481d3..3e5234e 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1268,7 +1268,7 @@ static inline int fixup_user_fault(struct task_struct 
*tsk,
 
 extern int access_process_vm(struct task_struct *tsk, unsigned long addr, void 
*buf, int len, int write);
 extern int access_remote_vm(struct mm_struct *mm, unsigned long addr,
-   void *buf, int len, int write);
+   void *buf, int len, unsigned int gup_flags);
 
 long __get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
  unsigned long start, unsigned long nr_pages,
diff --git a/mm/memory.c b/mm/memory.c
index 79ebed3..bac2d99 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -3935,19 +3935,14 @@ static int __access_remote_vm(struct task_struct *tsk, 
struct mm_struct *mm,
  * @addr:  start address to access
  * @buf:   source or destination buffer
  * @len:   number of bytes to transfer
- * @write:

[PATCH 08/10] mm: replace __access_remote_vm() write parameter with gup_flags

2016-10-12 Thread Lorenzo Stoakes

This patch removes the write parameter from __access_remote_vm() and replaces it
with a gup_flags parameter as use of this function previously _implied_
FOLL_FORCE, whereas after this patch callers explicitly pass this flag.

We make this explicit as use of FOLL_FORCE can result in surprising behaviour
(and hence bugs) within the mm subsystem.

Signed-off-by: Lorenzo Stoakes 
---
 mm/memory.c | 23 +++
 mm/nommu.c  |  9 ++---
 2 files changed, 21 insertions(+), 11 deletions(-)

diff --git a/mm/memory.c b/mm/memory.c
index 20a9adb..79ebed3 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -3869,14 +3869,11 @@ EXPORT_SYMBOL_GPL(generic_access_phys);
  * given task for page fault accounting.
  */
 static int __access_remote_vm(struct task_struct *tsk, struct mm_struct *mm,
-   unsigned long addr, void *buf, int len, int write)
+   unsigned long addr, void *buf, int len, unsigned int gup_flags)
 {
struct vm_area_struct *vma;
void *old_buf = buf;
-   unsigned int flags = FOLL_FORCE;
-
-   if (write)
-   flags |= FOLL_WRITE;
+   int write = gup_flags & FOLL_WRITE;
 
down_read(>mmap_sem);
/* ignore errors, just check how much was successfully transferred */
@@ -3886,7 +3883,7 @@ static int __access_remote_vm(struct task_struct *tsk, 
struct mm_struct *mm,
struct page *page = NULL;
 
ret = get_user_pages_remote(tsk, mm, addr, 1,
-   flags, , );
+   gup_flags, , );
if (ret <= 0) {
 #ifndef CONFIG_HAVE_IOREMAP_PROT
break;
@@ -3945,7 +3942,12 @@ static int __access_remote_vm(struct task_struct *tsk, 
struct mm_struct *mm,
 int access_remote_vm(struct mm_struct *mm, unsigned long addr,
void *buf, int len, int write)
 {
-   return __access_remote_vm(NULL, mm, addr, buf, len, write);
+   unsigned int flags = FOLL_FORCE;
+
+   if (write)
+   flags |= FOLL_WRITE;
+
+   return __access_remote_vm(NULL, mm, addr, buf, len, flags);
 }
 
 /*
@@ -3958,12 +3960,17 @@ int access_process_vm(struct task_struct *tsk, unsigned 
long addr,
 {
struct mm_struct *mm;
int ret;
+   unsigned int flags = FOLL_FORCE;
 
mm = get_task_mm(tsk);
if (!mm)
return 0;
 
-   ret = __access_remote_vm(tsk, mm, addr, buf, len, write);
+   if (write)
+   flags |= FOLL_WRITE;
+
+   ret = __access_remote_vm(tsk, mm, addr, buf, len, flags);
+
mmput(mm);
 
return ret;
diff --git a/mm/nommu.c b/mm/nommu.c
index 70cb844..bde7df3 100644
--- a/mm/nommu.c
+++ b/mm/nommu.c
@@ -1809,9 +1809,10 @@ void filemap_map_pages(struct fault_env *fe,
 EXPORT_SYMBOL(filemap_map_pages);
 
 static int __access_remote_vm(struct task_struct *tsk, struct mm_struct *mm,
-   unsigned long addr, void *buf, int len, int write)
+   unsigned long addr, void *buf, int len, unsigned int gup_flags)
 {
struct vm_area_struct *vma;
+   int write = gup_flags & FOLL_WRITE;
 
down_read(>mmap_sem);
 
@@ -1853,7 +1854,8 @@ static int __access_remote_vm(struct task_struct *tsk, 
struct mm_struct *mm,
 int access_remote_vm(struct mm_struct *mm, unsigned long addr,
void *buf, int len, int write)
 {
-   return __access_remote_vm(NULL, mm, addr, buf, len, write);
+   return __access_remote_vm(NULL, mm, addr, buf, len,
+   write ? FOLL_WRITE : 0);
 }
 
 /*
@@ -1871,7 +1873,8 @@ int access_process_vm(struct task_struct *tsk, unsigned 
long addr, void *buf, in
if (!mm)
return 0;
 
-   len = __access_remote_vm(tsk, mm, addr, buf, len, write);
+   len = __access_remote_vm(tsk, mm, addr, buf, len,
+   write ? FOLL_WRITE : 0);
 
mmput(mm);
return len;
-- 
2.10.0

[PATCH 07/10] mm: replace get_user_pages_remote() write/force parameters with gup_flags

2016-10-12 Thread Lorenzo Stoakes

This patch removes the write and force parameters from get_user_pages_remote()
and replaces them with a gup_flags parameter to make the use of FOLL_FORCE
explicit in callers as use of this flag can result in surprising behaviour (and
hence bugs) within the mm subsystem.

Signed-off-by: Lorenzo Stoakes 
---
 drivers/gpu/drm/etnaviv/etnaviv_gem.c   |  7 +--
 drivers/gpu/drm/i915/i915_gem_userptr.c |  6 +-
 drivers/infiniband/core/umem_odp.c  |  7 +--
 fs/exec.c   |  9 +++--
 include/linux/mm.h  |  2 +-
 kernel/events/uprobes.c |  6 --
 mm/gup.c| 22 +++---
 mm/memory.c |  6 +-
 security/tomoyo/domain.c|  2 +-
 9 files changed, 40 insertions(+), 27 deletions(-)

diff --git a/drivers/gpu/drm/etnaviv/etnaviv_gem.c 
b/drivers/gpu/drm/etnaviv/etnaviv_gem.c
index 5ce3603..0370b84 100644
--- a/drivers/gpu/drm/etnaviv/etnaviv_gem.c
+++ b/drivers/gpu/drm/etnaviv/etnaviv_gem.c
@@ -748,19 +748,22 @@ static struct page **etnaviv_gem_userptr_do_get_pages(
int ret = 0, pinned, npages = etnaviv_obj->base.size >> PAGE_SHIFT;
struct page **pvec;
uintptr_t ptr;
+   unsigned int flags = 0;
 
pvec = drm_malloc_ab(npages, sizeof(struct page *));
if (!pvec)
return ERR_PTR(-ENOMEM);
 
+   if (!etnaviv_obj->userptr.ro)
+   flags |= FOLL_WRITE;
+
pinned = 0;
ptr = etnaviv_obj->userptr.ptr;
 
down_read(>mmap_sem);
while (pinned < npages) {
ret = get_user_pages_remote(task, mm, ptr, npages - pinned,
-   !etnaviv_obj->userptr.ro, 0,
-   pvec + pinned, NULL);
+   flags, pvec + pinned, NULL);
if (ret < 0)
break;
 
diff --git a/drivers/gpu/drm/i915/i915_gem_userptr.c 
b/drivers/gpu/drm/i915/i915_gem_userptr.c
index e537930..c6f780f 100644
--- a/drivers/gpu/drm/i915/i915_gem_userptr.c
+++ b/drivers/gpu/drm/i915/i915_gem_userptr.c
@@ -508,6 +508,10 @@ __i915_gem_userptr_get_pages_worker(struct work_struct 
*_work)
pvec = drm_malloc_gfp(npages, sizeof(struct page *), GFP_TEMPORARY);
if (pvec != NULL) {
struct mm_struct *mm = obj->userptr.mm->mm;
+   unsigned int flags = 0;
+
+   if (!obj->userptr.read_only)
+   flags |= FOLL_WRITE;
 
ret = -EFAULT;
if (atomic_inc_not_zero(>mm_users)) {
@@ -517,7 +521,7 @@ __i915_gem_userptr_get_pages_worker(struct work_struct 
*_work)
(work->task, mm,
 obj->userptr.ptr + pinned * PAGE_SIZE,
 npages - pinned,
-!obj->userptr.read_only, 0,
+flags,
 pvec + pinned, NULL);
if (ret < 0)
break;
diff --git a/drivers/infiniband/core/umem_odp.c 
b/drivers/infiniband/core/umem_odp.c
index 75077a0..1f0fe32 100644
--- a/drivers/infiniband/core/umem_odp.c
+++ b/drivers/infiniband/core/umem_odp.c
@@ -527,6 +527,7 @@ int ib_umem_odp_map_dma_pages(struct ib_umem *umem, u64 
user_virt, u64 bcnt,
u64 off;
int j, k, ret = 0, start_idx, npages = 0;
u64 base_virt_addr;
+   unsigned int flags = 0;
 
if (access_mask == 0)
return -EINVAL;
@@ -556,6 +557,9 @@ int ib_umem_odp_map_dma_pages(struct ib_umem *umem, u64 
user_virt, u64 bcnt,
goto out_put_task;
}
 
+   if (access_mask & ODP_WRITE_ALLOWED_BIT)
+   flags |= FOLL_WRITE;
+
start_idx = (user_virt - ib_umem_start(umem)) >> PAGE_SHIFT;
k = start_idx;
 
@@ -574,8 +578,7 @@ int ib_umem_odp_map_dma_pages(struct ib_umem *umem, u64 
user_virt, u64 bcnt,
 */
npages = get_user_pages_remote(owning_process, owning_mm,
user_virt, gup_num_pages,
-   access_mask & ODP_WRITE_ALLOWED_BIT,
-   0, local_page_list, NULL);
+   flags, local_page_list, NULL);
up_read(_mm->mmap_sem);
 
if (npages < 0)
diff --git a/fs/exec.c b/fs/exec.c
index 6fcfb3f..4e497b9 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -191,6 +191,7 @@ static struct page *get_arg_page(struct linux_binprm *bprm, 
unsigned long pos,
 {
struct page *page;
int ret;
+   unsigned int gup_flags = FOLL_FORCE;
 
 #ifdef CONFIG_STACK_GROWSUP
if (write) {
@@ -199,12 +200,16 @@ static struct page *get_arg_page(struct linux_binprm 
*bprm, unsigned

[PATCH 06/10] mm: replace get_user_pages() write/force parameters with gup_flags

2016-10-12 Thread Lorenzo Stoakes

This patch removes the write and force parameters from get_user_pages() and
replaces them with a gup_flags parameter to make the use of FOLL_FORCE explicit
in callers as use of this flag can result in surprising behaviour (and hence
bugs) within the mm subsystem.

Signed-off-by: Lorenzo Stoakes 
---
 arch/cris/arch-v32/drivers/cryptocop.c |  4 +---
 arch/ia64/kernel/err_inject.c  |  2 +-
 arch/x86/mm/mpx.c  |  5 ++---
 drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c|  7 +--
 drivers/gpu/drm/radeon/radeon_ttm.c|  3 ++-
 drivers/gpu/drm/via/via_dmablit.c  |  4 ++--
 drivers/infiniband/core/umem.c |  6 +-
 drivers/infiniband/hw/mthca/mthca_memfree.c|  2 +-
 drivers/infiniband/hw/qib/qib_user_pages.c |  3 ++-
 drivers/infiniband/hw/usnic/usnic_uiom.c   |  5 -
 drivers/media/v4l2-core/videobuf-dma-sg.c  |  7 +--
 drivers/misc/mic/scif/scif_rma.c   |  3 +--
 drivers/misc/sgi-gru/grufault.c|  2 +-
 drivers/platform/goldfish/goldfish_pipe.c  |  3 ++-
 drivers/rapidio/devices/rio_mport_cdev.c   |  3 ++-
 .../vc04_services/interface/vchiq_arm/vchiq_2835_arm.c |  3 +--
 .../vc04_services/interface/vchiq_arm/vchiq_arm.c  |  3 +--
 drivers/virt/fsl_hypervisor.c  |  4 ++--
 include/linux/mm.h |  2 +-
 mm/gup.c   | 12 +++-
 mm/mempolicy.c |  2 +-
 mm/nommu.c | 18 --
 22 files changed, 49 insertions(+), 54 deletions(-)

diff --git a/arch/cris/arch-v32/drivers/cryptocop.c 
b/arch/cris/arch-v32/drivers/cryptocop.c
index b5698c8..099e170 100644
--- a/arch/cris/arch-v32/drivers/cryptocop.c
+++ b/arch/cris/arch-v32/drivers/cryptocop.c
@@ -2722,7 +2722,6 @@ static int cryptocop_ioctl_process(struct inode *inode, 
struct file *filp, unsig
err = get_user_pages((unsigned long int)(oper.indata + prev_ix),
 noinpages,
 0,  /* read access only for in data */
-0, /* no force */
 inpages,
 NULL);
 
@@ -2736,8 +2735,7 @@ static int cryptocop_ioctl_process(struct inode *inode, 
struct file *filp, unsig
if (oper.do_cipher){
err = get_user_pages((unsigned long int)oper.cipher_outdata,
 nooutpages,
-1, /* write access for out data */
-0, /* no force */
+FOLL_WRITE, /* write access for out data */
 outpages,
 NULL);
up_read(>mm->mmap_sem);
diff --git a/arch/ia64/kernel/err_inject.c b/arch/ia64/kernel/err_inject.c
index 09f8457..5ed0ea9 100644
--- a/arch/ia64/kernel/err_inject.c
+++ b/arch/ia64/kernel/err_inject.c
@@ -142,7 +142,7 @@ store_virtual_to_phys(struct device *dev, struct 
device_attribute *attr,
u64 virt_addr=simple_strtoull(buf, NULL, 16);
int ret;
 
-   ret = get_user_pages(virt_addr, 1, VM_READ, 0, NULL, NULL);
+   ret = get_user_pages(virt_addr, 1, FOLL_WRITE, NULL, NULL);
if (ret<=0) {
 #ifdef ERR_INJ_DEBUG
printk("Virtual address %lx is not existing.\n",virt_addr);
diff --git a/arch/x86/mm/mpx.c b/arch/x86/mm/mpx.c
index 8047687..e4f8009 100644
--- a/arch/x86/mm/mpx.c
+++ b/arch/x86/mm/mpx.c
@@ -544,10 +544,9 @@ static int mpx_resolve_fault(long __user *addr, int write)
 {
long gup_ret;
int nr_pages = 1;
-   int force = 0;
 
-   gup_ret = get_user_pages((unsigned long)addr, nr_pages, write,
-   force, NULL, NULL);
+   gup_ret = get_user_pages((unsigned long)addr, nr_pages,
+   write ? FOLL_WRITE : 0, NULL, NULL);
/*
 * get_user_pages() returns number of pages gotten.
 * 0 means we failed to fault in and get anything,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
index 887483b..dcaf691 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
@@ -555,10 +555,13 @@ struct amdgpu_ttm_tt {
 int amdgpu_ttm_tt_get_user_pages(struct ttm_tt *ttm, struct page **pages)
 {
struct amdgpu_ttm_tt *gtt = (void *)ttm;
-   int write = !(gtt->userflags & AMDGPU_GEM_USERPTR_READONLY);
+   unsigned int flags = 0;
unsigned pinned = 0;
int r;
 
+   if (!(gtt->userflags & AMDGPU_GEM_USERPTR_READONLY))
+   flags |= FOLL_WRITE;
+
if (gtt->userflags &

[PATCH 05/10] mm: replace get_vaddr_frames() write/force parameters with gup_flags

2016-10-12 Thread Lorenzo Stoakes

This patch removes the write and force parameters from get_vaddr_frames() and
replaces them with a gup_flags parameter to make the use of FOLL_FORCE explicit
in callers as use of this flag can result in surprising behaviour (and hence
bugs) within the mm subsystem.

Signed-off-by: Lorenzo Stoakes 
---
 drivers/gpu/drm/exynos/exynos_drm_g2d.c|  3 ++-
 drivers/media/platform/omap/omap_vout.c|  2 +-
 drivers/media/v4l2-core/videobuf2-memops.c |  6 +-
 include/linux/mm.h |  2 +-
 mm/frame_vector.c  | 13 ++---
 5 files changed, 11 insertions(+), 15 deletions(-)

diff --git a/drivers/gpu/drm/exynos/exynos_drm_g2d.c 
b/drivers/gpu/drm/exynos/exynos_drm_g2d.c
index aa92dec..fbd13fa 100644
--- a/drivers/gpu/drm/exynos/exynos_drm_g2d.c
+++ b/drivers/gpu/drm/exynos/exynos_drm_g2d.c
@@ -488,7 +488,8 @@ static dma_addr_t *g2d_userptr_get_dma_addr(struct 
drm_device *drm_dev,
goto err_free;
}
 
-   ret = get_vaddr_frames(start, npages, true, true, g2d_userptr->vec);
+   ret = get_vaddr_frames(start, npages, FOLL_FORCE | FOLL_WRITE,
+   g2d_userptr->vec);
if (ret != npages) {
DRM_ERROR("failed to get user pages from userptr.\n");
if (ret < 0)
diff --git a/drivers/media/platform/omap/omap_vout.c 
b/drivers/media/platform/omap/omap_vout.c
index e668dde..a31b95c 100644
--- a/drivers/media/platform/omap/omap_vout.c
+++ b/drivers/media/platform/omap/omap_vout.c
@@ -214,7 +214,7 @@ static int omap_vout_get_userptr(struct videobuf_buffer 
*vb, u32 virtp,
if (!vec)
return -ENOMEM;
 
-   ret = get_vaddr_frames(virtp, 1, true, false, vec);
+   ret = get_vaddr_frames(virtp, 1, FOLL_WRITE, vec);
if (ret != 1) {
frame_vector_destroy(vec);
return -EINVAL;
diff --git a/drivers/media/v4l2-core/videobuf2-memops.c 
b/drivers/media/v4l2-core/videobuf2-memops.c
index 3c3b517..1cd322e 100644
--- a/drivers/media/v4l2-core/videobuf2-memops.c
+++ b/drivers/media/v4l2-core/videobuf2-memops.c
@@ -42,6 +42,10 @@ struct frame_vector *vb2_create_framevec(unsigned long start,
unsigned long first, last;
unsigned long nr;
struct frame_vector *vec;
+   unsigned int flags = FOLL_FORCE;
+
+   if (write)
+   flags |= FOLL_WRITE;
 
first = start >> PAGE_SHIFT;
last = (start + length - 1) >> PAGE_SHIFT;
@@ -49,7 +53,7 @@ struct frame_vector *vb2_create_framevec(unsigned long start,
vec = frame_vector_create(nr);
if (!vec)
return ERR_PTR(-ENOMEM);
-   ret = get_vaddr_frames(start & PAGE_MASK, nr, write, true, vec);
+   ret = get_vaddr_frames(start & PAGE_MASK, nr, flags, vec);
if (ret < 0)
goto out_destroy;
/* We accept only complete set of PFNs */
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 27ab538..5ff084f6 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1305,7 +1305,7 @@ struct frame_vector {
 struct frame_vector *frame_vector_create(unsigned int nr_frames);
 void frame_vector_destroy(struct frame_vector *vec);
 int get_vaddr_frames(unsigned long start, unsigned int nr_pfns,
-bool write, bool force, struct frame_vector *vec);
+unsigned int gup_flags, struct frame_vector *vec);
 void put_vaddr_frames(struct frame_vector *vec);
 int frame_vector_to_pages(struct frame_vector *vec);
 void frame_vector_to_pfns(struct frame_vector *vec);
diff --git a/mm/frame_vector.c b/mm/frame_vector.c
index 81b6749..db77dcb 100644
--- a/mm/frame_vector.c
+++ b/mm/frame_vector.c
@@ -11,10 +11,7 @@
  * get_vaddr_frames() - map virtual addresses to pfns
  * @start: starting user address
  * @nr_frames: number of pages / pfns from start to map
- * @write: whether pages will be written to by the caller
- * @force: whether to force write access even if user mapping is
- * readonly. See description of the same argument of
-   get_user_pages().
+ * @gup_flags: flags modifying lookup behaviour
  * @vec:   structure which receives pages / pfns of the addresses mapped.
  * It should have space for at least nr_frames entries.
  *
@@ -34,23 +31,17 @@
  * This function takes care of grabbing mmap_sem as necessary.
  */
 int get_vaddr_frames(unsigned long start, unsigned int nr_frames,
-bool write, bool force, struct frame_vector *vec)
+unsigned int gup_flags, struct frame_vector *vec)
 {
struct mm_struct *mm = current->mm;
struct vm_area_struct *vma;
int ret = 0;
int err;
int locked;
-   unsigned int gup_flags = 0;
 
if (nr_frames == 0)
return 0;
 
-   if (write)
-   gup_flags |= FOLL_WRITE;
-   if (force)
-   gup_flags |= FOLL_FORCE;
-
if

[PATCH 04/10] mm: replace get_user_pages_locked() write/force parameters with gup_flags

2016-10-12 Thread Lorenzo Stoakes

This patch removes the write and force parameters from get_user_pages_locked()
and replaces them with a gup_flags parameter to make the use of FOLL_FORCE
explicit in callers as use of this flag can result in surprising behaviour (and
hence bugs) within the mm subsystem.

Signed-off-by: Lorenzo Stoakes 
---
 include/linux/mm.h |  2 +-
 mm/frame_vector.c  |  8 +++-
 mm/gup.c   | 12 +++-
 mm/nommu.c |  5 -
 4 files changed, 15 insertions(+), 12 deletions(-)

diff --git a/include/linux/mm.h b/include/linux/mm.h
index 6adc4bc..27ab538 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1282,7 +1282,7 @@ long get_user_pages(unsigned long start, unsigned long 
nr_pages,
int write, int force, struct page **pages,
struct vm_area_struct **vmas);
 long get_user_pages_locked(unsigned long start, unsigned long nr_pages,
-   int write, int force, struct page **pages, int *locked);
+   unsigned int gup_flags, struct page **pages, int *locked);
 long __get_user_pages_unlocked(struct task_struct *tsk, struct mm_struct *mm,
   unsigned long start, unsigned long nr_pages,
   struct page **pages, unsigned int gup_flags);
diff --git a/mm/frame_vector.c b/mm/frame_vector.c
index 381bb07..81b6749 100644
--- a/mm/frame_vector.c
+++ b/mm/frame_vector.c
@@ -41,10 +41,16 @@ int get_vaddr_frames(unsigned long start, unsigned int 
nr_frames,
int ret = 0;
int err;
int locked;
+   unsigned int gup_flags = 0;
 
if (nr_frames == 0)
return 0;
 
+   if (write)
+   gup_flags |= FOLL_WRITE;
+   if (force)
+   gup_flags |= FOLL_FORCE;
+
if (WARN_ON_ONCE(nr_frames > vec->nr_allocated))
nr_frames = vec->nr_allocated;
 
@@ -59,7 +65,7 @@ int get_vaddr_frames(unsigned long start, unsigned int 
nr_frames,
vec->got_ref = true;
vec->is_pfns = false;
ret = get_user_pages_locked(start, nr_frames,
-   write, force, (struct page **)(vec->ptrs), &locked);
+   gup_flags, (struct page **)(vec->ptrs), &locked);
goto out;
}
 
diff --git a/mm/gup.c b/mm/gup.c
index cfcb014..7a0d033 100644
--- a/mm/gup.c
+++ b/mm/gup.c
@@ -838,18 +838,12 @@ static __always_inline long 
__get_user_pages_locked(struct task_struct *tsk,
  *  up_read(&mm->mmap_sem);
  */
 long get_user_pages_locked(unsigned long start, unsigned long nr_pages,
-  int write, int force, struct page **pages,
+  unsigned int gup_flags, struct page **pages,
   int *locked)
 {
-   unsigned int flags = FOLL_TOUCH;
-
-   if (write)
-   flags |= FOLL_WRITE;
-   if (force)
-   flags |= FOLL_FORCE;
-
return __get_user_pages_locked(current, current->mm, start, nr_pages,
-  pages, NULL, locked, true, flags);
+  pages, NULL, locked, true,
+  gup_flags | FOLL_TOUCH);
 }
 EXPORT_SYMBOL(get_user_pages_locked);
 
diff --git a/mm/nommu.c b/mm/nommu.c
index 7e27add..842cfdd 100644
--- a/mm/nommu.c
+++ b/mm/nommu.c
@@ -176,9 +176,12 @@ long get_user_pages(unsigned long start, unsigned long 
nr_pages,
 EXPORT_SYMBOL(get_user_pages);
 
 long get_user_pages_locked(unsigned long start, unsigned long nr_pages,
-   int write, int force, struct page **pages,
+   unsigned int gup_flags, struct page **pages,
int *locked)
 {
+   int write = gup_flags & FOLL_WRITE;
+   int force = gup_flags & FOLL_FORCE;
+
return get_user_pages(start, nr_pages, write, force, pages, NULL);
 }
 EXPORT_SYMBOL(get_user_pages_locked);
-- 
2.10.0

[PATCH 03/10] mm: replace get_user_pages_unlocked() write/force parameters with gup_flags

2016-10-12 Thread Lorenzo Stoakes

This patch removes the write and force parameters from get_user_pages_unlocked()
and replaces them with a gup_flags parameter to make the use of FOLL_FORCE
explicit in callers as use of this flag can result in surprising behaviour (and
hence bugs) within the mm subsystem.

Signed-off-by: Lorenzo Stoakes 
---
 arch/mips/mm/gup.c |  2 +-
 arch/s390/mm/gup.c |  3 ++-
 arch/sh/mm/gup.c   |  3 ++-
 arch/sparc/mm/gup.c|  3 ++-
 arch/x86/mm/gup.c  |  2 +-
 drivers/media/pci/ivtv/ivtv-udma.c |  4 ++--
 drivers/media/pci/ivtv/ivtv-yuv.c  |  5 +++--
 drivers/scsi/st.c  |  5 ++---
 drivers/video/fbdev/pvr2fb.c   |  4 ++--
 include/linux/mm.h |  2 +-
 mm/gup.c   | 14 --
 mm/nommu.c | 11 ++-
 mm/util.c  |  3 ++-
 net/ceph/pagevec.c |  2 +-
 14 files changed, 27 insertions(+), 36 deletions(-)

diff --git a/arch/mips/mm/gup.c b/arch/mips/mm/gup.c
index 42d124f..d8c3c15 100644
--- a/arch/mips/mm/gup.c
+++ b/arch/mips/mm/gup.c
@@ -287,7 +287,7 @@ int get_user_pages_fast(unsigned long start, int nr_pages, 
int write,
pages += nr;
 
ret = get_user_pages_unlocked(start, (end - start) >> PAGE_SHIFT,
- write, 0, pages);
+ pages, write ? FOLL_WRITE : 0);
 
/* Have to be a bit careful with return values */
if (nr > 0) {
diff --git a/arch/s390/mm/gup.c b/arch/s390/mm/gup.c
index adb0c34..18d4107 100644
--- a/arch/s390/mm/gup.c
+++ b/arch/s390/mm/gup.c
@@ -266,7 +266,8 @@ int get_user_pages_fast(unsigned long start, int nr_pages, 
int write,
/* Try to get the remaining pages with get_user_pages */
start += nr << PAGE_SHIFT;
pages += nr;
-   ret = get_user_pages_unlocked(start, nr_pages - nr, write, 0, pages);
+   ret = get_user_pages_unlocked(start, nr_pages - nr, pages,
+ write ? FOLL_WRITE : 0);
/* Have to be a bit careful with return values */
if (nr > 0)
ret = (ret < 0) ? nr : ret + nr;
diff --git a/arch/sh/mm/gup.c b/arch/sh/mm/gup.c
index 40fa6c8..063c298 100644
--- a/arch/sh/mm/gup.c
+++ b/arch/sh/mm/gup.c
@@ -258,7 +258,8 @@ int get_user_pages_fast(unsigned long start, int nr_pages, 
int write,
pages += nr;
 
ret = get_user_pages_unlocked(start,
-   (end - start) >> PAGE_SHIFT, write, 0, pages);
+   (end - start) >> PAGE_SHIFT, pages,
+   write ? FOLL_WRITE : 0);
 
/* Have to be a bit careful with return values */
if (nr > 0) {
diff --git a/arch/sparc/mm/gup.c b/arch/sparc/mm/gup.c
index 4e06750..cd0e32b 100644
--- a/arch/sparc/mm/gup.c
+++ b/arch/sparc/mm/gup.c
@@ -238,7 +238,8 @@ int get_user_pages_fast(unsigned long start, int nr_pages, 
int write,
pages += nr;
 
ret = get_user_pages_unlocked(start,
-   (end - start) >> PAGE_SHIFT, write, 0, pages);
+   (end - start) >> PAGE_SHIFT, pages,
+   write ? FOLL_WRITE : 0);
 
/* Have to be a bit careful with return values */
if (nr > 0) {
diff --git a/arch/x86/mm/gup.c b/arch/x86/mm/gup.c
index b8b6a60..0d4fb3e 100644
--- a/arch/x86/mm/gup.c
+++ b/arch/x86/mm/gup.c
@@ -435,7 +435,7 @@ int get_user_pages_fast(unsigned long start, int nr_pages, 
int write,
 
ret = get_user_pages_unlocked(start,
  (end - start) >> PAGE_SHIFT,
- write, 0, pages);
+ pages, write ? FOLL_WRITE : 0);
 
/* Have to be a bit careful with return values */
if (nr > 0) {
diff --git a/drivers/media/pci/ivtv/ivtv-udma.c 
b/drivers/media/pci/ivtv/ivtv-udma.c
index 4769469..2c9232e 100644
--- a/drivers/media/pci/ivtv/ivtv-udma.c
+++ b/drivers/media/pci/ivtv/ivtv-udma.c
@@ -124,8 +124,8 @@ int ivtv_udma_setup(struct ivtv *itv, unsigned long 
ivtv_dest_addr,
}
 
/* Get user pages for DMA Xfer */
-   err = get_user_pages_unlocked(user_dma.uaddr, user_dma.page_count, 0,
-   1, dma->map);
+   err = get_user_pages_unlocked(user_dma.uaddr, user_dma.page_count,
+   dma->map, FOLL_FORCE);
 
if (user_dma.page_count != err) {
IVTV_DEBUG_WARN("failed to map user pages, returned %d instead 
of %d\n",
diff --git a/drivers/media/pci/ivtv/ivtv-yuv.c 
b/drivers/media/pci/ivtv/ivtv-yuv.c
index b094054..f7299d3 100644
--- a/drivers/media/pci/ivtv/ivtv-yuv.c
+++ b/drivers/media/pci/ivtv/ivtv-yuv.c
@@ -76,11 +76,12 @@ static int ivtv_yuv_prep_user_dma(struct ivtv *itv, struct

[PATCH 02/10] mm: remove write/force parameters from __get_user_pages_unlocked()

2016-10-12 Thread Lorenzo Stoakes

This patch removes the write and force parameters from
__get_user_pages_unlocked() to make the use of FOLL_FORCE explicit in callers as
use of this flag can result in surprising behaviour (and hence bugs) within the
mm subsystem.

Signed-off-by: Lorenzo Stoakes 
---
 include/linux/mm.h |  3 +--
 mm/gup.c   | 17 +
 mm/nommu.c | 12 +---
 mm/process_vm_access.c |  7 +--
 virt/kvm/async_pf.c|  3 ++-
 virt/kvm/kvm_main.c| 11 ---
 6 files changed, 34 insertions(+), 19 deletions(-)

diff --git a/include/linux/mm.h b/include/linux/mm.h
index e9caec6..2db98b6 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1285,8 +1285,7 @@ long get_user_pages_locked(unsigned long start, unsigned 
long nr_pages,
int write, int force, struct page **pages, int *locked);
 long __get_user_pages_unlocked(struct task_struct *tsk, struct mm_struct *mm,
   unsigned long start, unsigned long nr_pages,
-  int write, int force, struct page **pages,
-  unsigned int gup_flags);
+  struct page **pages, unsigned int gup_flags);
 long get_user_pages_unlocked(unsigned long start, unsigned long nr_pages,
int write, int force, struct page **pages);
 int get_user_pages_fast(unsigned long start, int nr_pages, int write,
diff --git a/mm/gup.c b/mm/gup.c
index ba83942..3d620dd 100644
--- a/mm/gup.c
+++ b/mm/gup.c
@@ -865,17 +865,11 @@ EXPORT_SYMBOL(get_user_pages_locked);
  */
 __always_inline long __get_user_pages_unlocked(struct task_struct *tsk, struct 
mm_struct *mm,
   unsigned long start, unsigned 
long nr_pages,
-  int write, int force, struct 
page **pages,
-  unsigned int gup_flags)
+  struct page **pages, unsigned 
int gup_flags)
 {
long ret;
int locked = 1;
 
-   if (write)
-   gup_flags |= FOLL_WRITE;
-   if (force)
-   gup_flags |= FOLL_FORCE;
-
down_read(&mm->mmap_sem);
ret = __get_user_pages_locked(tsk, mm, start, nr_pages, pages, NULL,
  &locked, false, gup_flags);
@@ -905,8 +899,15 @@ EXPORT_SYMBOL(__get_user_pages_unlocked);
 long get_user_pages_unlocked(unsigned long start, unsigned long nr_pages,
 int write, int force, struct page **pages)
 {
+   unsigned int flags = FOLL_TOUCH;
+
+   if (write)
+   flags |= FOLL_WRITE;
+   if (force)
+   flags |= FOLL_FORCE;
+
return __get_user_pages_unlocked(current, current->mm, start, nr_pages,
-write, force, pages, FOLL_TOUCH);
+pages, flags);
 }
 EXPORT_SYMBOL(get_user_pages_unlocked);
 
diff --git a/mm/nommu.c b/mm/nommu.c
index 95daf81..925dcc1 100644
--- a/mm/nommu.c
+++ b/mm/nommu.c
@@ -185,8 +185,7 @@ EXPORT_SYMBOL(get_user_pages_locked);
 
 long __get_user_pages_unlocked(struct task_struct *tsk, struct mm_struct *mm,
   unsigned long start, unsigned long nr_pages,
-  int write, int force, struct page **pages,
-  unsigned int gup_flags)
+  struct page **pages, unsigned int gup_flags)
 {
long ret;
down_read(&mm->mmap_sem);
@@ -200,8 +199,15 @@ EXPORT_SYMBOL(__get_user_pages_unlocked);
 long get_user_pages_unlocked(unsigned long start, unsigned long nr_pages,
 int write, int force, struct page **pages)
 {
+   unsigned int flags = 0;
+
+   if (write)
+   flags |= FOLL_WRITE;
+   if (force)
+   flags |= FOLL_FORCE;
+
return __get_user_pages_unlocked(current, current->mm, start, nr_pages,
-write, force, pages, 0);
+pages, flags);
 }
 EXPORT_SYMBOL(get_user_pages_unlocked);
 
diff --git a/mm/process_vm_access.c b/mm/process_vm_access.c
index 07514d4..be8dc8d 100644
--- a/mm/process_vm_access.c
+++ b/mm/process_vm_access.c
@@ -88,12 +88,16 @@ static int process_vm_rw_single_vec(unsigned long addr,
ssize_t rc = 0;
unsigned long max_pages_per_loop = PVM_MAX_KMALLOC_PAGES
/ sizeof(struct pages *);
+   unsigned int flags = FOLL_REMOTE;
 
/* Work out address and page range required */
if (len == 0)
return 0;
nr_pages = (addr + len - 1) / PAGE_SIZE - addr / PAGE_SIZE + 1;
 
+   if (vm_write)
+   flags |= FOLL_WRITE;
+
while (!rc && nr_pages && iov_iter_count(iter)) {
int pages = min(nr_pages, max_pages_per_loop);
size_t bytes;
@@ -104,8

[PATCH 01/10] mm: remove write/force parameters from __get_user_pages_locked()

2016-10-12 Thread Lorenzo Stoakes

This patch removes the write and force parameters from __get_user_pages_locked()
to make the use of FOLL_FORCE explicit in callers as use of this flag can result
in surprising behaviour (and hence bugs) within the mm subsystem.

Signed-off-by: Lorenzo Stoakes 
---
 mm/gup.c | 47 +--
 1 file changed, 33 insertions(+), 14 deletions(-)

diff --git a/mm/gup.c b/mm/gup.c
index 96b2b2f..ba83942 100644
--- a/mm/gup.c
+++ b/mm/gup.c
@@ -729,7 +729,6 @@ static __always_inline long __get_user_pages_locked(struct 
task_struct *tsk,
struct mm_struct *mm,
unsigned long start,
unsigned long nr_pages,
-   int write, int force,
struct page **pages,
struct vm_area_struct **vmas,
int *locked, bool notify_drop,
@@ -747,10 +746,6 @@ static __always_inline long __get_user_pages_locked(struct 
task_struct *tsk,
 
if (pages)
flags |= FOLL_GET;
-   if (write)
-   flags |= FOLL_WRITE;
-   if (force)
-   flags |= FOLL_FORCE;
 
pages_done = 0;
lock_dropped = false;
@@ -846,9 +841,15 @@ long get_user_pages_locked(unsigned long start, unsigned 
long nr_pages,
   int write, int force, struct page **pages,
   int *locked)
 {
+   unsigned int flags = FOLL_TOUCH;
+
+   if (write)
+   flags |= FOLL_WRITE;
+   if (force)
+   flags |= FOLL_FORCE;
+
return __get_user_pages_locked(current, current->mm, start, nr_pages,
-  write, force, pages, NULL, locked, true,
-  FOLL_TOUCH);
+  pages, NULL, locked, true, flags);
 }
 EXPORT_SYMBOL(get_user_pages_locked);
 
@@ -869,9 +870,15 @@ __always_inline long __get_user_pages_unlocked(struct 
task_struct *tsk, struct m
 {
long ret;
int locked = 1;
+
+   if (write)
+   gup_flags |= FOLL_WRITE;
+   if (force)
+   gup_flags |= FOLL_FORCE;
+
down_read(&mm->mmap_sem);
-   ret = __get_user_pages_locked(tsk, mm, start, nr_pages, write, force,
- pages, NULL, &locked, false, gup_flags);
+   ret = __get_user_pages_locked(tsk, mm, start, nr_pages, pages, NULL,
+ &locked, false, gup_flags);
if (locked)
up_read(&mm->mmap_sem);
return ret;
@@ -963,9 +970,15 @@ long get_user_pages_remote(struct task_struct *tsk, struct 
mm_struct *mm,
int write, int force, struct page **pages,
struct vm_area_struct **vmas)
 {
-   return __get_user_pages_locked(tsk, mm, start, nr_pages, write, force,
-  pages, vmas, NULL, false,
-  FOLL_TOUCH | FOLL_REMOTE);
+   unsigned int flags = FOLL_TOUCH | FOLL_REMOTE;
+
+   if (write)
+   flags |= FOLL_WRITE;
+   if (force)
+   flags |= FOLL_FORCE;
+
+   return __get_user_pages_locked(tsk, mm, start, nr_pages, pages, vmas,
+  NULL, false, flags);
 }
 EXPORT_SYMBOL(get_user_pages_remote);
 
@@ -979,9 +992,15 @@ long get_user_pages(unsigned long start, unsigned long 
nr_pages,
int write, int force, struct page **pages,
struct vm_area_struct **vmas)
 {
+   unsigned int flags = FOLL_TOUCH;
+
+   if (write)
+   flags |= FOLL_WRITE;
+   if (force)
+   flags |= FOLL_FORCE;
+
return __get_user_pages_locked(current, current->mm, start, nr_pages,
-  write, force, pages, vmas, NULL, false,
-  FOLL_TOUCH);
+  pages, vmas, NULL, false, flags);
 }
 EXPORT_SYMBOL(get_user_pages);
 
-- 
2.10.0

[PATCH 00/10] mm: adjust get_user_pages* functions to explicitly pass FOLL_* flags

2016-10-12 Thread Lorenzo Stoakes

This patch series adjusts functions in the get_user_pages* family such that
desired FOLL_* flags are passed as an argument rather than implied by flags.

The purpose of this change is to make the use of FOLL_FORCE explicit so it is
easier to grep for and clearer to callers that this flag is being used. The use
of FOLL_FORCE is an issue as it overrides missing VM_READ/VM_WRITE flags for the
VMA whose pages we are reading from/writing to, which can result in surprising
behaviour.

The patch series came out of the discussion around commit 38e0885, which
addressed a BUG_ON() being triggered when a page was faulted in with PROT_NONE
set but having been overridden by FOLL_FORCE. do_numa_page() was run on the
assumption the page _must_ be one marked for NUMA node migration as an actual
PROT_NONE page would have been dealt with prior to this code path, however
FOLL_FORCE introduced a situation where this assumption did not hold.

See https://marc.info/?l=linux-mm&m=147585445805166 for the patch proposal.

Lorenzo Stoakes (10):
  mm: remove write/force parameters from __get_user_pages_locked()
  mm: remove write/force parameters from __get_user_pages_unlocked()
  mm: replace get_user_pages_unlocked() write/force parameters with gup_flags
  mm: replace get_user_pages_locked() write/force parameters with gup_flags
  mm: replace get_vaddr_frames() write/force parameters with gup_flags
  mm: replace get_user_pages() write/force parameters with gup_flags
  mm: replace get_user_pages_remote() write/force parameters with gup_flags
  mm: replace __access_remote_vm() write parameter with gup_flags
  mm: replace access_remote_vm() write parameter with gup_flags
  mm: replace access_process_vm() write parameter with gup_flags

 arch/alpha/kernel/ptrace.c |  9 ++--
 arch/blackfin/kernel/ptrace.c  |  5 ++-
 arch/cris/arch-v32/drivers/cryptocop.c |  4 +-
 arch/cris/arch-v32/kernel/ptrace.c |  4 +-
 arch/ia64/kernel/err_inject.c  |  2 +-
 arch/ia64/kernel/ptrace.c  | 14 +++---
 arch/m32r/kernel/ptrace.c  | 15 ---
 arch/mips/kernel/ptrace32.c|  5 ++-
 arch/mips/mm/gup.c |  2 +-
 arch/powerpc/kernel/ptrace32.c |  5 ++-
 arch/s390/mm/gup.c |  3 +-
 arch/score/kernel/ptrace.c | 10 +++--
 arch/sh/mm/gup.c   |  3 +-
 arch/sparc/kernel/ptrace_64.c  | 24 +++
 arch/sparc/mm/gup.c|  3 +-
 arch/x86/kernel/step.c |  3 +-
 arch/x86/mm/gup.c  |  2 +-
 arch/x86/mm/mpx.c  |  5 +--
 arch/x86/um/ptrace_32.c|  3 +-
 arch/x86/um/ptrace_64.c|  3 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c|  7 ++-
 drivers/gpu/drm/etnaviv/etnaviv_gem.c  |  7 ++-
 drivers/gpu/drm/exynos/exynos_drm_g2d.c|  3 +-
 drivers/gpu/drm/i915/i915_gem_userptr.c|  6 ++-
 drivers/gpu/drm/radeon/radeon_ttm.c|  3 +-
 drivers/gpu/drm/via/via_dmablit.c  |  4 +-
 drivers/infiniband/core/umem.c |  6 ++-
 drivers/infiniband/core/umem_odp.c |  7 ++-
 drivers/infiniband/hw/mthca/mthca_memfree.c|  2 +-
 drivers/infiniband/hw/qib/qib_user_pages.c |  3 +-
 drivers/infiniband/hw/usnic/usnic_uiom.c   |  5 ++-
 drivers/media/pci/ivtv/ivtv-udma.c |  4 +-
 drivers/media/pci/ivtv/ivtv-yuv.c  |  5 ++-
 drivers/media/platform/omap/omap_vout.c|  2 +-
 drivers/media/v4l2-core/videobuf-dma-sg.c  |  7 ++-
 drivers/media/v4l2-core/videobuf2-memops.c |  6 ++-
 drivers/misc/mic/scif/scif_rma.c   |  3 +-
 drivers/misc/sgi-gru/grufault.c|  2 +-
 drivers/platform/goldfish/goldfish_pipe.c  |  3 +-
 drivers/rapidio/devices/rio_mport_cdev.c   |  3 +-
 drivers/scsi/st.c  |  5 +--
 .../interface/vchiq_arm/vchiq_2835_arm.c   |  3 +-
 .../vc04_services/interface/vchiq_arm/vchiq_arm.c  |  3 +-
 drivers/video/fbdev/pvr2fb.c   |  4 +-
 drivers/virt/fsl_hypervisor.c  |  4 +-
 fs/exec.c  |  9 +++-
 fs/proc/base.c | 19 +---
 include/linux/mm.h | 18 
 kernel/events/uprobes.c|  6 ++-
 kernel/ptrace.c| 16 ---
 mm/frame_vector.c  |  9 ++--
 mm/gup.c   | 50 ++
 mm/memory.c| 16 ---

[PATCH] powerpc/boot: fix boot on systems with uncompressed kernel image

2016-10-12 Thread Heiner Kallweit

This commit broke boot on systems with an uncompressed kernel image,
namely systems using a cuImage. On such systems the compressed boot
image (boot wrapper, uncompressed kernel image, ..) is decompressed
by u-boot already, therefore the boot wrapper code sees an
uncompressed kernel image.

The old decompression code silently assumed an uncompressed kernel
image if it found no valid gzip signature, whilst the new code
bailed out in this case.

Fix this by re-introducing such a fallback if no valid compressed
image is found.

Fixes: 1b7898ee276b ("Use the pre-boot decompression API")
Signed-off-by: Heiner Kallweit 
---
 arch/powerpc/boot/main.c | 18 --
 1 file changed, 16 insertions(+), 2 deletions(-)

diff --git a/arch/powerpc/boot/main.c b/arch/powerpc/boot/main.c
index f7a184b..57d42d1 100644
--- a/arch/powerpc/boot/main.c
+++ b/arch/powerpc/boot/main.c
@@ -32,9 +32,16 @@ static struct addr_range prep_kernel(void)
void *addr = 0;
struct elf_info ei;
long len;
+   int uncompressed_image = 0;
 
-   partial_decompress(vmlinuz_addr, vmlinuz_size,
+   len = partial_decompress(vmlinuz_addr, vmlinuz_size,
elfheader, sizeof(elfheader), 0);
+   /* assume uncompressed data if -1 is returned */
+   if (len == -1) {
+   uncompressed_image = 1;
+   memcpy(elfheader, vmlinuz_addr, sizeof(elfheader));
+   printf("No valid compressed data found, assume uncompressed 
data\n\r");
+   }
 
if (!parse_elf64(elfheader, &ei) && !parse_elf32(elfheader, &ei))
fatal("Error: not a valid PPC32 or PPC64 ELF file!\n\r");
@@ -67,6 +74,13 @@ static struct addr_range prep_kernel(void)
"device tree\n\r");
}
 
+   if (uncompressed_image) {
+   memcpy(addr, vmlinuz_addr + ei.elfoffset, ei.loadsize);
+   printf("0x%lx bytes of uncompressed data copied\n\r",
+  ei.loadsize);
+   goto out;
+   }
+
/* Finally, decompress the kernel */
printf("Decompressing (0x%p <- 0x%p:0x%p)...\n\r", addr,
   vmlinuz_addr, vmlinuz_addr+vmlinuz_size);
@@ -82,7 +96,7 @@ static struct addr_range prep_kernel(void)
 len, ei.loadsize);
 
printf("Done! Decompressed 0x%lx bytes\n\r", len);
-
+out:
flush_cache(addr, ei.loadsize);
 
return (struct addr_range){addr, ei.memsize};
-- 
2.10.0

Re: Commit 1b7898ee276b "powerpc/boot: Use the pre-boot decompression API" breaks boot

2016-10-12 Thread Heiner Kallweit

Am 12.10.2016 um 06:26 schrieb Oliver O'Halloran:
> On Tue, Oct 11, 2016 at 7:06 AM, Heiner Kallweit  wrote:
>>> IMHO in case of using cuboot no CONFIG_KERNEL_ config option
>>> should be set and Makefile + code in arch/powerpc/boot should be able
>>> to deal with this situation:
>>> - don't copy and build the decompression stuff
>>> - use an alternative version of prep_kernel() in main.c which doesn't
>>>   attempt to decompress the kernel image
>>>
>>> This should be a cleaner solution than probing the kernel image whether
>>> it's compressed or not.
>>>
>>
>> This would be the patch implementing the idea. Advantage is that all
>> the unnecessary decompression code isn't built. Works fine for me.
> 
> I don't think this approach is viable. The wrapper code is shared
> among the various output image formats some of which *will* contain a
> compressed kernel image so we can't simply remove the decompressor
> from the wrapper. A random example I found in the makefile was
> CONFIG_BAMBOO:
> 
>> image-$(CONFIG_BAMBOO) += treeImage.bamboo cuImage.bamboo
> 
> When building for this platform Kbuild will produce treeboot and a
> cuboot image. Unlike uboot, Treeboot doesn't do any decompression so
> the wrapper needs to decompress the kernel itself. The probing
> solution more or less matches the old behaviour (which we know works)
> so I think we should just stick with that.
> 
> - Oliver
> 
Indeed, I also figured that out later. As you say, then let's stick
with re-introducing the probing. I'll send the patch for this.

Heiner

Re: [PATCH] powerpc/fadump: Fix the race in crash_fadump().

2016-10-12 Thread Mahesh Jagannath Salgaonkar

On 10/10/2016 04:22 PM, Michael Ellerman wrote:
> Mahesh J Salgaonkar  writes:
> 
>> From: Mahesh Salgaonkar 
>>
>> There are chances that multiple CPUs can call crash_fadump() simultaneously
>> and would start duplicating same info to vmcoreinfo ELF note section. This
>> causes makedumpfile to fail during kdump capture. One example is,
>> triggering dumprestart from HMC which sends system reset to all the CPUs at
>> once.
> ...
>> diff --git a/arch/powerpc/kernel/fadump.c b/arch/powerpc/kernel/fadump.c
>> index b3a6633..2ed9d1c 100644
>> --- a/arch/powerpc/kernel/fadump.c
>> +++ b/arch/powerpc/kernel/fadump.c
>> @@ -402,8 +402,14 @@ void crash_fadump(struct pt_regs *regs, const char *str)
>>  {
>>  struct fadump_crash_info_header *fdh = NULL;
>>  
>> -if (!fw_dump.dump_registered || !fw_dump.fadumphdr_addr)
>> +mutex_lock(&fadump_mutex);
> 
> What happens when a crashing CPU can't get the mutex and goes to sleep?

Got your point. I think I should use mutex_trylock() here. There are only
two reasons a crashing CPU can't get the mutex: 1) another CPU is also
crashing, has got the mutex and is on its way to trigger fadump, or 2) we
are in the middle of fadump register/un-register, in which case we can just
return and go to normal panic.

Thanks,
-Mahesh.

Re: [PATCH] powerpc: cmp -> cmpd for 64-bit

2016-10-12 Thread Vaidyanathan Srinivasan

* Segher Boessenkool  [2016-10-12 08:26:48]:

> On Wed, Oct 12, 2016 at 02:05:19PM +1100, Michael Ellerman wrote:
> > Segher Boessenkool  writes:
> > 

[snip]

> 
> > > --- a/arch/powerpc/include/asm/cpuidle.h
> > > +++ b/arch/powerpc/include/asm/cpuidle.h
> > > @@ -26,7 +26,7 @@ extern u64 pnv_first_deep_stop_state;
> > 
> > #define IDLE_STATE_ENTER_SEQ(IDLE_INST) \
> > /* Magic NAP/SLEEP/WINKLE mode enter sequence */\
> > >   std r0,0(r1);   \
> > >   ptesync;\
> > >   ld  r0,0(r1);   \
> > > -1:   cmp cr0,r0,r0;  \
> > > +1:   cmpdcr0,r0,r0;  \
> > >   bne 1b; \
> > >   IDLE_INST;  \
> > >   b   .
> > 
> > What's this one doing, is it a bug? I can't really tell without knowing
> > what the magic sequence is meant to do.

This one is the recommended idle state entry sequence described in
ISA.  We need to ensure the context is fully saved and also create
a register dependency using cmp and loop which will ideally not be
taken.  This will get the thread (pipeline) ready to start losing
state when the idle instruction is executed.

ISA 2.07 Section: 3.3.2.1 Entering and Exiting Power-Saving Mode

> 
> It looks like it is making sure the ptesync is done.  The ld/cmp/bne
> is the usual to make sure the ld is done, and in std/ptesync/ld the ld
> won't be done before the ptesync is done.
> 
> The cmp always compares equal, of course, so both cmpw and cmpd would
> work fine here.  cmpd looks better after ld ;-)

Yes :)

cmpd or cmpw would provide same result as far as this code sequence is
concerned.  I agree that cmpd is more appropriate here.

--Vaidy

Re: [PATCH v2 0/7] PCI: layerscape: Cleanups

2016-10-12 Thread Roy Zang

On 10/12/2016 11:07 AM, Bjorn Helgaas wrote:
> I applied these to pci/host-layerscape for v4.9.  I hope to ask Linus to
> pull them tomorrow, so if you see any issues, let me know soon.
>
good to me.

Thanks.

Roy

Re: [PATCH v2 0/7] PCI: layerscape: Cleanups

2016-10-12 Thread Bjorn Helgaas

On Wed, Oct 12, 2016 at 08:57:22AM -0500, Bjorn Helgaas wrote:
>   - Add local "dev" pointers to reduce repetition of things like
> "&pdev->dev".
> 
>   - Remove platform drvdata because it appears unused (we called
> platform_set_drvdata() but not platform_get_drvdata()).
> 
>   - Remove redundant struct members.
> 
>   - Pass device-specific struct to internal functions for consistency.
> 
>   - Move struct pcie_port setup to probe function for consistency.
> 
>   - Remove unused ls_add_pcie_port() platform_device argument.
> 
> Nothing here should change the behavior of the driver.
> 
> Changes from v1:
>   I dropped the following patch because it was a lot of churn for
>   questionable benefit:
> PCI: layerscape: Name private struct pointer "ls" consistently
> 
> ---
> 
> Bjorn Helgaas (7):
>   PCI: layerscape: Add local struct device pointers
>   PCI: layerscape: Remove unused platform data
>   PCI: layerscape: Remove redundant struct ls_pcie.dbi
>   PCI: layerscape: Pass device-specific struct to internal functions
>   PCI: layerscape: Move struct pcie_port setup to probe function
>   PCI: layerscape: Remove unused ls_add_pcie_port() platform_device arg
>   PCI: layerscape: Reorder struct ls_pcie
> 
> 
>  drivers/pci/host/pci-layerscape.c |   65 
> +++--
>  1 file changed, 33 insertions(+), 32 deletions(-)

I applied these to pci/host-layerscape for v4.9.  I hope to ask Linus to
pull them tomorrow, so if you see any issues, let me know soon.

Re: [PATCH] powerpc/mm: Prevent unlikely crash in copro_calculate_slb()

2016-10-12 Thread Frederic Barrat


ping? The patch still applies cleanly on recent trees.

  Fred

Le 17/06/2016 à 18:53, Frederic Barrat a écrit :

If a cxl adapter faults on an invalid address for a kernel context, we
may enter copro_calculate_slb() with a NULL mm pointer (kernel
context) and an effective address which looks like a user
address. Which will cause a crash when dereferencing mm. It is clearly
an AFU bug, but there's no reason to crash either. So return an error,
so that cxl can ack the interrupt with an address error.

Signed-off-by: Frederic Barrat 
Cc: 
---
 arch/powerpc/mm/copro_fault.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/arch/powerpc/mm/copro_fault.c b/arch/powerpc/mm/copro_fault.c
index 6527882..ddfd274 100644
--- a/arch/powerpc/mm/copro_fault.c
+++ b/arch/powerpc/mm/copro_fault.c
@@ -106,6 +106,8 @@ int copro_calculate_slb(struct mm_struct *mm, u64 ea, 
struct copro_slb *slb)
switch (REGION_ID(ea)) {
case USER_REGION_ID:
pr_devel("%s: 0x%llx -- USER_REGION_ID\n", __func__, ea);
+   if (mm == NULL)
+   return 1;
psize = get_slice_psize(mm, ea);
ssize = user_segment_size(ea);
vsid = get_vsid(mm->context.id, ea, ssize);

Re: [PATCH v15 03/15] selftests/powerpc: Add ptrace tests for EBB

2016-10-12 Thread Simon Guo

On Fri, Oct 07, 2016 at 08:44:48AM +1100, Michael Ellerman wrote:
> wei.guo.si...@gmail.com writes:
> 
> > From: Anshuman Khandual 
> >
> > This patch adds ptrace interface test for EBB/PMU specific
> > registers. This also adds some generic ptrace interface
> > based helper functions to be used by other patches later
> > on in the series.
> 
> This is consistently failing for me on a P8 Tuleta (pvr 004b 0201):
> 
> # ./ptrace-ebb 
> test: ptrace_ebb_pmu
> tags: git_version:v4.8-rc5-176-g89cf1de0ae90
> EBBRR: 100059f8
> EBBHR: 100053cc; expected: 100053cc
> BESCR: 8001
> SIAR:  100012d0
> SDAR:  3fff7e4cc000
> SIER:  300; expected: 200
> MMCR2: 0; expected: 0
> MMCR0: 18080; expected: 18080
> failure: ptrace_ebb_pmu
> 
> cheers
Michael,

Yes.. SIER has different value in baremetal and virtual machine
due to different MSR[HV] value.  I will correct it. Originally I only
tested in virtual BE/LE machines.

Currently all tests cases (with fix) passed on one baremetal P8 machine 
with LE OS installed.  And I will try to find another baremetal with BE 
OS installed to test.

Thanks for indicating it.

BR,
Simon

[PATCH v2 7/7] PCI: layerscape: Reorder struct ls_pcie

2016-10-12 Thread Bjorn Helgaas

Reorder struct ls_pcie to put generic fields first.  No functional change
intended.

Signed-off-by: Bjorn Helgaas 
---
 drivers/pci/host/pci-layerscape.c |2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/pci/host/pci-layerscape.c 
b/drivers/pci/host/pci-layerscape.c
index 3a86c1a..2cb7315 100644
--- a/drivers/pci/host/pci-layerscape.c
+++ b/drivers/pci/host/pci-layerscape.c
@@ -45,9 +45,9 @@ struct ls_pcie_drvdata {
 };
 
 struct ls_pcie {
+   struct pcie_port pp;/* pp.dbi_base is DT regs */
void __iomem *lut;
struct regmap *scfg;
-   struct pcie_port pp;
const struct ls_pcie_drvdata *drvdata;
int index;
 };

[PATCH v2 6/7] PCI: layerscape: Remove unused ls_add_pcie_port() platform_device arg

2016-10-12 Thread Bjorn Helgaas

ls_add_pcie_port() doesn't use the platform_device pointer passed to it, so
remove it.  No functional change intended.

Signed-off-by: Bjorn Helgaas 
---
 drivers/pci/host/pci-layerscape.c |5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/drivers/pci/host/pci-layerscape.c 
b/drivers/pci/host/pci-layerscape.c
index 2d77104..3a86c1a 100644
--- a/drivers/pci/host/pci-layerscape.c
+++ b/drivers/pci/host/pci-layerscape.c
@@ -213,8 +213,7 @@ static const struct of_device_id ls_pcie_of_match[] = {
{ },
 };
 
-static int __init ls_add_pcie_port(struct ls_pcie *pcie,
-  struct platform_device *pdev)
+static int __init ls_add_pcie_port(struct ls_pcie *pcie)
 {
struct pcie_port *pp = &pcie->pp;
struct device *dev = pp->dev;
@@ -263,7 +262,7 @@ static int __init ls_pcie_probe(struct platform_device 
*pdev)
if (!ls_pcie_is_bridge(pcie))
return -ENODEV;
 
-   ret = ls_add_pcie_port(pcie, pdev);
+   ret = ls_add_pcie_port(pcie);
if (ret < 0)
return ret;

[PATCH v2 5/7] PCI: layerscape: Move struct pcie_port setup to probe function

2016-10-12 Thread Bjorn Helgaas

Do the basic pcie_port setup in the probe function for consistency with
other drivers.  No functional change intended.

Signed-off-by: Bjorn Helgaas 
---
 drivers/pci/host/pci-layerscape.c |   10 ++
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/drivers/pci/host/pci-layerscape.c 
b/drivers/pci/host/pci-layerscape.c
index 2b31296..2d77104 100644
--- a/drivers/pci/host/pci-layerscape.c
+++ b/drivers/pci/host/pci-layerscape.c
@@ -216,13 +216,10 @@ static const struct of_device_id ls_pcie_of_match[] = {
 static int __init ls_add_pcie_port(struct ls_pcie *pcie,
   struct platform_device *pdev)
 {
-   struct device *dev = &pdev->dev;
struct pcie_port *pp = &pcie->pp;
+   struct device *dev = pp->dev;
int ret;
 
-   pp->dev = dev;
-   pp->ops = pcie->drvdata->ops;
-
ret = dw_pcie_host_init(pp);
if (ret) {
dev_err(dev, "failed to initialize host\n");
@@ -237,6 +234,7 @@ static int __init ls_pcie_probe(struct platform_device 
*pdev)
struct device *dev = &pdev->dev;
const struct of_device_id *match;
struct ls_pcie *pcie;
+   struct pcie_port *pp;
struct resource *dbi_base;
int ret;
 
@@ -248,6 +246,10 @@ static int __init ls_pcie_probe(struct platform_device 
*pdev)
if (!pcie)
return -ENOMEM;
 
+   pp = &pcie->pp;
+   pp->dev = dev;
+   pp->ops = pcie->drvdata->ops;
+
dbi_base = platform_get_resource_byname(pdev, IORESOURCE_MEM, "regs");
pcie->pp.dbi_base = devm_ioremap_resource(dev, dbi_base);
if (IS_ERR(pcie->pp.dbi_base)) {

[PATCH v2 4/7] PCI: layerscape: Pass device-specific struct to internal functions

2016-10-12 Thread Bjorn Helgaas

Only interfaces used from outside the driver, e.g., those called by the
DesignWare core, need to accept pointers to the generic struct pcie_port.
Internal interfaces can accept pointers to the device-specific struct,
which makes them more straightforward.  No functional change intended.

Signed-off-by: Bjorn Helgaas 
---
 drivers/pci/host/pci-layerscape.c |6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/pci/host/pci-layerscape.c 
b/drivers/pci/host/pci-layerscape.c
index bdafe55..2b31296 100644
--- a/drivers/pci/host/pci-layerscape.c
+++ b/drivers/pci/host/pci-layerscape.c
@@ -213,12 +213,12 @@ static const struct of_device_id ls_pcie_of_match[] = {
{ },
 };
 
-static int __init ls_add_pcie_port(struct pcie_port *pp,
+static int __init ls_add_pcie_port(struct ls_pcie *pcie,
   struct platform_device *pdev)
 {
struct device *dev = &pdev->dev;
+   struct pcie_port *pp = &pcie->pp;
int ret;
-   struct ls_pcie *pcie = to_ls_pcie(pp);
 
pp->dev = dev;
pp->ops = pcie->drvdata->ops;
@@ -261,7 +261,7 @@ static int __init ls_pcie_probe(struct platform_device 
*pdev)
if (!ls_pcie_is_bridge(pcie))
return -ENODEV;
 
-   ret = ls_add_pcie_port(&pcie->pp, pdev);
+   ret = ls_add_pcie_port(pcie, pdev);
if (ret < 0)
return ret;

[PATCH v2 3/7] PCI: layerscape: Remove redundant struct ls_pcie.dbi

2016-10-12 Thread Bjorn Helgaas

Remove the struct ls_pcie.dbi member, which is a duplicate of the generic
pp.dbi_base member.  No functional change intended.

Signed-off-by: Bjorn Helgaas 
---
 drivers/pci/host/pci-layerscape.c |   24 +++-
 1 file changed, 11 insertions(+), 13 deletions(-)

diff --git a/drivers/pci/host/pci-layerscape.c 
b/drivers/pci/host/pci-layerscape.c
index ebed415..bdafe55 100644
--- a/drivers/pci/host/pci-layerscape.c
+++ b/drivers/pci/host/pci-layerscape.c
@@ -45,7 +45,6 @@ struct ls_pcie_drvdata {
 };
 
 struct ls_pcie {
-   void __iomem *dbi;
void __iomem *lut;
struct regmap *scfg;
struct pcie_port pp;
@@ -59,7 +58,7 @@ static bool ls_pcie_is_bridge(struct ls_pcie *pcie)
 {
u32 header_type;
 
-   header_type = ioread8(pcie->dbi + PCI_HEADER_TYPE);
+   header_type = ioread8(pcie->pp.dbi_base + PCI_HEADER_TYPE);
header_type &= 0x7f;
 
return header_type == PCI_HEADER_TYPE_BRIDGE;
@@ -68,13 +67,13 @@ static bool ls_pcie_is_bridge(struct ls_pcie *pcie)
 /* Clear multi-function bit */
 static void ls_pcie_clear_multifunction(struct ls_pcie *pcie)
 {
-   iowrite8(PCI_HEADER_TYPE_BRIDGE, pcie->dbi + PCI_HEADER_TYPE);
+   iowrite8(PCI_HEADER_TYPE_BRIDGE, pcie->pp.dbi_base + PCI_HEADER_TYPE);
 }
 
 /* Fix class value */
 static void ls_pcie_fix_class(struct ls_pcie *pcie)
 {
-   iowrite16(PCI_CLASS_BRIDGE_PCI, pcie->dbi + PCI_CLASS_DEVICE);
+   iowrite16(PCI_CLASS_BRIDGE_PCI, pcie->pp.dbi_base + PCI_CLASS_DEVICE);
 }
 
 /* Drop MSG TLP except for Vendor MSG */
@@ -82,9 +81,9 @@ static void ls_pcie_drop_msg_tlp(struct ls_pcie *pcie)
 {
u32 val;
 
-   val = ioread32(pcie->dbi + PCIE_STRFMR1);
+   val = ioread32(pcie->pp.dbi_base + PCIE_STRFMR1);
val &= 0xDFFFFFFF;
-   iowrite32(val, pcie->dbi + PCIE_STRFMR1);
+   iowrite32(val, pcie->pp.dbi_base + PCIE_STRFMR1);
 }
 
 static int ls1021_pcie_link_up(struct pcie_port *pp)
@@ -149,11 +148,11 @@ static void ls_pcie_host_init(struct pcie_port *pp)
 {
struct ls_pcie *pcie = to_ls_pcie(pp);
 
-   iowrite32(1, pcie->dbi + PCIE_DBI_RO_WR_EN);
+   iowrite32(1, pcie->pp.dbi_base + PCIE_DBI_RO_WR_EN);
ls_pcie_fix_class(pcie);
ls_pcie_clear_multifunction(pcie);
ls_pcie_drop_msg_tlp(pcie);
-   iowrite32(0, pcie->dbi + PCIE_DBI_RO_WR_EN);
+   iowrite32(0, pcie->pp.dbi_base + PCIE_DBI_RO_WR_EN);
 }
 
 static int ls_pcie_msi_host_init(struct pcie_port *pp,
@@ -222,7 +221,6 @@ static int __init ls_add_pcie_port(struct pcie_port *pp,
struct ls_pcie *pcie = to_ls_pcie(pp);
 
pp->dev = dev;
-   pp->dbi_base = pcie->dbi;
pp->ops = pcie->drvdata->ops;
 
ret = dw_pcie_host_init(pp);
@@ -251,14 +249,14 @@ static int __init ls_pcie_probe(struct platform_device 
*pdev)
return -ENOMEM;
 
dbi_base = platform_get_resource_byname(pdev, IORESOURCE_MEM, "regs");
-   pcie->dbi = devm_ioremap_resource(dev, dbi_base);
-   if (IS_ERR(pcie->dbi)) {
+   pcie->pp.dbi_base = devm_ioremap_resource(dev, dbi_base);
+   if (IS_ERR(pcie->pp.dbi_base)) {
dev_err(dev, "missing *regs* space\n");
-   return PTR_ERR(pcie->dbi);
+   return PTR_ERR(pcie->pp.dbi_base);
}
 
pcie->drvdata = match->data;
-   pcie->lut = pcie->dbi + pcie->drvdata->lut_offset;
+   pcie->lut = pcie->pp.dbi_base + pcie->drvdata->lut_offset;
 
if (!ls_pcie_is_bridge(pcie))
return -ENODEV;

[PATCH v2 2/7] PCI: layerscape: Remove unused platform data

2016-10-12 Thread Bjorn Helgaas

The layerscape driver never uses the platform drvdata pointer, so don't
bother setting it.  No functional change intended.

Signed-off-by: Bjorn Helgaas 
---
 drivers/pci/host/pci-layerscape.c |2 --
 1 file changed, 2 deletions(-)

diff --git a/drivers/pci/host/pci-layerscape.c 
b/drivers/pci/host/pci-layerscape.c
index 08b511e..ebed415 100644
--- a/drivers/pci/host/pci-layerscape.c
+++ b/drivers/pci/host/pci-layerscape.c
@@ -267,8 +267,6 @@ static int __init ls_pcie_probe(struct platform_device 
*pdev)
if (ret < 0)
return ret;
 
-   platform_set_drvdata(pdev, pcie);
-
return 0;
 }

[PATCH v2 1/7] PCI: layerscape: Add local struct device pointers

2016-10-12 Thread Bjorn Helgaas

Use a local "struct device *dev" for brevity and consistency with other
drivers.  No functional change intended.

Signed-off-by: Bjorn Helgaas 
---
 drivers/pci/host/pci-layerscape.c |   26 +++---
 1 file changed, 15 insertions(+), 11 deletions(-)

diff --git a/drivers/pci/host/pci-layerscape.c 
b/drivers/pci/host/pci-layerscape.c
index 114ba81..08b511e 100644
--- a/drivers/pci/host/pci-layerscape.c
+++ b/drivers/pci/host/pci-layerscape.c
@@ -106,18 +106,19 @@ static int ls1021_pcie_link_up(struct pcie_port *pp)
 
 static void ls1021_pcie_host_init(struct pcie_port *pp)
 {
+   struct device *dev = pp->dev;
struct ls_pcie *pcie = to_ls_pcie(pp);
u32 index[2];
 
-   pcie->scfg = syscon_regmap_lookup_by_phandle(pp->dev->of_node,
+   pcie->scfg = syscon_regmap_lookup_by_phandle(dev->of_node,
 "fsl,pcie-scfg");
if (IS_ERR(pcie->scfg)) {
-   dev_err(pp->dev, "No syscfg phandle specified\n");
+   dev_err(dev, "No syscfg phandle specified\n");
pcie->scfg = NULL;
return;
}
 
-   if (of_property_read_u32_array(pp->dev->of_node,
+   if (of_property_read_u32_array(dev->of_node,
   "fsl,pcie-scfg", index, 2)) {
pcie->scfg = NULL;
return;
@@ -158,8 +159,9 @@ static void ls_pcie_host_init(struct pcie_port *pp)
 static int ls_pcie_msi_host_init(struct pcie_port *pp,
 struct msi_controller *chip)
 {
+   struct device *dev = pp->dev;
+   struct device_node *np = dev->of_node;
struct device_node *msi_node;
-   struct device_node *np = pp->dev->of_node;
 
/*
 * The MSI domain is set by the generic of_msi_configure().  This
@@ -169,7 +171,7 @@ static int ls_pcie_msi_host_init(struct pcie_port *pp,
 */
msi_node = of_parse_phandle(np, "msi-parent", 0);
if (!msi_node) {
-   dev_err(pp->dev, "failed to find msi-parent\n");
+   dev_err(dev, "failed to find msi-parent\n");
return -EINVAL;
}
 
@@ -215,16 +217,17 @@ static const struct of_device_id ls_pcie_of_match[] = {
 static int __init ls_add_pcie_port(struct pcie_port *pp,
   struct platform_device *pdev)
 {
+   struct device *dev = &pdev->dev;
int ret;
struct ls_pcie *pcie = to_ls_pcie(pp);
 
-   pp->dev = &pdev->dev;
+   pp->dev = dev;
pp->dbi_base = pcie->dbi;
pp->ops = pcie->drvdata->ops;
 
ret = dw_pcie_host_init(pp);
if (ret) {
-   dev_err(pp->dev, "failed to initialize host\n");
+   dev_err(dev, "failed to initialize host\n");
return ret;
}
 
@@ -233,23 +236,24 @@ static int __init ls_add_pcie_port(struct pcie_port *pp,
 
 static int __init ls_pcie_probe(struct platform_device *pdev)
 {
+   struct device *dev = &pdev->dev;
const struct of_device_id *match;
struct ls_pcie *pcie;
struct resource *dbi_base;
int ret;
 
-   match = of_match_device(ls_pcie_of_match, &pdev->dev);
+   match = of_match_device(ls_pcie_of_match, dev);
if (!match)
return -ENODEV;
 
-   pcie = devm_kzalloc(&pdev->dev, sizeof(*pcie), GFP_KERNEL);
+   pcie = devm_kzalloc(dev, sizeof(*pcie), GFP_KERNEL);
if (!pcie)
return -ENOMEM;
 
dbi_base = platform_get_resource_byname(pdev, IORESOURCE_MEM, "regs");
-   pcie->dbi = devm_ioremap_resource(&pdev->dev, dbi_base);
+   pcie->dbi = devm_ioremap_resource(dev, dbi_base);
if (IS_ERR(pcie->dbi)) {
-   dev_err(&pdev->dev, "missing *regs* space\n");
+   dev_err(dev, "missing *regs* space\n");
return PTR_ERR(pcie->dbi);
}

[PATCH v2 0/7] PCI: layerscape: Cleanups

2016-10-12 Thread Bjorn Helgaas

  - Add local "dev" pointers to reduce repetition of things like
"&pdev->dev".

  - Remove platform drvdata because it appears unused (we called
platform_set_drvdata() but not platform_get_drvdata()).

  - Remove redundant struct members.

  - Pass device-specific struct to internal functions for consistency.

  - Move struct pcie_port setup to probe function for consistency.

  - Remove unused ls_add_pcie_port() platform_device argument.

Nothing here should change the behavior of the driver.

Changes from v1:
  I dropped the following patch because it was a lot of churn for
  questionable benefit:
PCI: layerscape: Name private struct pointer "ls" consistently

---

Bjorn Helgaas (7):
  PCI: layerscape: Add local struct device pointers
  PCI: layerscape: Remove unused platform data
  PCI: layerscape: Remove redundant struct ls_pcie.dbi
  PCI: layerscape: Pass device-specific struct to internal functions
  PCI: layerscape: Move struct pcie_port setup to probe function
  PCI: layerscape: Remove unused ls_add_pcie_port() platform_device arg
  PCI: layerscape: Reorder struct ls_pcie


 drivers/pci/host/pci-layerscape.c |   65 +++--
 1 file changed, 33 insertions(+), 32 deletions(-)

Re: [PATCH] powerpc: cmp -> cmpd for 64-bit

2016-10-12 Thread Segher Boessenkool

On Wed, Oct 12, 2016 at 02:05:19PM +1100, Michael Ellerman wrote:
> Segher Boessenkool  writes:
> 
> > PowerPC's "cmp" instruction has four operands.  Normally people write
> > "cmpw" or "cmpd" for the second cmp operand 0 or 1.  But, frequently
> > people forget, and write "cmp" with just three operands.
> >
> > With older binutils this is silently accepted as if this was "cmpw",
> > while often "cmpd" is wanted.  With newer binutils GAS will complain
> > about this for 64-bit code.  For 32-bit code it still silently assumes
> > "cmpw" is what is meant.
> 
> Thanks.
> 
> Anton already sent a fix for the two vdso ones, which were real bugs,
> and that's now in Linus' tree.

Ah cool.  You'll just need the one then (and many more for book3e, but
I cannot really handle that, other people can do that a lot better).


> > --- a/arch/powerpc/include/asm/cpuidle.h
> > +++ b/arch/powerpc/include/asm/cpuidle.h
> > @@ -26,7 +26,7 @@ extern u64 pnv_first_deep_stop_state;
> 
> #define   IDLE_STATE_ENTER_SEQ(IDLE_INST) \
>   /* Magic NAP/SLEEP/WINKLE mode enter sequence */\
> > std r0,0(r1);   \
> > ptesync;\
> > ld  r0,0(r1);   \
> > -1: cmp cr0,r0,r0;  \
> > +1: cmpdcr0,r0,r0;  \
> > bne 1b; \
> > IDLE_INST;  \
> > b   .
> 
> What's this one doing, is it a bug? I can't really tell without knowing
> what the magic sequence is meant to do.

It looks like it is making sure the ptesync is done.  The ld/cmp/bne
is the usual to make sure the ld is done, and in std/ptesync/ld the ld
won't be done before the ptesync is done.

The cmp always compares equal, of course, so both cmpw and cmpd would
work fine here.  cmpd looks better after ld ;-)


Segher

Re: [PATCH v3 08/11] powerpc/tracing: fix compat syscall handling

2016-10-12 Thread Marcin Nowakowski




On 12.10.2016 11:59, Michael Ellerman wrote:

I went to test this and noticed the exit and enter events appear to be
reversed in time? (your series on top of 24532f768121)


thanks for testing the patch - I've found a bug that has sneaked in 
while cleaning up the patches before submission ... I'll fix it in the 
next iteration.


Marcin

Re: [PATCH v3 03/11] tracing/syscalls: add compat syscall metadata

2016-10-12 Thread Marcin Nowakowski


On 12.10.2016 10:50, Michael Ellerman wrote:

<...>
It's annoying that we have to duplicate all that just to do a + 1.

How about this as a precursor?

> <...>

Thanks for the suggestion - unless anyone sees a reason to keep the 
current solution I'll change it.


Marcin

[PATCH v2 13/16] scsi: fc: use bsg_job_done

2016-10-12 Thread Johannes Thumshirn

fc_bsg_jobdone() and bsg_job_done() are 1:1 copies now so use the bsg-lib one
instead of the FC private implementation.

Signed-off-by: Johannes Thumshirn 
---
 drivers/s390/scsi/zfcp_fc.c  |  2 +-
 drivers/scsi/bfa/bfad_bsg.c  |  4 ++--
 drivers/scsi/ibmvscsi/ibmvfc.c   |  2 +-
 drivers/scsi/libfc/fc_lport.c|  4 ++--
 drivers/scsi/lpfc/lpfc_bsg.c | 38 +-
 drivers/scsi/qla2xxx/qla_bsg.c   | 44 
 drivers/scsi/scsi_transport_fc.c | 41 +++--
 include/scsi/scsi_transport_fc.h |  2 --
 8 files changed, 50 insertions(+), 87 deletions(-)

diff --git a/drivers/s390/scsi/zfcp_fc.c b/drivers/s390/scsi/zfcp_fc.c
index b1b4129..a0f9c82 100644
--- a/drivers/s390/scsi/zfcp_fc.c
+++ b/drivers/s390/scsi/zfcp_fc.c
@@ -893,7 +893,7 @@ static void zfcp_fc_ct_els_job_handler(void *data)
jr->reply_payload_rcv_len = job->reply_payload.payload_len;
jr->reply_data.ctels_reply.status = FC_CTELS_STATUS_OK;
jr->result = zfcp_ct_els->status ? -EIO : 0;
-   fc_bsg_jobdone(job, jr->result, jr->reply_payload_rcv_len);
+   bsg_job_done(job, jr->result, jr->reply_payload_rcv_len);
 }
 
 static struct zfcp_fc_wka_port *zfcp_fc_job_wka_port(struct bsg_job *job)
diff --git a/drivers/scsi/bfa/bfad_bsg.c b/drivers/scsi/bfa/bfad_bsg.c
index cdc25e6..a9a0016 100644
--- a/drivers/scsi/bfa/bfad_bsg.c
+++ b/drivers/scsi/bfa/bfad_bsg.c
@@ -3179,7 +3179,7 @@ bfad_im_bsg_vendor_request(struct bsg_job *job)
bsg_reply->reply_payload_rcv_len = job->reply_payload.payload_len;
bsg_reply->result = rc;
 
-   fc_bsg_jobdone(job, bsg_reply->result,
+   bsg_job_done(job, bsg_reply->result,
   bsg_reply->reply_payload_rcv_len);
return rc;
 error:
@@ -3555,7 +3555,7 @@ out:
bsg_reply->result = rc;
 
if (rc == BFA_STATUS_OK)
-   fc_bsg_jobdone(job, bsg_reply->result,
+   bsg_job_done(job, bsg_reply->result,
   bsg_reply->reply_payload_rcv_len);
 
return rc;
diff --git a/drivers/scsi/ibmvscsi/ibmvfc.c b/drivers/scsi/ibmvscsi/ibmvfc.c
index 9fd8975..85aa8ab 100644
--- a/drivers/scsi/ibmvscsi/ibmvfc.c
+++ b/drivers/scsi/ibmvscsi/ibmvfc.c
@@ -1940,7 +1940,7 @@ static int ibmvfc_bsg_request(struct bsg_job *job)
ibmvfc_free_event(evt);
spin_unlock_irqrestore(vhost->host->host_lock, flags);
bsg_reply->result = rc;
-   fc_bsg_jobdone(job, bsg_reply->result,
+   bsg_job_done(job, bsg_reply->result,
   bsg_reply->reply_payload_rcv_len);
rc = 0;
 out:
diff --git a/drivers/scsi/libfc/fc_lport.c b/drivers/scsi/libfc/fc_lport.c
index 58a3ccb..40d9038 100644
--- a/drivers/scsi/libfc/fc_lport.c
+++ b/drivers/scsi/libfc/fc_lport.c
@@ -1911,7 +1911,7 @@ static void fc_lport_bsg_resp(struct fc_seq *sp, struct 
fc_frame *fp,
bsg_reply->result = (PTR_ERR(fp) == -FC_EX_CLOSED) ?
-ECONNABORTED : -ETIMEDOUT;
job->reply_len = sizeof(uint32_t);
-   fc_bsg_jobdone(job, bsg_reply->result,
+   bsg_job_done(job, bsg_reply->result,
   bsg_reply->reply_payload_rcv_len);
kfree(info);
return;
@@ -1946,7 +1946,7 @@ static void fc_lport_bsg_resp(struct fc_seq *sp, struct 
fc_frame *fp,
bsg_reply->reply_payload_rcv_len =
job->reply_payload.payload_len;
bsg_reply->result = 0;
-   fc_bsg_jobdone(job, bsg_reply->result,
+   bsg_job_done(job, bsg_reply->result,
   bsg_reply->reply_payload_rcv_len);
kfree(info);
}
diff --git a/drivers/scsi/lpfc/lpfc_bsg.c b/drivers/scsi/lpfc/lpfc_bsg.c
index ca21f25..a862437 100644
--- a/drivers/scsi/lpfc/lpfc_bsg.c
+++ b/drivers/scsi/lpfc/lpfc_bsg.c
@@ -371,7 +371,7 @@ lpfc_bsg_send_mgmt_cmd_cmp(struct lpfc_hba *phba,
 
if (job) {
bsg_reply->result = rc;
-   fc_bsg_jobdone(job, bsg_reply->result,
+   bsg_job_done(job, bsg_reply->result,
   bsg_reply->reply_payload_rcv_len);
}
return;
@@ -645,7 +645,7 @@ lpfc_bsg_rport_els_cmp(struct lpfc_hba *phba,
 
if (job) {
bsg_reply->result = rc;
-   fc_bsg_jobdone(job, bsg_reply->result,
+   bsg_job_done(job, bsg_reply->result,
   bsg_reply->reply_payload_rcv_len);
}
return;
@@ -1138,7 +1138,7 @@ lpfc_bsg_ct_unsol_event(struct lpfc_hba *phba, struct 
lpfc_sli_ring *pring,
job->dd_data = NULL;
/* complete the job back to userspace */
spin_unlock_irqrestore(&phba->ct_ev_lock, flags);
-   fc_bsg_jobdone(job, bsg_reply->result,
+

[PATCH v2 10/16] scsi: change FC drivers to use 'struct bsg_job'

2016-10-12 Thread Johannes Thumshirn

Change FC drivers to use 'struct bsg_job' from bsg-lib.h instead of 'struct
fc_bsg_job' from scsi_transport_fc.h and remove 'struct fc_bsg_job'.

Signed-off-by: Johannes Thumshirn 
---
 drivers/s390/scsi/zfcp_ext.h |  4 +--
 drivers/s390/scsi/zfcp_fc.c  | 15 
 drivers/scsi/bfa/bfad_bsg.c  | 10 +++---
 drivers/scsi/bfa/bfad_im.h   |  4 +--
 drivers/scsi/ibmvscsi/ibmvfc.c   |  9 ++---
 drivers/scsi/libfc/fc_lport.c| 10 +++---
 drivers/scsi/lpfc/lpfc_bsg.c | 74 
 drivers/scsi/lpfc/lpfc_crtn.h|  4 +--
 drivers/scsi/qla2xxx/qla_bsg.c   | 61 +
 drivers/scsi/qla2xxx/qla_def.h   |  2 +-
 drivers/scsi/qla2xxx/qla_gbl.h   |  4 +--
 drivers/scsi/qla2xxx/qla_iocb.c  |  8 ++---
 drivers/scsi/qla2xxx/qla_isr.c   |  6 ++--
 drivers/scsi/qla2xxx/qla_mr.c|  5 +--
 drivers/scsi/scsi_transport_fc.c | 20 +--
 include/scsi/libfc.h |  2 +-
 include/scsi/scsi_transport_fc.h | 63 ++
 17 files changed, 138 insertions(+), 163 deletions(-)

diff --git a/drivers/s390/scsi/zfcp_ext.h b/drivers/s390/scsi/zfcp_ext.h
index 5b50065..ab163be 100644
--- a/drivers/s390/scsi/zfcp_ext.h
+++ b/drivers/s390/scsi/zfcp_ext.h
@@ -82,8 +82,8 @@ extern void zfcp_fc_link_test_work(struct work_struct *);
 extern void zfcp_fc_wka_ports_force_offline(struct zfcp_fc_wka_ports *);
 extern int zfcp_fc_gs_setup(struct zfcp_adapter *);
 extern void zfcp_fc_gs_destroy(struct zfcp_adapter *);
-extern int zfcp_fc_exec_bsg_job(struct fc_bsg_job *);
-extern int zfcp_fc_timeout_bsg_job(struct fc_bsg_job *);
+extern int zfcp_fc_exec_bsg_job(struct bsg_job *);
+extern int zfcp_fc_timeout_bsg_job(struct bsg_job *);
 extern void zfcp_fc_sym_name_update(struct work_struct *);
 extern unsigned int zfcp_fc_port_scan_backoff(void);
 extern void zfcp_fc_conditional_port_scan(struct zfcp_adapter *);
diff --git a/drivers/s390/scsi/zfcp_fc.c b/drivers/s390/scsi/zfcp_fc.c
index 1977a66..b1b4129 100644
--- a/drivers/s390/scsi/zfcp_fc.c
+++ b/drivers/s390/scsi/zfcp_fc.c
@@ -13,6 +13,7 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 #include 
 #include "zfcp_ext.h"
@@ -885,7 +886,7 @@ out_free:
 
 static void zfcp_fc_ct_els_job_handler(void *data)
 {
-   struct fc_bsg_job *job = data;
+   struct bsg_job *job = data;
struct zfcp_fsf_ct_els *zfcp_ct_els = job->dd_data;
struct fc_bsg_reply *jr = job->reply;
 
@@ -895,7 +896,7 @@ static void zfcp_fc_ct_els_job_handler(void *data)
fc_bsg_jobdone(job, jr->result, jr->reply_payload_rcv_len);
 }
 
-static struct zfcp_fc_wka_port *zfcp_fc_job_wka_port(struct fc_bsg_job *job)
+static struct zfcp_fc_wka_port *zfcp_fc_job_wka_port(struct bsg_job *job)
 {
u32 preamble_word1;
u8 gs_type;
@@ -925,7 +926,7 @@ static struct zfcp_fc_wka_port *zfcp_fc_job_wka_port(struct 
fc_bsg_job *job)
 
 static void zfcp_fc_ct_job_handler(void *data)
 {
-   struct fc_bsg_job *job = data;
+   struct bsg_job *job = data;
struct zfcp_fc_wka_port *wka_port;
 
wka_port = zfcp_fc_job_wka_port(job);
@@ -934,7 +935,7 @@ static void zfcp_fc_ct_job_handler(void *data)
zfcp_fc_ct_els_job_handler(data);
 }
 
-static int zfcp_fc_exec_els_job(struct fc_bsg_job *job,
+static int zfcp_fc_exec_els_job(struct bsg_job *job,
struct zfcp_adapter *adapter)
 {
struct zfcp_fsf_ct_els *els = job->dd_data;
@@ -957,7 +958,7 @@ static int zfcp_fc_exec_els_job(struct fc_bsg_job *job,
return zfcp_fsf_send_els(adapter, d_id, els, job->req->timeout / HZ);
 }
 
-static int zfcp_fc_exec_ct_job(struct fc_bsg_job *job,
+static int zfcp_fc_exec_ct_job(struct bsg_job *job,
   struct zfcp_adapter *adapter)
 {
int ret;
@@ -980,7 +981,7 @@ static int zfcp_fc_exec_ct_job(struct fc_bsg_job *job,
return ret;
 }
 
-int zfcp_fc_exec_bsg_job(struct fc_bsg_job *job)
+int zfcp_fc_exec_bsg_job(struct bsg_job *job)
 {
struct Scsi_Host *shost;
struct zfcp_adapter *adapter;
@@ -1010,7 +1011,7 @@ int zfcp_fc_exec_bsg_job(struct fc_bsg_job *job)
}
 }
 
-int zfcp_fc_timeout_bsg_job(struct fc_bsg_job *job)
+int zfcp_fc_timeout_bsg_job(struct bsg_job *job)
 {
/* hardware tracks timeout, reset bsg timeout to not interfere */
return -EAGAIN;
diff --git a/drivers/scsi/bfa/bfad_bsg.c b/drivers/scsi/bfa/bfad_bsg.c
index d3094270..cdc25e6 100644
--- a/drivers/scsi/bfa/bfad_bsg.c
+++ b/drivers/scsi/bfa/bfad_bsg.c
@@ -3130,7 +3130,7 @@ bfad_iocmd_handler(struct bfad_s *bfad, unsigned int cmd, 
void *iocmd,
 }
 
 static int
-bfad_im_bsg_vendor_request(struct fc_bsg_job *job)
+bfad_im_bsg_vendor_request(struct bsg_job *job)
 {
struct fc_bsg_request *bsg_request = job->request;
struct fc_bsg_reply *bsg_reply = job->reply;
@@ -3314,7 +3314,7 @@ bfad_fcxp_free_mem(struct bfad_s *bfad, struct 
bfad_buf_info

[PATCH v2 02/16] scsi: don't use fc_bsg_job::request and fc_bsg_job::reply directly

2016-10-12 Thread Johannes Thumshirn

Don't use fc_bsg_job::request and fc_bsg_job::reply directly, but use
helper variables bsg_request and bsg_reply. This will be helpful when
transitioning to bsg-lib.

Signed-off-by: Johannes Thumshirn 
---
 drivers/s390/scsi/zfcp_fc.c  |   9 +-
 drivers/scsi/bfa/bfad_bsg.c  |  40 +++---
 drivers/scsi/ibmvscsi/ibmvfc.c   |  22 ++--
 drivers/scsi/libfc/fc_lport.c|  23 ++--
 drivers/scsi/lpfc/lpfc_bsg.c | 194 +---
 drivers/scsi/qla2xxx/qla_bsg.c   | 264 ++-
 drivers/scsi/qla2xxx/qla_iocb.c  |   5 +-
 drivers/scsi/qla2xxx/qla_isr.c   |  46 ---
 drivers/scsi/qla2xxx/qla_mr.c|  10 +-
 drivers/scsi/scsi_transport_fc.c |  37 +++---
 10 files changed, 387 insertions(+), 263 deletions(-)

diff --git a/drivers/s390/scsi/zfcp_fc.c b/drivers/s390/scsi/zfcp_fc.c
index 237688a..4c4023f 100644
--- a/drivers/s390/scsi/zfcp_fc.c
+++ b/drivers/s390/scsi/zfcp_fc.c
@@ -900,8 +900,9 @@ static struct zfcp_fc_wka_port *zfcp_fc_job_wka_port(struct 
fc_bsg_job *job)
u32 preamble_word1;
u8 gs_type;
struct zfcp_adapter *adapter;
+   struct fc_bsg_request *bsg_request = job->request;
 
-   preamble_word1 = job->request->rqst_data.r_ct.preamble_word1;
+   preamble_word1 = bsg_request->rqst_data.r_ct.preamble_word1;
gs_type = (preamble_word1 & 0xff000000) >> 24;
 
adapter = (struct zfcp_adapter *) job->shost->hostdata[0];
@@ -938,6 +939,7 @@ static int zfcp_fc_exec_els_job(struct fc_bsg_job *job,
 {
struct zfcp_fsf_ct_els *els = job->dd_data;
struct fc_rport *rport = job->rport;
+   struct fc_bsg_request *bsg_request = job->request;
struct zfcp_port *port;
u32 d_id;
 
@@ -949,7 +951,7 @@ static int zfcp_fc_exec_els_job(struct fc_bsg_job *job,
d_id = port->d_id;
put_device(&port->dev);
} else
-   d_id = ntoh24(job->request->rqst_data.h_els.port_id);
+   d_id = ntoh24(bsg_request->rqst_data.h_els.port_id);
 
els->handler = zfcp_fc_ct_els_job_handler;
return zfcp_fsf_send_els(adapter, d_id, els, job->req->timeout / HZ);
@@ -983,6 +985,7 @@ int zfcp_fc_exec_bsg_job(struct fc_bsg_job *job)
struct Scsi_Host *shost;
struct zfcp_adapter *adapter;
struct zfcp_fsf_ct_els *ct_els = job->dd_data;
+   struct fc_bsg_request *bsg_request = job->request;
 
shost = job->rport ? rport_to_shost(job->rport) : job->shost;
adapter = (struct zfcp_adapter *)shost->hostdata[0];
@@ -994,7 +997,7 @@ int zfcp_fc_exec_bsg_job(struct fc_bsg_job *job)
ct_els->resp = job->reply_payload.sg_list;
ct_els->handler_data = job;
 
-   switch (job->request->msgcode) {
+   switch (bsg_request->msgcode) {
case FC_BSG_RPT_ELS:
case FC_BSG_HST_ELS_NOLOGIN:
return zfcp_fc_exec_els_job(job, adapter);
diff --git a/drivers/scsi/bfa/bfad_bsg.c b/drivers/scsi/bfa/bfad_bsg.c
index d1ad020..48366d8 100644
--- a/drivers/scsi/bfa/bfad_bsg.c
+++ b/drivers/scsi/bfa/bfad_bsg.c
@@ -3132,7 +3132,9 @@ bfad_iocmd_handler(struct bfad_s *bfad, unsigned int cmd, 
void *iocmd,
 static int
 bfad_im_bsg_vendor_request(struct fc_bsg_job *job)
 {
-   uint32_t vendor_cmd = job->request->rqst_data.h_vendor.vendor_cmd[0];
+   struct fc_bsg_request *bsg_request = job->request;
+   struct fc_bsg_reply *bsg_reply = job->reply;
+   uint32_t vendor_cmd = bsg_request->rqst_data.h_vendor.vendor_cmd[0];
struct bfad_im_port_s *im_port =
(struct bfad_im_port_s *) job->shost->hostdata[0];
struct bfad_s *bfad = im_port->bfad;
@@ -3175,8 +3177,8 @@ bfad_im_bsg_vendor_request(struct fc_bsg_job *job)
 
/* Fill the BSG job reply data */
job->reply_len = job->reply_payload.payload_len;
-   job->reply->reply_payload_rcv_len = job->reply_payload.payload_len;
-   job->reply->result = rc;
+   bsg_reply->reply_payload_rcv_len = job->reply_payload.payload_len;
+   bsg_reply->result = rc;
 
job->job_done(job);
return rc;
@@ -3184,9 +3186,9 @@ error:
/* free the command buffer */
kfree(payload_kbuf);
 out:
-   job->reply->result = rc;
+   bsg_reply->result = rc;
job->reply_len = sizeof(uint32_t);
-   job->reply->reply_payload_rcv_len = 0;
+   bsg_reply->reply_payload_rcv_len = 0;
return rc;
 }
 
@@ -3362,18 +3364,20 @@ bfad_im_bsg_els_ct_request(struct fc_bsg_job *job)
struct bfad_fcxp*drv_fcxp;
struct bfa_fcs_lport_s *fcs_port;
struct bfa_fcs_rport_s *fcs_rport;
-   uint32_t command_type = job->request->msgcode;
+   struct fc_bsg_request *bsg_request = job->request;
+   struct fc_bsg_reply *bsg_reply = job->reply;
+   uint32_t command_type = bsg_request->msgcode;
unsigned long flags;
struct bfad_buf_info *rsp_buf_info;
void *req_kbuf = NULL, *rsp_kbuf =

[PATCH v2 05/16] scsi: fc: provide fc_bsg_to_shost() helper

2016-10-12 Thread Johannes Thumshirn

Provide fc_bsg_to_shost() helper that will become handy when we're moving from
struct fc_bsg_job to a plain struct bsg_job. Also use this little helper in
the LLDDs.

Signed-off-by: Johannes Thumshirn 
---
 drivers/s390/scsi/zfcp_fc.c  |  4 +--
 drivers/scsi/bfa/bfad_bsg.c  |  6 ++---
 drivers/scsi/ibmvscsi/ibmvfc.c   |  4 +--
 drivers/scsi/libfc/fc_lport.c|  2 +-
 drivers/scsi/lpfc/lpfc_bsg.c | 32 
 drivers/scsi/qla2xxx/qla_bsg.c   | 54 
 drivers/scsi/scsi_transport_fc.c |  2 +-
 include/scsi/scsi_transport_fc.h |  5 
 8 files changed, 56 insertions(+), 53 deletions(-)

diff --git a/drivers/s390/scsi/zfcp_fc.c b/drivers/s390/scsi/zfcp_fc.c
index 87f6330..813c286 100644
--- a/drivers/s390/scsi/zfcp_fc.c
+++ b/drivers/s390/scsi/zfcp_fc.c
@@ -905,7 +905,7 @@ static struct zfcp_fc_wka_port *zfcp_fc_job_wka_port(struct 
fc_bsg_job *job)
preamble_word1 = bsg_request->rqst_data.r_ct.preamble_word1;
gs_type = (preamble_word1 & 0xff000000) >> 24;
 
-   adapter = (struct zfcp_adapter *) job->shost->hostdata[0];
+   adapter = shost_priv(fc_bsg_to_shost(job));
 
switch (gs_type) {
case FC_FST_ALIAS:
@@ -987,7 +987,7 @@ int zfcp_fc_exec_bsg_job(struct fc_bsg_job *job)
struct zfcp_fsf_ct_els *ct_els = job->dd_data;
struct fc_bsg_request *bsg_request = job->request;
 
-   shost = job->rport ? rport_to_shost(job->rport) : job->shost;
+   shost = job->rport ? rport_to_shost(job->rport) : fc_bsg_to_shost(job);
adapter = (struct zfcp_adapter *)shost->hostdata[0];
 
if (!(atomic_read(&adapter->status) & ZFCP_STATUS_COMMON_OPEN))
diff --git a/drivers/scsi/bfa/bfad_bsg.c b/drivers/scsi/bfa/bfad_bsg.c
index e49a6c8..d3094270 100644
--- a/drivers/scsi/bfa/bfad_bsg.c
+++ b/drivers/scsi/bfa/bfad_bsg.c
@@ -3135,8 +3135,7 @@ bfad_im_bsg_vendor_request(struct fc_bsg_job *job)
struct fc_bsg_request *bsg_request = job->request;
struct fc_bsg_reply *bsg_reply = job->reply;
uint32_t vendor_cmd = bsg_request->rqst_data.h_vendor.vendor_cmd[0];
-   struct bfad_im_port_s *im_port =
-   (struct bfad_im_port_s *) job->shost->hostdata[0];
+   struct bfad_im_port_s *im_port = shost_priv(fc_bsg_to_shost(job));
struct bfad_s *bfad = im_port->bfad;
struct request_queue *request_q = job->req->q;
void *payload_kbuf;
@@ -3358,8 +3357,7 @@ int
 bfad_im_bsg_els_ct_request(struct fc_bsg_job *job)
 {
struct bfa_bsg_data *bsg_data;
-   struct bfad_im_port_s *im_port =
-   (struct bfad_im_port_s *) job->shost->hostdata[0];
+   struct bfad_im_port_s *im_port = shost_priv(fc_bsg_to_shost(job));
struct bfad_s *bfad = im_port->bfad;
bfa_bsg_fcpt_t *bsg_fcpt;
struct bfad_fcxp*drv_fcxp;
diff --git a/drivers/scsi/ibmvscsi/ibmvfc.c b/drivers/scsi/ibmvscsi/ibmvfc.c
index 1001d4a..f7b50af 100644
--- a/drivers/scsi/ibmvscsi/ibmvfc.c
+++ b/drivers/scsi/ibmvscsi/ibmvfc.c
@@ -1702,7 +1702,7 @@ static void ibmvfc_bsg_timeout_done(struct ibmvfc_event 
*evt)
  **/
 static int ibmvfc_bsg_timeout(struct fc_bsg_job *job)
 {
-   struct ibmvfc_host *vhost = shost_priv(job->shost);
+   struct ibmvfc_host *vhost = shost_priv(fc_bsg_to_shost(job));
unsigned long port_id = (unsigned long)job->dd_data;
struct ibmvfc_event *evt;
struct ibmvfc_tmf *tmf;
@@ -1815,7 +1815,7 @@ unlock_out:
  **/
 static int ibmvfc_bsg_request(struct fc_bsg_job *job)
 {
-   struct ibmvfc_host *vhost = shost_priv(job->shost);
+   struct ibmvfc_host *vhost = shost_priv(fc_bsg_to_shost(job));
struct fc_rport *rport = job->rport;
struct ibmvfc_passthru_mad *mad;
struct ibmvfc_event *evt;
diff --git a/drivers/scsi/libfc/fc_lport.c b/drivers/scsi/libfc/fc_lport.c
index 4bed7ec..c60fdb9 100644
--- a/drivers/scsi/libfc/fc_lport.c
+++ b/drivers/scsi/libfc/fc_lport.c
@@ -2087,7 +2087,7 @@ int fc_lport_bsg_request(struct fc_bsg_job *job)
struct fc_bsg_request *bsg_request = job->request;
struct fc_bsg_reply *bsg_reply = job->reply;
struct request *rsp = job->req->next_rq;
-   struct Scsi_Host *shost = job->shost;
+   struct Scsi_Host *shost = fc_bsg_to_shost(job);
struct fc_lport *lport = shost_priv(shost);
struct fc_rport *rport;
struct fc_rport_priv *rdata;
diff --git a/drivers/scsi/lpfc/lpfc_bsg.c b/drivers/scsi/lpfc/lpfc_bsg.c
index 447a7af..bfcc37d 100644
--- a/drivers/scsi/lpfc/lpfc_bsg.c
+++ b/drivers/scsi/lpfc/lpfc_bsg.c
@@ -384,7 +384,7 @@ lpfc_bsg_send_mgmt_cmd_cmp(struct lpfc_hba *phba,
 static int
 lpfc_bsg_send_mgmt_cmd(struct fc_bsg_job *job)
 {
-   struct lpfc_vport *vport = (struct lpfc_vport *)job->shost->hostdata;
+   struct lpfc_vport *vport = shost_priv(fc_bsg_to_shost(job));
struct lpfc_hba *phba = vport->phba;
struct lpfc_rport_data *rdata =

[PATCH v2 03/16] scsi: fc: Export fc_bsg_jobdone and use it in FC drivers

2016-10-12 Thread Johannes Thumshirn

Export fc_bsg_jobdone so drivers can use it directly instead of doing
the round-trip via struct fc_bsg_job::job_done() and use it in the LLDDs.

As we've converted all LLDDs over to use fc_bsg_jobdone() directly,
we can remove the function pointer from struct fc_bsg_job as well.

Signed-off-by: Johannes Thumshirn 
---
 drivers/s390/scsi/zfcp_fc.c  |  2 +-
 drivers/scsi/bfa/bfad_bsg.c  |  4 ++--
 drivers/scsi/ibmvscsi/ibmvfc.c   |  2 +-
 drivers/scsi/libfc/fc_lport.c|  4 ++--
 drivers/scsi/lpfc/lpfc_bsg.c | 38 +-
 drivers/scsi/qla2xxx/qla_bsg.c   | 44 
 drivers/scsi/scsi_transport_fc.c |  5 ++---
 include/scsi/scsi_transport_fc.h |  2 +-
 8 files changed, 50 insertions(+), 51 deletions(-)

diff --git a/drivers/s390/scsi/zfcp_fc.c b/drivers/s390/scsi/zfcp_fc.c
index 4c4023f..40d8f06 100644
--- a/drivers/s390/scsi/zfcp_fc.c
+++ b/drivers/s390/scsi/zfcp_fc.c
@@ -892,7 +892,7 @@ static void zfcp_fc_ct_els_job_handler(void *data)
jr->reply_payload_rcv_len = job->reply_payload.payload_len;
jr->reply_data.ctels_reply.status = FC_CTELS_STATUS_OK;
jr->result = zfcp_ct_els->status ? -EIO : 0;
-   job->job_done(job);
+   fc_bsg_jobdone(job);
 }
 
 static struct zfcp_fc_wka_port *zfcp_fc_job_wka_port(struct fc_bsg_job *job)
diff --git a/drivers/scsi/bfa/bfad_bsg.c b/drivers/scsi/bfa/bfad_bsg.c
index 48366d8..25889b9 100644
--- a/drivers/scsi/bfa/bfad_bsg.c
+++ b/drivers/scsi/bfa/bfad_bsg.c
@@ -3180,7 +3180,7 @@ bfad_im_bsg_vendor_request(struct fc_bsg_job *job)
bsg_reply->reply_payload_rcv_len = job->reply_payload.payload_len;
bsg_reply->result = rc;
 
-   job->job_done(job);
+   fc_bsg_jobdone(job);
return rc;
 error:
/* free the command buffer */
@@ -3556,7 +3556,7 @@ out:
bsg_reply->result = rc;
 
if (rc == BFA_STATUS_OK)
-   job->job_done(job);
+   fc_bsg_jobdone(job);
 
return rc;
 }
diff --git a/drivers/scsi/ibmvscsi/ibmvfc.c b/drivers/scsi/ibmvscsi/ibmvfc.c
index 8b55279..21c9d28 100644
--- a/drivers/scsi/ibmvscsi/ibmvfc.c
+++ b/drivers/scsi/ibmvscsi/ibmvfc.c
@@ -1939,7 +1939,7 @@ static int ibmvfc_bsg_request(struct fc_bsg_job *job)
ibmvfc_free_event(evt);
spin_unlock_irqrestore(vhost->host->host_lock, flags);
bsg_reply->result = rc;
-   job->job_done(job);
+   fc_bsg_jobdone(job);
rc = 0;
 out:
dma_unmap_sg(vhost->dev, job->request_payload.sg_list,
diff --git a/drivers/scsi/libfc/fc_lport.c b/drivers/scsi/libfc/fc_lport.c
index a1c12e7..8811fe0 100644
--- a/drivers/scsi/libfc/fc_lport.c
+++ b/drivers/scsi/libfc/fc_lport.c
@@ -1912,7 +1912,7 @@ static void fc_lport_bsg_resp(struct fc_seq *sp, struct 
fc_frame *fp,
-ECONNABORTED : -ETIMEDOUT;
job->reply_len = sizeof(uint32_t);
job->state_flags |= FC_RQST_STATE_DONE;
-   job->job_done(job);
+   fc_bsg_jobdone(job);
kfree(info);
return;
}
@@ -1947,7 +1947,7 @@ static void fc_lport_bsg_resp(struct fc_seq *sp, struct 
fc_frame *fp,
job->reply_payload.payload_len;
bsg_reply->result = 0;
job->state_flags |= FC_RQST_STATE_DONE;
-   job->job_done(job);
+   fc_bsg_jobdone(job);
kfree(info);
}
fc_frame_free(fp);
diff --git a/drivers/scsi/lpfc/lpfc_bsg.c b/drivers/scsi/lpfc/lpfc_bsg.c
index 27b5930..1db9cca 100644
--- a/drivers/scsi/lpfc/lpfc_bsg.c
+++ b/drivers/scsi/lpfc/lpfc_bsg.c
@@ -371,7 +371,7 @@ lpfc_bsg_send_mgmt_cmd_cmp(struct lpfc_hba *phba,
 
if (job) {
bsg_reply->result = rc;
-   job->job_done(job);
+   fc_bsg_jobdone(job);
}
return;
 }
@@ -644,7 +644,7 @@ lpfc_bsg_rport_els_cmp(struct lpfc_hba *phba,
 
if (job) {
bsg_reply->result = rc;
-   job->job_done(job);
+   fc_bsg_jobdone(job);
}
return;
 }
@@ -1136,7 +1136,7 @@ lpfc_bsg_ct_unsol_event(struct lpfc_hba *phba, struct 
lpfc_sli_ring *pring,
job->dd_data = NULL;
/* complete the job back to userspace */
spin_unlock_irqrestore(&phba->ct_ev_lock, flags);
-   job->job_done(job);
+   fc_bsg_jobdone(job);
spin_lock_irqsave(&phba->ct_ev_lock, flags);
}
}
@@ -1361,7 +1361,7 @@ lpfc_bsg_hba_get_event(struct fc_bsg_job *job)
spin_unlock_irqrestore(&phba->ct_ev_lock, flags);
job->dd_data = NULL;
bsg_reply->result = 0;
-   job->job_done(job);
+   fc_bsg_jobdone(job);
return 0;
 
 job_error:
@@ -1458,7 +1458,7 @@ lpfc_issue_ct_rsp_cmp(struct lpfc_hba *phba,
 
if (job) {

[PATCH v2 04/16] scsi: Unify interfaces of fc_bsg_jobdone and bsg_job_done

2016-10-12 Thread Johannes Thumshirn

Unify the interfaces of fc_bsg_jobdone and bsg_job_done. This will reduce the
diff when moving from 'struct fc_bsg_job' to a plain 'struct bsg_job' later
on.

Signed-off-by: Johannes Thumshirn 
---
 drivers/s390/scsi/zfcp_fc.c  |  2 +-
 drivers/scsi/bfa/bfad_bsg.c  |  6 ++--
 drivers/scsi/ibmvscsi/ibmvfc.c   |  3 +-
 drivers/scsi/libfc/fc_lport.c|  6 ++--
 drivers/scsi/lpfc/lpfc_bsg.c | 68 +++-
 drivers/scsi/qla2xxx/qla_bsg.c   | 66 +-
 drivers/scsi/scsi_transport_fc.c | 22 +++--
 include/scsi/scsi_transport_fc.h |  3 +-
 8 files changed, 116 insertions(+), 60 deletions(-)

diff --git a/drivers/s390/scsi/zfcp_fc.c b/drivers/s390/scsi/zfcp_fc.c
index 40d8f06..87f6330 100644
--- a/drivers/s390/scsi/zfcp_fc.c
+++ b/drivers/s390/scsi/zfcp_fc.c
@@ -892,7 +892,7 @@ static void zfcp_fc_ct_els_job_handler(void *data)
jr->reply_payload_rcv_len = job->reply_payload.payload_len;
jr->reply_data.ctels_reply.status = FC_CTELS_STATUS_OK;
jr->result = zfcp_ct_els->status ? -EIO : 0;
-   fc_bsg_jobdone(job);
+   fc_bsg_jobdone(job, jr->result, jr->reply_payload_rcv_len);
 }
 
 static struct zfcp_fc_wka_port *zfcp_fc_job_wka_port(struct fc_bsg_job *job)
diff --git a/drivers/scsi/bfa/bfad_bsg.c b/drivers/scsi/bfa/bfad_bsg.c
index 25889b9..e49a6c8 100644
--- a/drivers/scsi/bfa/bfad_bsg.c
+++ b/drivers/scsi/bfa/bfad_bsg.c
@@ -3180,7 +3180,8 @@ bfad_im_bsg_vendor_request(struct fc_bsg_job *job)
bsg_reply->reply_payload_rcv_len = job->reply_payload.payload_len;
bsg_reply->result = rc;
 
-   fc_bsg_jobdone(job);
+   fc_bsg_jobdone(job, bsg_reply->result,
+  bsg_reply->reply_payload_rcv_len);
return rc;
 error:
/* free the command buffer */
@@ -3556,7 +3557,8 @@ out:
bsg_reply->result = rc;
 
if (rc == BFA_STATUS_OK)
-   fc_bsg_jobdone(job);
+   fc_bsg_jobdone(job, bsg_reply->result,
+  bsg_reply->reply_payload_rcv_len);
 
return rc;
 }
diff --git a/drivers/scsi/ibmvscsi/ibmvfc.c b/drivers/scsi/ibmvscsi/ibmvfc.c
index 21c9d28..1001d4a 100644
--- a/drivers/scsi/ibmvscsi/ibmvfc.c
+++ b/drivers/scsi/ibmvscsi/ibmvfc.c
@@ -1939,7 +1939,8 @@ static int ibmvfc_bsg_request(struct fc_bsg_job *job)
ibmvfc_free_event(evt);
spin_unlock_irqrestore(vhost->host->host_lock, flags);
bsg_reply->result = rc;
-   fc_bsg_jobdone(job);
+   fc_bsg_jobdone(job, bsg_reply->result,
+  bsg_reply->reply_payload_rcv_len);
rc = 0;
 out:
dma_unmap_sg(vhost->dev, job->request_payload.sg_list,
diff --git a/drivers/scsi/libfc/fc_lport.c b/drivers/scsi/libfc/fc_lport.c
index 8811fe0..4bed7ec 100644
--- a/drivers/scsi/libfc/fc_lport.c
+++ b/drivers/scsi/libfc/fc_lport.c
@@ -1912,7 +1912,8 @@ static void fc_lport_bsg_resp(struct fc_seq *sp, struct 
fc_frame *fp,
-ECONNABORTED : -ETIMEDOUT;
job->reply_len = sizeof(uint32_t);
job->state_flags |= FC_RQST_STATE_DONE;
-   fc_bsg_jobdone(job);
+   fc_bsg_jobdone(job, bsg_reply->result,
+  bsg_reply->reply_payload_rcv_len);
kfree(info);
return;
}
@@ -1947,7 +1948,8 @@ static void fc_lport_bsg_resp(struct fc_seq *sp, struct 
fc_frame *fp,
job->reply_payload.payload_len;
bsg_reply->result = 0;
job->state_flags |= FC_RQST_STATE_DONE;
-   fc_bsg_jobdone(job);
+   fc_bsg_jobdone(job, bsg_reply->result,
+  bsg_reply->reply_payload_rcv_len);
kfree(info);
}
fc_frame_free(fp);
diff --git a/drivers/scsi/lpfc/lpfc_bsg.c b/drivers/scsi/lpfc/lpfc_bsg.c
index 1db9cca..447a7af 100644
--- a/drivers/scsi/lpfc/lpfc_bsg.c
+++ b/drivers/scsi/lpfc/lpfc_bsg.c
@@ -371,7 +371,8 @@ lpfc_bsg_send_mgmt_cmd_cmp(struct lpfc_hba *phba,
 
if (job) {
bsg_reply->result = rc;
-   fc_bsg_jobdone(job);
+   fc_bsg_jobdone(job, bsg_reply->result,
+  bsg_reply->reply_payload_rcv_len);
}
return;
 }
@@ -644,7 +645,8 @@ lpfc_bsg_rport_els_cmp(struct lpfc_hba *phba,
 
if (job) {
bsg_reply->result = rc;
-   fc_bsg_jobdone(job);
+   fc_bsg_jobdone(job, bsg_reply->result,
+  bsg_reply->reply_payload_rcv_len);
}
return;
 }
@@ -1136,7 +1138,8 @@ lpfc_bsg_ct_unsol_event(struct lpfc_hba *phba, struct 
lpfc_sli_ring *pring,
job->dd_data = NULL;
/* complete the job back to userspace */
spin_unlock_irqrestore(&phba->ct_ev_lock, flags);
-   fc_bsg_jobdone(job);
+

[PATCH v2 06/16] scsi: fc: provide fc_bsg_to_rport() helper

2016-10-12 Thread Johannes Thumshirn

Provide fc_bsg_to_rport() helper that will become handy when we're moving
from struct fc_bsg_job to a plain struct bsg_job. Also move all LLDDs to use
the new helper.

Signed-off-by: Johannes Thumshirn 
---
 drivers/s390/scsi/zfcp_fc.c  | 5 +++--
 drivers/scsi/ibmvscsi/ibmvfc.c   | 2 +-
 drivers/scsi/libfc/fc_lport.c| 4 ++--
 drivers/scsi/lpfc/lpfc_bsg.c | 4 ++--
 drivers/scsi/qla2xxx/qla_bsg.c   | 4 ++--
 drivers/scsi/scsi_transport_fc.c | 3 ++-
 include/scsi/scsi_transport_fc.h | 5 +
 7 files changed, 17 insertions(+), 10 deletions(-)

diff --git a/drivers/s390/scsi/zfcp_fc.c b/drivers/s390/scsi/zfcp_fc.c
index 813c286..1977a66 100644
--- a/drivers/s390/scsi/zfcp_fc.c
+++ b/drivers/s390/scsi/zfcp_fc.c
@@ -938,7 +938,7 @@ static int zfcp_fc_exec_els_job(struct fc_bsg_job *job,
struct zfcp_adapter *adapter)
 {
struct zfcp_fsf_ct_els *els = job->dd_data;
-   struct fc_rport *rport = job->rport;
+   struct fc_rport *rport = fc_bsg_to_rport(job);
struct fc_bsg_request *bsg_request = job->request;
struct zfcp_port *port;
u32 d_id;
@@ -986,8 +986,9 @@ int zfcp_fc_exec_bsg_job(struct fc_bsg_job *job)
struct zfcp_adapter *adapter;
struct zfcp_fsf_ct_els *ct_els = job->dd_data;
struct fc_bsg_request *bsg_request = job->request;
+   struct fc_rport *rport = fc_bsg_to_rport(job);
 
-   shost = job->rport ? rport_to_shost(job->rport) : fc_bsg_to_shost(job);
+   shost = rport ? rport_to_shost(rport) : fc_bsg_to_shost(job);
adapter = (struct zfcp_adapter *)shost->hostdata[0];
 
if (!(atomic_read(&adapter->status) & ZFCP_STATUS_COMMON_OPEN))
diff --git a/drivers/scsi/ibmvscsi/ibmvfc.c b/drivers/scsi/ibmvscsi/ibmvfc.c
index f7b50af..35114d9 100644
--- a/drivers/scsi/ibmvscsi/ibmvfc.c
+++ b/drivers/scsi/ibmvscsi/ibmvfc.c
@@ -1816,7 +1816,7 @@ unlock_out:
 static int ibmvfc_bsg_request(struct fc_bsg_job *job)
 {
struct ibmvfc_host *vhost = shost_priv(fc_bsg_to_shost(job));
-   struct fc_rport *rport = job->rport;
+   struct fc_rport *rport = fc_bsg_to_rport(job);
struct ibmvfc_passthru_mad *mad;
struct ibmvfc_event *evt;
union ibmvfc_iu rsp_iu;
diff --git a/drivers/scsi/libfc/fc_lport.c b/drivers/scsi/libfc/fc_lport.c
index c60fdb9..156708a 100644
--- a/drivers/scsi/libfc/fc_lport.c
+++ b/drivers/scsi/libfc/fc_lport.c
@@ -2102,7 +2102,7 @@ int fc_lport_bsg_request(struct fc_bsg_job *job)
 
switch (bsg_request->msgcode) {
case FC_BSG_RPT_ELS:
-   rport = job->rport;
+   rport = fc_bsg_to_rport(job);
if (!rport)
break;
 
@@ -2112,7 +2112,7 @@ int fc_lport_bsg_request(struct fc_bsg_job *job)
break;
 
case FC_BSG_RPT_CT:
-   rport = job->rport;
+   rport = fc_bsg_to_rport(job);
if (!rport)
break;
 
diff --git a/drivers/scsi/lpfc/lpfc_bsg.c b/drivers/scsi/lpfc/lpfc_bsg.c
index bfcc37d..dae7cc3 100644
--- a/drivers/scsi/lpfc/lpfc_bsg.c
+++ b/drivers/scsi/lpfc/lpfc_bsg.c
@@ -386,7 +386,7 @@ lpfc_bsg_send_mgmt_cmd(struct fc_bsg_job *job)
 {
struct lpfc_vport *vport = shost_priv(fc_bsg_to_shost(job));
struct lpfc_hba *phba = vport->phba;
-   struct lpfc_rport_data *rdata = job->rport->dd_data;
+   struct lpfc_rport_data *rdata = fc_bsg_to_rport(job)->dd_data;
struct lpfc_nodelist *ndlp = rdata->pnode;
struct fc_bsg_reply *bsg_reply = job->reply;
struct ulp_bde64 *bpl = NULL;
@@ -660,7 +660,7 @@ lpfc_bsg_rport_els(struct fc_bsg_job *job)
 {
struct lpfc_vport *vport = shost_priv(fc_bsg_to_shost(job));
struct lpfc_hba *phba = vport->phba;
-   struct lpfc_rport_data *rdata = job->rport->dd_data;
+   struct lpfc_rport_data *rdata = fc_bsg_to_rport(job)->dd_data;
struct lpfc_nodelist *ndlp = rdata->pnode;
struct fc_bsg_request *bsg_request = job->request;
struct fc_bsg_reply *bsg_reply = job->reply;
diff --git a/drivers/scsi/qla2xxx/qla_bsg.c b/drivers/scsi/qla2xxx/qla_bsg.c
index 109b852..917eafe 100644
--- a/drivers/scsi/qla2xxx/qla_bsg.c
+++ b/drivers/scsi/qla2xxx/qla_bsg.c
@@ -264,7 +264,7 @@ qla2x00_process_els(struct fc_bsg_job *bsg_job)
uint16_t nextlid = 0;
 
if (bsg_request->msgcode == FC_BSG_RPT_ELS) {
-   rport = bsg_job->rport;
+   rport = fc_bsg_to_rport(bsg_job);
fcport = *(fc_port_t **) rport->dd_data;
host = rport_to_shost(rport);
vha = shost_priv(host);
@@ -2485,7 +2485,7 @@ qla24xx_bsg_request(struct fc_bsg_job *bsg_job)
bsg_reply->reply_payload_rcv_len = 0;
 
if (bsg_request->msgcode == FC_BSG_RPT_ELS) {
-   rport = bsg_job->rport;
+   rport = fc_bsg_to_rport(bsg_job);
host = rport_to_shost(rport);
vha =

Re: [mm] c4344e8035: WARNING: CPU: 0 PID: 101 at mm/memory.c:303 __tlb_remove_page_size+0x25/0x99

2016-10-12 Thread Ye Xiaolong

On 10/12, Aneesh Kumar K.V wrote:
>kernel test robot <xiaolong...@intel.com> writes:
>
>> FYI, we noticed the following commit:
>>
>> https://github.com/0day-ci/linux 
>> Aneesh-Kumar-K-V/mm-Use-the-correct-page-size-when-removing-the-page/20161012-013446
>> commit c4344e80359420d7574b3b90fddf53311f1d24e6 ("mm: Remove the page size 
>> change check in tlb_remove_page")
>>
>> in testcase: boot
>>
>> on test machine: qemu-system-i386 -enable-kvm -cpu Haswell,+smep,+smap -m 
>> 360M
>>
>> caused below changes:
>>
>>
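>> +------------------------------------------------+------------+------------+
>> |                                                | eff764128d | c4344e8035 |
>> +------------------------------------------------+------------+------------+
>> | boot_successes                                 | 59         | 0          |
>> | boot_failures                                  | 0          | 43         |
>> | WARNING:at_mm/memory.c:#__tlb_remove_page_size | 0          | 43         |
>> | calltrace:SyS_execve                           | 0          | 43         |
>> | calltrace:run_init_process                     | 0          | 21         |
>> +------------------------------------------------+------------+------------+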
>>
>>
>>
>> [4.096204] Write protecting the kernel text: 3148k
>> [4.096911] Write protecting the kernel read-only data: 1444k
>> [4.120357] [ cut here ]
>> [4.121078] WARNING: CPU: 0 PID: 101 at mm/memory.c:303 
>> __tlb_remove_page_size+0x25/0x99
>> [4.122380] Modules linked in:
>> [4.122788] CPU: 0 PID: 101 Comm: run-parts Not tainted 
>> 4.8.0-mm1-00315-gc4344e8 #5
>> [4.123956]  bd145dc4 b111e5e6 bd145de0 b10320dc 012f b10974d1 
>> bd145e70 c4954170
>> [4.125277]  c4954170 bd145df4 b103215f 0009   
>> bd145e04 b10974d1
>> [4.126424]  c4954170 bd145e70 bd145e14 b10263ca bd145e70 bd47bafc 
>> bd145e40 b109767a
>> [4.127622] Call Trace:
>
>Thanks for the report. The below change should fix this.
>
>commit 18c929e7cf672da617dc218c6265366bf78b1644
>Author: Aneesh Kumar K.V <aneesh.ku...@linux.vnet.ibm.com>
>Date:   Wed Oct 12 08:40:41 2016 +0530
>
>update mmu gather page size before flushing page table cache
>
>diff --git a/mm/memory.c b/mm/memory.c
>index 26d1ba8c87e6..7e7eccb82a2b 100644
>--- a/mm/memory.c
>+++ b/mm/memory.c
>@@ -526,7 +526,11 @@ void free_pgd_range(struct mmu_gather *tlb,
>   end -= PMD_SIZE;
>   if (addr > end - 1)
>   return;
>-
>+  /*
>+   * We add page table cache pages with PAGE_SIZE,
>+   * (see pte_free_tlb()), flush the tlb if we need
>+   */
>+  tlb_remove_check_page_size_change(tlb, PAGE_SIZE);
>   pgd = pgd_offset(tlb->mm, addr);
>   do {
>   next = pgd_addr_end(addr, end);
>

Just applied this fix on top of commit c4344e8035 and confirmed that
reported warning is gone with this fix.

Tested-by: Xiaolong Ye <xiaolong...@intel.com>

=
compiler/kconfig/rootfs/sleep/tbox_group/testcase:
  
gcc-6/i386-randconfig-s1-201641/quantal-core-i386.cgz/1/vm-vp-quantal-i386/boot

commit:
  c4344e80359420d7574b3b90fddf53311f1d24e6
  384db818365c90b91d8bad80be188765e801cf58 ("update mmu gather page size before 
flushing page table cache")

c4344e80359420d7 384db818365c90b91d8bad80be
 --
   fail:runs  %reproductionfail:runs
   | | |
 24:24-100%:5 
dmesg.WARNING:at_mm/memory.c:#__tlb_remove_page_size

Thanks,
Xiaolong

[mm] c4344e8035: WARNING: CPU: 0 PID: 101 at mm/memory.c:303 __tlb_remove_page_size+0x25/0x99

2016-10-12 Thread kernel test robot

FYI, we noticed the following commit:

https://github.com/0day-ci/linux 
Aneesh-Kumar-K-V/mm-Use-the-correct-page-size-when-removing-the-page/20161012-013446
commit c4344e80359420d7574b3b90fddf53311f1d24e6 ("mm: Remove the page size 
change check in tlb_remove_page")

in testcase: boot

on test machine: qemu-system-i386 -enable-kvm -cpu Haswell,+smep,+smap -m 360M

caused below changes:


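+------------------------------------------------+------------+------------+
|                                                | eff764128d | c4344e8035 |
+------------------------------------------------+------------+------------+
| boot_successes                                 | 59         | 0          |
| boot_failures                                  | 0          | 43         |
| WARNING:at_mm/memory.c:#__tlb_remove_page_size | 0          | 43         |
| calltrace:SyS_execve                           | 0          | 43         |
| calltrace:run_init_process                     | 0          | 21         |
+------------------------------------------------+------------+------------+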



[4.096204] Write protecting the kernel text: 3148k
[4.096911] Write protecting the kernel read-only data: 1444k
[4.120357] [ cut here ]
[4.121078] WARNING: CPU: 0 PID: 101 at mm/memory.c:303 
__tlb_remove_page_size+0x25/0x99
[4.122380] Modules linked in:
[4.122788] CPU: 0 PID: 101 Comm: run-parts Not tainted 
4.8.0-mm1-00315-gc4344e8 #5
[4.123956]  bd145dc4 b111e5e6 bd145de0 b10320dc 012f b10974d1 bd145e70 
c4954170
[4.125277]  c4954170 bd145df4 b103215f 0009   bd145e04 
b10974d1
[4.126424]  c4954170 bd145e70 bd145e14 b10263ca bd145e70 bd47bafc bd145e40 
b109767a
[4.127622] Call Trace:
[4.128255] [ cut here ]
[4.128261] WARNING: CPU: 0 PID: 103 at mm/memory.c:303 
__tlb_remove_page_size+0x25/0x99
[4.128261] Modules linked in:
[4.128264] CPU: 0 PID: 103 Comm: sh Not tainted 4.8.0-mm1-00315-gc4344e8 #5
[4.128268]  bd143dc4 b111e5e6 bd143de0 b10320dc 012f b10974d1 bd143e70 
c494cd00
[4.128271]  c494cd00 bd143df4 b103215f 0009   bd143e04 
b10974d1
[4.128274]  c494cd00 bd143e70 bd143e14 b10263ca bd143e70 bd47dafc bd143e40 
b109767a
[4.128275] Call Trace:
[4.128281]  [] dump_stack+0x16/0x18
[4.128284]  [] __warn+0xa5/0xbc
[4.128286]  [] ? __tlb_remove_page_size+0x25/0x99
[4.128288]  [] warn_slowpath_null+0x11/0x16
[4.128290]  [] __tlb_remove_page_size+0x25/0x99
[4.128293]  [] ___pte_free_tlb+0x57/0x66
[4.128295]  [] free_pgd_range+0x135/0x1d0
[4.128298]  [] setup_arg_pages+0x219/0x29a
[4.128302]  [] load_elf_binary+0x2ad/0x94a
[4.128305]  [] ? _copy_from_user+0x49/0x5c
[4.128307]  [] search_binary_handler+0x106/0x159
[4.128309]  [] do_execveat_common+0x3bf/0x4dc
[4.128311]  [] do_execve+0x14/0x16
[4.128313]  [] SyS_execve+0x16/0x18
[4.128316]  [] do_fast_syscall_32+0x8f/0xce
[4.128320]  [] sysenter_past_esp+0x47/0x75
[4.128322] ---[ end trace 816334aebb0eaffe ]---
[4.132981] [ cut here ]





Thanks,
Kernel Test Robot
#
# Automatically generated file; DO NOT EDIT.
# Linux/i386 4.8.0-mm1 Kernel Configuration
#
# CONFIG_64BIT is not set
CONFIG_X86_32=y
CONFIG_X86=y
CONFIG_INSTRUCTION_DECODER=y
CONFIG_OUTPUT_FORMAT="elf32-i386"
CONFIG_ARCH_DEFCONFIG="arch/x86/configs/i386_defconfig"
CONFIG_LOCKDEP_SUPPORT=y
CONFIG_STACKTRACE_SUPPORT=y
CONFIG_MMU=y
CONFIG_ARCH_MMAP_RND_BITS_MIN=8
CONFIG_ARCH_MMAP_RND_BITS_MAX=16
CONFIG_ARCH_MMAP_RND_COMPAT_BITS_MIN=8
CONFIG_ARCH_MMAP_RND_COMPAT_BITS_MAX=16
CONFIG_NEED_SG_DMA_LENGTH=y
CONFIG_GENERIC_ISA_DMA=y
CONFIG_GENERIC_BUG=y
CONFIG_GENERIC_HWEIGHT=y
CONFIG_ARCH_MAY_HAVE_PC_FDC=y
CONFIG_RWSEM_XCHGADD_ALGORITHM=y
CONFIG_GENERIC_CALIBRATE_DELAY=y
CONFIG_ARCH_HAS_CPU_RELAX=y
CONFIG_ARCH_HAS_CACHE_LINE_SIZE=y
CONFIG_HAVE_SETUP_PER_CPU_AREA=y
CONFIG_NEED_PER_CPU_EMBED_FIRST_CHUNK=y
CONFIG_NEED_PER_CPU_PAGE_FIRST_CHUNK=y
CONFIG_ARCH_HIBERNATION_POSSIBLE=y
CONFIG_ARCH_SUSPEND_POSSIBLE=y
CONFIG_ARCH_WANT_HUGE_PMD_SHARE=y
CONFIG_ARCH_WANT_GENERAL_HUGETLB=y
CONFIG_ARCH_SUPPORTS_OPTIMIZED_INLINING=y
CONFIG_ARCH_SUPPORTS_DEBUG_PAGEALLOC=y
CONFIG_ARCH_SUPPORTS_UPROBES=y
CONFIG_FIX_EARLYCON_MEM=y
CONFIG_DEBUG_RODATA=y
CONFIG_PGTABLE_LEVELS=2
CONFIG_DEFCONFIG_LIST="/lib/modules/$UNAME_RELEASE/.config"
CONFIG_IRQ_WORK=y
CONFIG_BUILDTIME_EXTABLE_SORT=y
CONFIG_THREAD_INFO_IN_TASK=y

#
# General setup
#
CONFIG_BROKEN_ON_SMP=y
CONFIG_INIT_ENV_ARG_LIMIT=32
CONFIG_CROSS_COMPILE=""
# CONFIG_COMPILE_TEST is not set
CONFIG_LOCALVERSION=""
CONFIG_LOCALVERSION_AUTO=y
CONFIG_HAVE_KERNEL_GZIP=y
CONFIG_HAVE_KERNEL_BZIP2=y
CONFIG_HAVE_KERNEL_LZMA=y
CONFIG_HAVE_KERNEL_XZ=y
CONFIG_HAVE_KERNEL_LZO=y
CONFIG_HAVE_KERNEL_LZ4=y
# CONFIG_KERNEL_GZIP is not set
# CONFIG_KERNEL_BZIP2 is not set
# CONFIG_KERNEL_LZMA

Re: [PATCH v3 08/11] powerpc/tracing: fix compat syscall handling

2016-10-12 Thread Michael Ellerman

Marcin Nowakowski  writes:

> Adapt the code to make use of new syscall handling interface
>
> Signed-off-by: Marcin Nowakowski 
> Cc: Steven Rostedt 
> Cc: Ingo Molnar 
> Cc: Benjamin Herrenschmidt 
> Cc: Paul Mackerras 
> Cc: Michael Ellerman 
> Cc: linuxppc-dev@lists.ozlabs.org
> ---
>  arch/powerpc/include/asm/ftrace.h | 11 +++
>  arch/powerpc/kernel/ftrace.c  |  4 

I went to test this and noticed the exit and enter events appear to be
reversed in time? (your series on top of 24532f768121)

  ls-4221  [003] 83.766113: compat_sys_rt_sigprocmask -> 0x2
  ls-4221  [003] 83.766137: compat_sys_rt_sigprocmask(how: 2, nset: 
1010db30, oset: 0, sigsetsize: 8)
  ls-4221  [003] 83.766175: compat_sys_rt_sigaction -> 0x14
  ls-4221  [003] 83.766175: compat_sys_rt_sigaction(sig: 14, act: 
ffbd33c4, oact: ffbd3338, sigsetsize: 8)
  ls-4221  [003] 83.766177: compat_sys_rt_sigaction -> 0x15
  ls-4221  [003] 83.766177: compat_sys_rt_sigaction(sig: 15, act: 
ffbd33c4, oact: ffbd3338, sigsetsize: 8)
  ls-4221  [003] 83.766178: compat_sys_rt_sigaction -> 0x16
  ls-4221  [003] 83.766178: compat_sys_rt_sigaction(sig: 16, act: 
ffbd33d4, oact: ffbd3348, sigsetsize: 8)
  ls-4221  [003] 83.766179: sys_setpgid -> 0x107d
  ls-4221  [003] 83.766179: sys_setpgid(pid: 107d, pgid: 107d)
  ls-4221  [003] 83.766180: compat_sys_rt_sigprocmask -> 0x0
  ls-4221  [003] 83.766181: compat_sys_rt_sigprocmask(how: 0, nset: 
ffbd34b0, oset: ffbd3530, sigsetsize: 8)
  ls-4221  [003] 83.766186: compat_sys_ioctl -> 0xff
  ls-4221  [003] 83.766187: compat_sys_ioctl(fd: ff, cmd: 80047476, 
arg32: ffbd3488)
  ls-4221  [003] 83.766188: compat_sys_rt_sigprocmask -> 0x2
  ls-4221  [003] 83.766189: compat_sys_rt_sigprocmask(how: 2, nset: 
ffbd3530, oset: 0, sigsetsize: 8)
  ls-4221  [003] 83.766189: sys_close -> 0x4
  ls-4221  [003] 83.766190: sys_close(fd: 4)
  ls-4221  [003] 83.766191: sys_read -> 0x3
  ls-4221  [003] 83.766191: sys_read(fd: 3, buf: ffbd35dc, count: 1)
  ls-4221  [003] 83.766235: sys_close -> 0x3
  ls-4221  [003] 83.766235: sys_close(fd: 3)

cheers

Re: [PATCH v3 03/11] tracing/syscalls: add compat syscall metadata

2016-10-12 Thread Michael Ellerman

Marcin Nowakowski  writes:

> Now that compat syscalls are properly distinguished from native calls,
> we can add metadata for compat syscalls as well.
> All the macros used to generate the metadata are the same as for
> standard syscalls, but with a compat_ prefix to distinguish them easily.
>
> Signed-off-by: Marcin Nowakowski 
> Cc: Steven Rostedt 
> Cc: Ingo Molnar 
> Cc: Benjamin Herrenschmidt 
> Cc: Paul Mackerras 
> Cc: Michael Ellerman 
> Cc: linuxppc-dev@lists.ozlabs.org
> ---
>  arch/powerpc/include/asm/ftrace.h | 15 +---
>  include/linux/compat.h| 74 
> +++
>  kernel/trace/trace_syscalls.c |  8 +++--
>  3 files changed, 90 insertions(+), 7 deletions(-)
>
> diff --git a/arch/powerpc/include/asm/ftrace.h 
> b/arch/powerpc/include/asm/ftrace.h
> index 686c5f7..9697a73 100644
> --- a/arch/powerpc/include/asm/ftrace.h
> +++ b/arch/powerpc/include/asm/ftrace.h
> @@ -73,12 +73,17 @@ struct dyn_arch_ftrace {
>  static inline bool arch_syscall_match_sym_name(const char *sym, const char 
> *name)
>  {
>   /*
> -  * Compare the symbol name with the system call name. Skip the .sys or 
> .SyS
> -  * prefix from the symbol name and the sys prefix from the system call 
> name and
> -  * just match the rest. This is only needed on ppc64 since symbol names 
> on
> -  * 32bit do not start with a period so the generic function will work.
> +  * Compare the symbol name with the system call name. Skip the .sys,
> +  * .SyS or .compat_sys prefix from the symbol name and the sys prefix
> +  * from the system call name and just match the rest. This is only
> +  * needed on ppc64 since symbol names on 32bit do not start with a
> +  * period so the generic function will work.
>*/
> - return !strcmp(sym + 4, name + 3);
> + int prefix_len = 3;
> +
> + if (!strncasecmp(name, "compat_", 7))
> + prefix_len = 10;
> + return !strcmp(sym + prefix_len + 1, name + prefix_len);
>  }

It's annoying that we have to duplicate all that just to do a + 1.

How about this as a precursor?

cheers


diff --git a/Documentation/trace/ftrace-design.txt 
b/Documentation/trace/ftrace-design.txt
index dd5f916b351d..bd65f2adeb09 100644
--- a/Documentation/trace/ftrace-design.txt
+++ b/Documentation/trace/ftrace-design.txt
@@ -226,10 +226,6 @@ You need very few things to get the syscalls tracing in an 
arch.
 - If the system call table on this arch is more complicated than a simple array
   of addresses of the system calls, implement an arch_syscall_addr to return
   the address of a given system call.
-- If the symbol names of the system calls do not match the function names on
-  this arch, define ARCH_HAS_SYSCALL_MATCH_SYM_NAME in asm/ftrace.h and
-  implement arch_syscall_match_sym_name with the appropriate logic to return
-  true if the function name corresponds with the symbol name.
 - Tag this arch as HAVE_SYSCALL_TRACEPOINTS.
 
 
diff --git a/arch/powerpc/include/asm/ftrace.h 
b/arch/powerpc/include/asm/ftrace.h
index 686c5f70eb84..dc48f5b2878d 100644
--- a/arch/powerpc/include/asm/ftrace.h
+++ b/arch/powerpc/include/asm/ftrace.h
@@ -60,6 +60,12 @@ struct dyn_arch_ftrace {
struct module *mod;
 };
 #endif /*  CONFIG_DYNAMIC_FTRACE */
+
+#ifdef PPC64_ELF_ABI_v1
+/* On ppc64 ABIv1 (BE) we have to skip the leading '.' in the symbol name */
+#define ARCH_SYM_NAME_SKIP_CHARS 1
+#endif
+
 #endif /* __ASSEMBLY__ */
 
 #ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS
@@ -67,20 +73,4 @@ struct dyn_arch_ftrace {
 #endif
 #endif
 
-#if defined(CONFIG_FTRACE_SYSCALLS) && !defined(__ASSEMBLY__)
-#ifdef PPC64_ELF_ABI_v1
-#define ARCH_HAS_SYSCALL_MATCH_SYM_NAME
-static inline bool arch_syscall_match_sym_name(const char *sym, const char 
*name)
-{
-   /*
-* Compare the symbol name with the system call name. Skip the .sys or 
.SyS
-* prefix from the symbol name and the sys prefix from the system call 
name and
-* just match the rest. This is only needed on ppc64 since symbol names 
on
-* 32bit do not start with a period so the generic function will work.
-*/
-   return !strcmp(sym + 4, name + 3);
-}
-#endif
-#endif /* CONFIG_FTRACE_SYSCALLS && !__ASSEMBLY__ */
-
 #endif /* _ASM_POWERPC_FTRACE */
diff --git a/kernel/trace/trace_syscalls.c b/kernel/trace/trace_syscalls.c
index b2b6efc083a4..91a7315dbe43 100644
--- a/kernel/trace/trace_syscalls.c
+++ b/kernel/trace/trace_syscalls.c
@@ -31,8 +31,11 @@ extern struct syscall_metadata *__stop_syscalls_metadata[];
 
 static struct syscall_metadata **syscalls_metadata;
 
-#ifndef ARCH_HAS_SYSCALL_MATCH_SYM_NAME
-static inline bool arch_syscall_match_sym_name(const char *sym, const char 
*name)
+#ifndef ARCH_SYM_NAME_SKIP_CHARS
+#define ARCH_SYM_NAME_SKIP_CHARS 0
+#endif
+
+static

Re: [PATCH] powerpc/64: option to force run-at-load to test relocation

2016-10-12 Thread Balbir Singh



On 12/10/16 17:57, Nicholas Piggin wrote:
> This adds a config option that can help exercise the case when
> the kernel is not running at PAGE_OFFSET.
> 
> Signed-off-by: Nicholas Piggin 
> ---
>  arch/powerpc/Kconfig   | 9 +
>  arch/powerpc/kernel/head_64.S  | 4 
>  arch/powerpc/kernel/setup-common.c | 3 +++
>  3 files changed, 16 insertions(+)
> 
> diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
> index 65fba4c..5d43cb8 100644
> --- a/arch/powerpc/Kconfig
> +++ b/arch/powerpc/Kconfig
> @@ -478,6 +478,15 @@ config RELOCATABLE
> setting can still be useful to bootwrappers that need to know the
> load address of the kernel (eg. u-boot/mkimage).
>  
> +config RELOCATABLE_TEST
> + bool "Test relocatable kernel"
> + depends on (PPC64 && RELOCATABLE)
> + default n
> + help
> +   This runs the relocatable kernel at the address it was initially
> +   loaded at, which tends to be non-zero and therefore test the
> +   relocation code.
> +
>  config CRASH_DUMP
>   bool "Build a kdump crash kernel"
>   depends on PPC64 || 6xx || FSL_BOOKE || (44x && !SMP)
> diff --git a/arch/powerpc/kernel/head_64.S b/arch/powerpc/kernel/head_64.S
> index 79da0641..bc9ceac 100644
> --- a/arch/powerpc/kernel/head_64.S
> +++ b/arch/powerpc/kernel/head_64.S
> @@ -111,8 +111,12 @@ __secondary_hold_acknowledge:
>   .globl  __run_at_load
>  __run_at_load:
>  DEFINE_FIXED_SYMBOL(__run_at_load)
> +#ifdef CONFIG_RELOCATABLE_TEST
> + .long   0x1 /* Test relocation, do not relocate to 0 */
> +#else
>   .long   0x72756e30  /* "run0" -- relocate to 0 by default */
>  #endif
> +#endif

Could we do something like

config RELOCATION_VALUE
default 0x72756e30
default 1  if CONFIG_RELOCATABLE_TEST

and then get

.long CONFIG_RELOCATION_VALUE




>  
>   . = 0x60
>  /*
> diff --git a/arch/powerpc/kernel/setup-common.c 
> b/arch/powerpc/kernel/setup-common.c
> index dba265c..18e0f19 100644
> --- a/arch/powerpc/kernel/setup-common.c
> +++ b/arch/powerpc/kernel/setup-common.c
> @@ -795,6 +795,9 @@ static __init void print_system_info(void)
>   pr_info("mmu_features  = 0x%08x\n", cur_cpu_spec->mmu_features);
>  #ifdef CONFIG_PPC64
>   pr_info("firmware_features = 0x%016lx\n", powerpc_firmware_features);
> +
> + if (get_paca()->kernelbase != PAGE_OFFSET)
> + pr_info("kernelbase= 0x%llx\n", get_paca()->kernelbase);
>  #endif
>  

Do we need this? We get physical_offset if we are relocated.

>  #ifdef CONFIG_PPC_STD_MMU_64
> 

Balbir Singh.

[PATCH] powerpc: link error on orphan sections

2016-10-12 Thread Nicholas Piggin

Add --orphan-handling=error to final link flags. This ensures we have to
handle all sections. This would have caught subtle breakage such as
7de3b27bac47da9de08409df1d69664acbb72197 at build-time.

Also bring some wayward sections into the fold:
- .text.hot and .text.unlikely are compiler generated sections.
- .sfpr is a linker generated section for register save functions.
- .sdata2, .dynsbss, .plt are used by PPC32
- We previously did not specify DWARF_DEBUG or STABS_DEBUG
- DWARF_DEBUG did not include DWARF3 .debug_ranges
- A number of sections are unused.

I don't know if I've exactly got everything right here, particularly
with ppc32, so would appreciate people casting their eye over it.

Signed-off-by: Nicholas Piggin 
---
 arch/powerpc/Makefile |  2 +-
 arch/powerpc/kernel/vmlinux.lds.S | 16 ++--
 include/asm-generic/vmlinux.lds.h |  3 +++
 3 files changed, 18 insertions(+), 3 deletions(-)

diff --git a/arch/powerpc/Makefile b/arch/powerpc/Makefile
index 50d020a..a3f2784 100644
--- a/arch/powerpc/Makefile
+++ b/arch/powerpc/Makefile
@@ -90,7 +90,7 @@ endif
 
 LDFLAGS_vmlinux-y := -Bstatic
 LDFLAGS_vmlinux-$(CONFIG_RELOCATABLE) := -pie
-LDFLAGS_vmlinux:= $(LDFLAGS_vmlinux-y)
+LDFLAGS_vmlinux:= $(LDFLAGS_vmlinux-y) --orphan-handling=error
 
 ifeq ($(CONFIG_PPC64),y)
 ifeq ($(call cc-option-yn,-mcmodel=medium),y)
diff --git a/arch/powerpc/kernel/vmlinux.lds.S 
b/arch/powerpc/kernel/vmlinux.lds.S
index 8295f51..9f4d85e 100644
--- a/arch/powerpc/kernel/vmlinux.lds.S
+++ b/arch/powerpc/kernel/vmlinux.lds.S
@@ -97,7 +97,7 @@ SECTIONS
.text : AT(ADDR(.text) - LOAD_OFFSET) {
ALIGN_FUNCTION();
/* careful! __ftr_alt_* sections need to be close to .text */
-   *(.text .fixup __ftr_alt_* .ref.text)
+   *(.text.hot .text .text.fixup .text.unlikely .fixup __ftr_alt_* 
.ref.text .sfpr) \
SCHED_TEXT
CPUIDLE_TEXT
LOCK_TEXT
@@ -256,7 +256,9 @@ SECTIONS
.data : AT(ADDR(.data) - LOAD_OFFSET) {
DATA_DATA
*(.sdata)
+   *(.sdata2)
*(.got.plt) *(.got)
+   *(.plt)
}
 #else
.data : AT(ADDR(.data) - LOAD_OFFSET) {
@@ -317,6 +319,16 @@ SECTIONS
_end = . ;
PROVIDE32 (end = .);
 
-   /* Sections to be discarded. */
+   STABS_DEBUG
+
+   DWARF_DEBUG
+
DISCARDS
+   /DISCARD/ : {
+   *(*.EMB.apuinfo)
+   *(.glink .iplt .plt .rela* .comment)
+   *(.gnu.version*)
+   *(.gnu.attributes)
+   *(.eh_frame)
+   }
 }
diff --git a/include/asm-generic/vmlinux.lds.h 
b/include/asm-generic/vmlinux.lds.h
index 3e42bcd..264ebb3 100644
--- a/include/asm-generic/vmlinux.lds.h
+++ b/include/asm-generic/vmlinux.lds.h
@@ -581,6 +581,7 @@
 #define SBSS(sbss_align)   \
. = ALIGN(sbss_align);  \
.sbss : AT(ADDR(.sbss) - LOAD_OFFSET) { \
+   *(.dynsbss) \
*(.sbss)\
*(.scommon) \
}
@@ -627,6 +628,8 @@
.debug_str  0 : { *(.debug_str) }   \
.debug_loc  0 : { *(.debug_loc) }   \
.debug_macinfo  0 : { *(.debug_macinfo) }   \
+   /* DWARF 3 */   \
+   .debug_ranges   0 : { *(.debug_ranges) }\
/* SGI/MIPS DWARF 2 extensions */   \
.debug_weaknames 0 : { *(.debug_weaknames) }\
.debug_funcnames 0 : { *(.debug_funcnames) }\
-- 
2.9.3

[PATCH] powerpc/64: option to force run-at-load to test relocation

2016-10-12 Thread Nicholas Piggin

This adds a config option that can help exercise the case when
the kernel is not running at PAGE_OFFSET.

Signed-off-by: Nicholas Piggin 
---
 arch/powerpc/Kconfig   | 9 +
 arch/powerpc/kernel/head_64.S  | 4 
 arch/powerpc/kernel/setup-common.c | 3 +++
 3 files changed, 16 insertions(+)

diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index 65fba4c..5d43cb8 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -478,6 +478,15 @@ config RELOCATABLE
  setting can still be useful to bootwrappers that need to know the
  load address of the kernel (eg. u-boot/mkimage).
 
+config RELOCATABLE_TEST
+   bool "Test relocatable kernel"
+   depends on (PPC64 && RELOCATABLE)
+   default n
+   help
+ This runs the relocatable kernel at the address it was initially
+ loaded at, which tends to be non-zero and therefore tests the
+ relocation code.
+
 config CRASH_DUMP
bool "Build a kdump crash kernel"
depends on PPC64 || 6xx || FSL_BOOKE || (44x && !SMP)
diff --git a/arch/powerpc/kernel/head_64.S b/arch/powerpc/kernel/head_64.S
index 79da0641..bc9ceac 100644
--- a/arch/powerpc/kernel/head_64.S
+++ b/arch/powerpc/kernel/head_64.S
@@ -111,8 +111,12 @@ __secondary_hold_acknowledge:
.globl  __run_at_load
 __run_at_load:
 DEFINE_FIXED_SYMBOL(__run_at_load)
+#ifdef CONFIG_RELOCATABLE_TEST
+   .long   0x1 /* Test relocation, do not relocate to 0 */
+#else
.long   0x72756e30  /* "run0" -- relocate to 0 by default */
 #endif
+#endif
 
. = 0x60
 /*
diff --git a/arch/powerpc/kernel/setup-common.c 
b/arch/powerpc/kernel/setup-common.c
index dba265c..18e0f19 100644
--- a/arch/powerpc/kernel/setup-common.c
+++ b/arch/powerpc/kernel/setup-common.c
@@ -795,6 +795,9 @@ static __init void print_system_info(void)
pr_info("mmu_features  = 0x%08x\n", cur_cpu_spec->mmu_features);
 #ifdef CONFIG_PPC64
pr_info("firmware_features = 0x%016lx\n", powerpc_firmware_features);
+
+   if (get_paca()->kernelbase != PAGE_OFFSET)
+   pr_info("kernelbase= 0x%llx\n", get_paca()->kernelbase);
 #endif
 
 #ifdef CONFIG_PPC_STD_MMU_64
-- 
2.9.3

59 matches

Mail list logo