[PATCH] include/kcore: Remove left-over instances of 'kclist_add_remap()'

2019-03-25 Thread Bhupesh Sharma
Commit bf904d2762ee ("x86/pti/64: Remove the SYSCALL64 entry trampoline")
removed the sole usage of 'kclist_add_remap()' from
'arch/x86/mm/cpu_entry_area.c', but it did not remove the left-over
definition from the include file.

Fix the same.

Cc: James Morse 
Cc: Boris Petkov 
Cc: Ingo Molnar 
Cc: Thomas Gleixner 
Cc: Michael Ellerman 
Cc: Dave Anderson 
Cc: Dave Young 
Cc: x...@kernel.org
Cc: linuxppc-dev@lists.ozlabs.org
Cc: linux-arm-ker...@lists.infradead.org
Cc: linux-ker...@vger.kernel.org
Cc: ke...@lists.infradead.org
Signed-off-by: Bhupesh Sharma 
---
 include/linux/kcore.h | 11 ---
 1 file changed, 11 deletions(-)

diff --git a/include/linux/kcore.h b/include/linux/kcore.h
index c843f4a9c512..da676cdbd727 100644
--- a/include/linux/kcore.h
+++ b/include/linux/kcore.h
@@ -38,12 +38,6 @@ struct vmcoredd_node {
 
 #ifdef CONFIG_PROC_KCORE
 void __init kclist_add(struct kcore_list *, void *, size_t, int type);
-static inline
-void kclist_add_remap(struct kcore_list *m, void *addr, void *vaddr, size_t sz)
-{
-   m->vaddr = (unsigned long)vaddr;
-   kclist_add(m, addr, sz, KCORE_REMAP);
-}
 
 extern int __init register_mem_pfn_is_ram(int (*fn)(unsigned long pfn));
 #else
@@ -51,11 +45,6 @@ static inline
 void kclist_add(struct kcore_list *new, void *addr, size_t size, int type)
 {
 }
-
-static inline
-void kclist_add_remap(struct kcore_list *m, void *addr, void *vaddr, size_t sz)
-{
-}
 #endif
 
 #endif /* _LINUX_KCORE_H */
-- 
2.7.4



[PATCH v2 3/3] powerpc/mm: print hash info in a helper

2019-03-25 Thread Christophe Leroy
Reduce #ifdef mess by defining a helper to print
hash info at startup.

In the meantime, remove the display of hash table address
to reduce leak of non necessary information.

Signed-off-by: Christophe Leroy 
---
v2: added header to avoid sparse warning

 arch/powerpc/kernel/setup-common.c | 19 +--
 arch/powerpc/mm/hash_utils_64.c| 10 ++
 arch/powerpc/mm/mmu_decl.h |  5 -
 arch/powerpc/mm/ppc_mmu_32.c   |  9 -
 4 files changed, 23 insertions(+), 20 deletions(-)

diff --git a/arch/powerpc/kernel/setup-common.c 
b/arch/powerpc/kernel/setup-common.c
index 2e5dfb6e0823..f24a74f7912d 100644
--- a/arch/powerpc/kernel/setup-common.c
+++ b/arch/powerpc/kernel/setup-common.c
@@ -799,12 +799,6 @@ void arch_setup_pdev_archdata(struct platform_device *pdev)
 static __init void print_system_info(void)
 {
pr_info("-\n");
-#ifdef CONFIG_PPC_BOOK3S_64
-   pr_info("ppc64_pft_size= 0x%llx\n", ppc64_pft_size);
-#endif
-#ifdef CONFIG_PPC_BOOK3S_32
-   pr_info("Hash_size = 0x%lx\n", Hash_size);
-#endif
pr_info("phys_mem_size = 0x%llx\n",
(unsigned long long)memblock_phys_mem_size());
 
@@ -826,18 +820,7 @@ static __init void print_system_info(void)
pr_info("firmware_features = 0x%016lx\n", powerpc_firmware_features);
 #endif
 
-#ifdef CONFIG_PPC_BOOK3S_64
-   if (htab_address)
-   pr_info("htab_address  = 0x%p\n", htab_address);
-   if (htab_hash_mask)
-   pr_info("htab_hash_mask= 0x%lx\n", htab_hash_mask);
-#endif
-#ifdef CONFIG_PPC_BOOK3S_32
-   if (Hash)
-   pr_info("Hash  = 0x%p\n", Hash);
-   if (Hash_mask)
-   pr_info("Hash_mask = 0x%lx\n", Hash_mask);
-#endif
+   print_system_hash_info();
 
if (PHYSICAL_START > 0)
pr_info("physical_start= 0x%llx\n",
diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c
index 0a4f939a8161..cef2d05fdb9b 100644
--- a/arch/powerpc/mm/hash_utils_64.c
+++ b/arch/powerpc/mm/hash_utils_64.c
@@ -65,6 +65,8 @@
 #include 
 #include 
 
+#include "mmu_decl.h"
+
 #ifdef DEBUG
 #define DBG(fmt...) udbg_printf(fmt)
 #else
@@ -1909,3 +1911,11 @@ static int __init hash64_debugfs(void)
 }
 machine_device_initcall(pseries, hash64_debugfs);
 #endif /* CONFIG_DEBUG_FS */
+
+void __init print_system_hash_info(void)
+{
+   pr_info("ppc64_pft_size= 0x%llx\n", ppc64_pft_size);
+
+   if (htab_hash_mask)
+   pr_info("htab_hash_mask= 0x%lx\n", htab_hash_mask);
+}
diff --git a/arch/powerpc/mm/mmu_decl.h b/arch/powerpc/mm/mmu_decl.h
index f7f1374ba3ee..dc617ade83ab 100644
--- a/arch/powerpc/mm/mmu_decl.h
+++ b/arch/powerpc/mm/mmu_decl.h
@@ -83,6 +83,8 @@ static inline void _tlbivax_bcast(unsigned long address, 
unsigned int pid,
 }
 #endif
 
+static inline void print_system_hash_info(void) {}
+
 #else /* CONFIG_PPC_MMU_NOHASH */
 
 extern void hash_preload(struct mm_struct *mm, unsigned long ea,
@@ -92,6 +94,8 @@ extern void hash_preload(struct mm_struct *mm, unsigned long 
ea,
 extern void _tlbie(unsigned long address);
 extern void _tlbia(void);
 
+void print_system_hash_info(void);
+
 #endif /* CONFIG_PPC_MMU_NOHASH */
 
 #ifdef CONFIG_PPC32
@@ -105,7 +109,6 @@ extern unsigned int rtas_data, rtas_size;
 
 struct hash_pte;
 extern struct hash_pte *Hash;
-extern unsigned long Hash_size, Hash_mask;
 
 #endif /* CONFIG_PPC32 */
 
diff --git a/arch/powerpc/mm/ppc_mmu_32.c b/arch/powerpc/mm/ppc_mmu_32.c
index 088f14d57cce..864096489b6d 100644
--- a/arch/powerpc/mm/ppc_mmu_32.c
+++ b/arch/powerpc/mm/ppc_mmu_32.c
@@ -37,7 +37,7 @@
 #include "mmu_decl.h"
 
 struct hash_pte *Hash;
-unsigned long Hash_size, Hash_mask;
+static unsigned long Hash_size, Hash_mask;
 unsigned long _SDR1;
 
 struct ppc_bat BATS[8][2]; /* 8 pairs of IBAT, DBAT */
@@ -392,3 +392,10 @@ void setup_initial_memory_limit(phys_addr_t 
first_memblock_base,
else /* Anything else has 256M mapped */
memblock_set_current_limit(min_t(u64, first_memblock_size, 
0x1000));
 }
+
+void __init print_system_hash_info(void)
+{
+   pr_info("Hash_size = 0x%lx\n", Hash_size);
+   if (Hash_mask)
+   pr_info("Hash_mask = 0x%lx\n", Hash_mask);
+}
-- 
2.13.3



[PATCH v2 2/3] powerpc/32s: don't try to print hash table address.

2019-03-25 Thread Christophe Leroy
Due to %p, (ptrval) is printed in lieu of the hash table address.

showing the hash table address isn't an operationnal need so just
don't print it.

Signed-off-by: Christophe Leroy 
---
v2: no change

 arch/powerpc/mm/ppc_mmu_32.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/arch/powerpc/mm/ppc_mmu_32.c b/arch/powerpc/mm/ppc_mmu_32.c
index 302409fcfbc7..088f14d57cce 100644
--- a/arch/powerpc/mm/ppc_mmu_32.c
+++ b/arch/powerpc/mm/ppc_mmu_32.c
@@ -345,8 +345,8 @@ void __init MMU_init_hw(void)
  __func__, Hash_size, Hash_size);
_SDR1 = __pa(Hash) | SDR1_LOW_BITS;
 
-   printk("Total memory = %lldMB; using %ldkB for hash table (at %p)\n",
-  (unsigned long long)(total_memory >> 20), Hash_size >> 10, Hash);
+   pr_info("Total memory = %lldMB; using %ldkB for hash table\n",
+   (unsigned long long)(total_memory >> 20), Hash_size >> 10);
 
 
/*
-- 
2.13.3



[PATCH v2 1/3] powerpc/32s: drop Hash_end

2019-03-25 Thread Christophe Leroy
Hash_end has never been used, drop it.

Signed-off-by: Christophe Leroy 
---
v2: no change

 arch/powerpc/mm/mmu_decl.h   | 2 +-
 arch/powerpc/mm/ppc_mmu_32.c | 4 +---
 2 files changed, 2 insertions(+), 4 deletions(-)

diff --git a/arch/powerpc/mm/mmu_decl.h b/arch/powerpc/mm/mmu_decl.h
index 74ff61dabcb1..f7f1374ba3ee 100644
--- a/arch/powerpc/mm/mmu_decl.h
+++ b/arch/powerpc/mm/mmu_decl.h
@@ -104,7 +104,7 @@ extern int __map_without_bats;
 extern unsigned int rtas_data, rtas_size;
 
 struct hash_pte;
-extern struct hash_pte *Hash, *Hash_end;
+extern struct hash_pte *Hash;
 extern unsigned long Hash_size, Hash_mask;
 
 #endif /* CONFIG_PPC32 */
diff --git a/arch/powerpc/mm/ppc_mmu_32.c b/arch/powerpc/mm/ppc_mmu_32.c
index f29d2f118b44..302409fcfbc7 100644
--- a/arch/powerpc/mm/ppc_mmu_32.c
+++ b/arch/powerpc/mm/ppc_mmu_32.c
@@ -36,7 +36,7 @@
 
 #include "mmu_decl.h"
 
-struct hash_pte *Hash, *Hash_end;
+struct hash_pte *Hash;
 unsigned long Hash_size, Hash_mask;
 unsigned long _SDR1;
 
@@ -345,8 +345,6 @@ void __init MMU_init_hw(void)
  __func__, Hash_size, Hash_size);
_SDR1 = __pa(Hash) | SDR1_LOW_BITS;
 
-   Hash_end = (struct hash_pte *) ((unsigned long)Hash + Hash_size);
-
printk("Total memory = %lldMB; using %ldkB for hash table (at %p)\n",
   (unsigned long long)(total_memory >> 20), Hash_size >> 10, Hash);
 
-- 
2.13.3



Re: [PATCH 3/7] powerpc/setup: define cpu_pvr at all time

2019-03-25 Thread Christophe Leroy




Le 22/03/2019 à 09:08, Christophe Leroy a écrit :

To avoid ifdefs, define cpu_pvr at all time.

Signed-off-by: Christophe Leroy 


This patch introduces a sparse warning.

I guess we can skip it for now and rework more deeply the use of cpu_pvr 
versus SPRN_PVR which is re-read in many places in the code.


Christophe


---
  arch/powerpc/kernel/setup-common.c | 12 +---
  1 file changed, 5 insertions(+), 7 deletions(-)

diff --git a/arch/powerpc/kernel/setup-common.c 
b/arch/powerpc/kernel/setup-common.c
index a90e8367ccde..a4ed9301e815 100644
--- a/arch/powerpc/kernel/setup-common.c
+++ b/arch/powerpc/kernel/setup-common.c
@@ -190,9 +190,7 @@ void machine_halt(void)
machine_hang();
  }
  
-#ifdef CONFIG_SMP

  DEFINE_PER_CPU(unsigned int, cpu_pvr);
-#endif
  
  static void show_cpuinfo_summary(struct seq_file *m)

  {
@@ -234,11 +232,11 @@ static int show_cpuinfo(struct seq_file *m, void *v)
unsigned short maj;
unsigned short min;
  
-#ifdef CONFIG_SMP

-   pvr = per_cpu(cpu_pvr, cpu_id);
-#else
-   pvr = mfspr(SPRN_PVR);
-#endif
+   if (IS_ENABLED(CONFIG_SMP))
+   pvr = per_cpu(cpu_pvr, cpu_id);
+   else
+   pvr = mfspr(SPRN_PVR);
+
maj = (pvr >> 8) & 0xFF;
min = pvr & 0xFF;
  



Re: [PATCH v3] powerpc/8xx: fix possible object reference leak

2019-03-25 Thread Christophe Leroy




Le 26/03/2019 à 11:29, Peng Hao a écrit :

Could you fix your clock or clock setup ?

This emails appears to have been sent today at 11:29 (Paris Time ie 
GMT+1) allthough it is only 7am at the time being.


Christophe


From: Wen Yang 

The call to of_find_compatible_node returns a node pointer with refcount
incremented thus it must be explicitly decremented after the last
usage.
irq_domain_add_linear also calls of_node_get to increase refcount,
so irq_domain will not be affected when it is released.

Detected by coccinelle with the following warnings:
./arch/powerpc/platforms/8xx/pic.c:158:1-7: ERROR: missing of_node_put; 
acquired a node pointer with refcount incremented on line 136, but without a 
corresponding object release within this function.

Fixes: a8db8cf0d894 ("irq_domain: Replace irq_alloc_host() with
revmap-specific initializers")
Signed-off-by: Wen Yang 
Suggested-by: Christophe Leroy 
Suggested-by: Michael Ellerman 
Reviewed-by: Peng Hao 
Reviewed-by: Christophe Leroy 
Cc: Vitaly Bordug 
Cc: Benjamin Herrenschmidt 
Cc: Paul Mackerras 
Cc: Michael Ellerman 
Cc: linuxppc-dev@lists.ozlabs.org
Cc: linux-ker...@vger.kernel.org
---
v3->v2: set ret to zero explicitly.
v2->v1: add a Fixes tag.

  arch/powerpc/platforms/8xx/pic.c | 4 ++--
  1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/arch/powerpc/platforms/8xx/pic.c b/arch/powerpc/platforms/8xx/pic.c
index 8d5a25d..9993998 100644
--- a/arch/powerpc/platforms/8xx/pic.c
+++ b/arch/powerpc/platforms/8xx/pic.c
@@ -153,9 +153,9 @@ int mpc8xx_pic_init(void)
if (mpc8xx_pic_host == NULL) {
printk(KERN_ERR "MPC8xx PIC: failed to allocate irq host!\n");
ret = -ENOMEM;
-   goto out;
}
-   return 0;
+
+   ret = 0;
  
  out:

of_node_put(np);



Re: [PATCH] compiler: allow all arches to enable CONFIG_OPTIMIZE_INLINING

2019-03-25 Thread Christophe Leroy

Hi Masahiro,

Le 25/03/2019 à 07:44, Masahiro Yamada a écrit :

Hi Christophe,


On Sat, Mar 23, 2019 at 5:27 PM LEROY Christophe
 wrote:


Arnd Bergmann  a écrit :


On Wed, Mar 20, 2019 at 10:41 AM Arnd Bergmann  wrote:


I've added your patch to my randconfig test setup and will let you
know if I see anything noticeable. I'm currently testing clang-arm32,
clang-arm64 and gcc-x86.


This is the only additional bug that has come up so far:

`.exit.text' referenced in section `.alt.smp.init' of
drivers/char/ipmi/ipmi_msghandler.o: defined in discarded section
`exit.text' of drivers/char/ipmi/ipmi_msghandler.o


Wouldn't it be useful to activate -Winline gcc warning to ease finding
out problematic usage of inline keyword ?



Yes, it is useful to find out
which function is causing the error.
Thanks for the tip.



I did a mass build on kisskb. Almost ok, see results at 
http://kisskb.ellerman.id.au/kisskb/head/203d912edf8dde291977c71aa64024065e197f79/


ps3_defconfig with GCC 5 fails 
(http://kisskb.ellerman.id.au/kisskb/buildresult/13742711/) with:


arch/powerpc/mm/tlb-radix.c:148:2: error: asm operand 3 probably doesn't 
match constraints [-Werror]

arch/powerpc/mm/tlb-radix.c:148:2: error: impossible constraint in 'asm'
arch/powerpc/mm/tlb-radix.c:118:2: error: asm operand 3 probably doesn't 
match constraints [-Werror]
arch/powerpc/mm/tlb-radix.c:104:2: error: asm operand 3 probably doesn't 
match constraints [-Werror]


ps3_defconfig with GCC 4.6 fails 
(http://kisskb.ellerman.id.au/kisskb/buildresult/13742591/) with:


arch/powerpc/mm/tlb-radix.c:148:2: error: asm operand 3 probably doesn't 
match constraints [-Werror]
arch/powerpc/mm/tlb-radix.c:118:2: error: asm operand 3 probably doesn't 
match constraints [-Werror]
arch/powerpc/mm/tlb-radix.c:104:2: error: asm operand 3 probably doesn't 
match constraints [-Werror]


randconfig with GCC 4.6 fails 
(http://kisskb.ellerman.id.au/kisskb/buildresult/13742698/) with:


arch/powerpc/mm/tlb-radix.c:104:2: error: impossible constraint in 'asm'

Christophe


Re: [PATCH v2] arch/powerpc: Rework local_paca to avoid LTO warnings

2019-03-25 Thread Nicholas Piggin
Alastair D'Silva's on March 14, 2019 12:31 pm:
> From: Alastair D'Silva 
> 
> When building an LTO kernel, the existing code generates warnings:
> ./arch/powerpc/include/asm/paca.h:37:30: warning: register of
> ‘local_paca’ used for multiple global register variables
>  register struct paca_struct *local_paca asm("r13");
>   ^
> ./arch/powerpc/include/asm/paca.h:37:30: note: conflicts with
> ‘local_paca’

Isn't this a bogus warning? It doesn't look like there's a way to 
define it any other way.

> 
> This patch reworks local_paca into an inline getter & setter function,
> which addresses the warning.
> 
> Changelog:
> V2
>   - Address whitespace issues
>   - keep new implementation close to where the old implementation was
> 
> Signed-off-by: Alastair D'Silva 
> ---
>  arch/powerpc/include/asm/paca.h | 37 +
>  arch/powerpc/kernel/paca.c  |  2 +-
>  2 files changed, 29 insertions(+), 10 deletions(-)
> 
> diff --git a/arch/powerpc/include/asm/paca.h b/arch/powerpc/include/asm/paca.h
> index e843bc5d1a0f..2fa0b43357c9 100644
> --- a/arch/powerpc/include/asm/paca.h
> +++ b/arch/powerpc/include/asm/paca.h
> @@ -34,19 +34,38 @@
>  #include 
>  #include 
>  
> -register struct paca_struct *local_paca asm("r13");
> -
>  #if defined(CONFIG_DEBUG_PREEMPT) && defined(CONFIG_SMP)
>  extern unsigned int debug_smp_processor_id(void); /* from linux/smp.h */
> -/*
> - * Add standard checks that preemption cannot occur when using get_paca():
> - * otherwise the paca_struct it points to may be the wrong one just after.
> - */
> -#define get_paca()   ((void) debug_smp_processor_id(), local_paca)
> -#else
> -#define get_paca()   local_paca
>  #endif
>  
> +static inline struct paca_struct *get_paca_no_preempt_check(void)
> +{
> + register struct paca_struct *paca asm("r13");
> +
> + return paca;
> +}

Problem is it now changes the global register variable to a local 
register variable. The compiler would presumably be within its rights
to "cache" that return value or use another register for it, which
is not really what we want.

Thanks,
Nick



[PATCH] powerpc: Add force enable of DAWR on P9 option

2019-03-25 Thread Michael Neuling
This adds a flag so that the DAWR can be enabled on P9 via:
  echo Y > /sys/kernel/debug/powerpc/dawr_enable_dangerous

The DAWR was previously force disabled on POWER9 in:
  9654153158 powerpc: Disable DAWR in the base POWER9 CPU features
Also see Documentation/powerpc/DAWR-POWER9.txt

This is a dangerous setting, USE AT YOUR OWN RISK.

Some users may not care about a bad user crashing their box
(ie. single user/desktop systems) and really want the DAWR.  This
allows them to force enable DAWR.

This flag can also be used to disable DAWR access. Once this is
cleared, all DAWR access should be cleared immediately and your
machine once again safe from crashing.

Userspace may get confused by toggling this. If DAWR is force
enabled/disabled between getting the number of breakpoints (via
PTRACE_GETHWDBGINFO) and setting the breakpoint, userspace will get an
inconsistent view of what's available. Similarly for guests.

For the DAWR to be enabled in a KVM guest, the DAWR needs to be force
enabled in the host AND the guest. For this reason, this won't work on
POWERVM as it doesn't allow the HCALL to work. Writes of 'Y' to the
dawr_enable_dangerous file will fail if the hypervisor doesn't support
writing the DAWR.

To double check the DAWR is working, run this kernel selftest:
  tools/testing/selftests/powerpc/ptrace/ptrace-hwbreak.c
Any errors/failures/skips mean something is wrong.

Signed-off-by: Michael Neuling 
---
 Documentation/powerpc/DAWR-POWER9.txt| 32 +
 arch/powerpc/include/asm/hw_breakpoint.h |  7 +++
 arch/powerpc/kernel/hw_breakpoint.c  | 60 +++-
 arch/powerpc/kernel/process.c|  9 ++--
 arch/powerpc/kernel/ptrace.c |  3 +-
 arch/powerpc/kvm/book3s_hv.c |  3 +-
 arch/powerpc/kvm/book3s_hv_rmhandlers.S  | 23 +
 7 files changed, 120 insertions(+), 17 deletions(-)

diff --git a/Documentation/powerpc/DAWR-POWER9.txt 
b/Documentation/powerpc/DAWR-POWER9.txt
index 2feaa66196..bdec036509 100644
--- a/Documentation/powerpc/DAWR-POWER9.txt
+++ b/Documentation/powerpc/DAWR-POWER9.txt
@@ -56,3 +56,35 @@ POWER9. Loads and stores to the watchpoint locations will 
not be
 trapped in GDB. The watchpoint is remembered, so if the guest is
 migrated back to the POWER8 host, it will start working again.
 
+Force enabling the DAWR
+=
+Kernels (since ~v5.2) have an option to force enable the DAWR via:
+
+  echo Y > /sys/kernel/debug/powerpc/dawr_enable_dangerous
+
+This enables the DAWR even on POWER9.
+
+This is a dangerous setting, USE AT YOUR OWN RISK.
+
+Some users may not care about a bad user crashing their box
+(ie. single user/desktop systems) and really want the DAWR.  This
+allows them to force enable DAWR.
+
+This flag can also be used to disable DAWR access. Once this is
+cleared, all DAWR access should be cleared immediately and your
+machine once again safe from crashing.
+
+Userspace may get confused by toggling this. If DAWR is force
+enabled/disabled between getting the number of breakpoints (via
+PTRACE_GETHWDBGINFO) and setting the breakpoint, userspace will get an
+inconsistent view of what's available. Similarly for guests.
+
+For the DAWR to be enabled in a KVM guest, the DAWR needs to be force
+enabled in the host AND the guest. For this reason, this won't work on
+POWERVM as it doesn't allow the HCALL to work. Writes of 'Y' to the
+dawr_enable_dangerous file will fail if the hypervisor doesn't support
+writing the DAWR.
+
+To double check the DAWR is working, run this kernel selftest:
+  tools/testing/selftests/powerpc/ptrace/ptrace-hwbreak.c
+Any errors/failures/skips mean something is wrong.
diff --git a/arch/powerpc/include/asm/hw_breakpoint.h 
b/arch/powerpc/include/asm/hw_breakpoint.h
index ece4dc89c9..87c2a74f64 100644
--- a/arch/powerpc/include/asm/hw_breakpoint.h
+++ b/arch/powerpc/include/asm/hw_breakpoint.h
@@ -90,6 +90,13 @@ static inline void hw_breakpoint_disable(void)
 extern void thread_change_pc(struct task_struct *tsk, struct pt_regs *regs);
 int hw_breakpoint_handler(struct die_args *args);
 
+extern int set_dawr(struct arch_hw_breakpoint *brk);
+extern bool dawr_force_enable;
+static inline bool dawr_enabled(void)
+{
+   return dawr_force_enable;
+}
+
 #else  /* CONFIG_HAVE_HW_BREAKPOINT */
 static inline void hw_breakpoint_disable(void) { }
 static inline void thread_change_pc(struct task_struct *tsk,
diff --git a/arch/powerpc/kernel/hw_breakpoint.c 
b/arch/powerpc/kernel/hw_breakpoint.c
index fec8a67731..0b2461e1d9 100644
--- a/arch/powerpc/kernel/hw_breakpoint.c
+++ b/arch/powerpc/kernel/hw_breakpoint.c
@@ -29,11 +29,14 @@
 #include 
 #include 
 #include 
+#include 
+#include 
 
 #include 
 #include 
 #include 
 #include 
+#include 
 #include 
 
 /*
@@ -174,7 +177,7 @@ int hw_breakpoint_arch_parse(struct perf_event *bp,
if (!ppc_breakpoint_available())
return -ENODEV;
length_max = 8; /* DABR */
-   if (cpu_has_feature(CPU_FTR_

Re: [PATCH] kbuild: strip whitespace in cmd_record_mcount findstring

2019-03-25 Thread Masahiro Yamada
On Tue, Mar 26, 2019 at 1:05 AM Joe Lawrence  wrote:
>
> CC_FLAGS_FTRACE may contain trailing whitespace that interferes with
> findstring.
>
> For example, commit 6977f95e63b9 ("powerpc: avoid -mno-sched-epilog on
> GCC 4.9 and newer") introduced a change such that on my ppc64le box,
> CC_FLAGS_FTRACE="-pg -mprofile-kernel ".  (Note the trailing space.)
> When cmd_record_mcount is now invoked, findstring fails as the ftrace
> flags were found at very end of _c_flags, without the trailing space.
>
>   _c_flags=" ... -pg -mprofile-kernel"
>   CC_FLAGS_FTRACE="-pg -mprofile-kernel "
>^
> findstring is looking for this extra space
>
> Remove the redundant whitespaces from CC_FLAGS_FTRACE in
> cmd_record_mcount to avoid this problem.
>
> Fixes: 6977f95e63b9 ("powerpc: avoid -mno-sched-epilog on GCC 4.9 and newer").
> Signed-off-by: Joe Lawrence 
> ---
>
> Standard disclaimer: I'm not a kbuild expert, but this works around the
> problem I reported where ftrace and livepatch self-tests were failing as
> specified object files were not run through the recordmcount.pl script:
>
> ppc64le: ftrace self-tests and $(CC_FLAGS_FTRACE) broken?
> https://lists.ozlabs.org/pipermail/linuxppc-dev/2019-March/187298.html
>
>  scripts/Makefile.build | 8 
>  1 file changed, 4 insertions(+), 4 deletions(-)
>
> diff --git a/scripts/Makefile.build b/scripts/Makefile.build
> index 2554a15ecf2b..74d402b5aa3c 100644
> --- a/scripts/Makefile.build
> +++ b/scripts/Makefile.build
> @@ -199,10 +199,10 @@ sub_cmd_record_mcount = perl 
> $(srctree)/scripts/recordmcount.pl "$(ARCH)" \
> "$(if $(part-of-module),1,0)" "$(@)";
>  recordmcount_source := $(srctree)/scripts/recordmcount.pl
>  endif # BUILD_C_RECORDMCOUNT
> -cmd_record_mcount =\
> -   if [ "$(findstring $(CC_FLAGS_FTRACE),$(_c_flags))" =   \
> -"$(CC_FLAGS_FTRACE)" ]; then   \
> -   $(sub_cmd_record_mcount)\
> +cmd_record_mcount =\
> +   if [ "$(findstring $(strip $(CC_FLAGS_FTRACE)),$(_c_flags))" =  \
> +"$(strip $(CC_FLAGS_FTRACE))" ]; then  \
> +   $(sub_cmd_record_mcount)\
> fi
>  endif # CC_USING_RECORD_MCOUNT
>  endif # CONFIG_FTRACE_MCOUNT_RECORD
> --
> 2.20.1
>



I do not see a point in using the shell command here
in the first place.

Instead of adding crappy workarounds,
I guess the following simple code should work:


index 2554a15..5f13021 100644
--- a/scripts/Makefile.build
+++ b/scripts/Makefile.build
@@ -199,11 +199,8 @@ sub_cmd_record_mcount = perl
$(srctree)/scripts/recordmcount.pl "$(ARCH)" \
"$(if $(part-of-module),1,0)" "$(@)";
 recordmcount_source := $(srctree)/scripts/recordmcount.pl
 endif # BUILD_C_RECORDMCOUNT
-cmd_record_mcount =\
-   if [ "$(findstring $(CC_FLAGS_FTRACE),$(_c_flags))" =   \
-"$(CC_FLAGS_FTRACE)" ]; then   \
-   $(sub_cmd_record_mcount)\
-   fi
+cmd_record_mcount = $(if $(findstring $(CC_FLAGS_FTRACE),$(_c_flags)),\
+ $(sub_cmd_record_mcount))
 endif # CC_USING_RECORD_MCOUNT
 endif # CONFIG_FTRACE_MCOUNT_RECORD



-- 
Best Regards
Masahiro Yamada


[PATCH v3] powerpc/8xx: fix possible object reference leak

2019-03-25 Thread Peng Hao
From: Wen Yang 

The call to of_find_compatible_node returns a node pointer with refcount
incremented thus it must be explicitly decremented after the last
usage.
irq_domain_add_linear also calls of_node_get to increase refcount,
so irq_domain will not be affected when it is released.

Detected by coccinelle with the following warnings:
./arch/powerpc/platforms/8xx/pic.c:158:1-7: ERROR: missing of_node_put; 
acquired a node pointer with refcount incremented on line 136, but without a 
corresponding object release within this function.

Fixes: a8db8cf0d894 ("irq_domain: Replace irq_alloc_host() with
revmap-specific initializers")
Signed-off-by: Wen Yang 
Suggested-by: Christophe Leroy 
Suggested-by: Michael Ellerman 
Reviewed-by: Peng Hao 
Reviewed-by: Christophe Leroy 
Cc: Vitaly Bordug 
Cc: Benjamin Herrenschmidt 
Cc: Paul Mackerras 
Cc: Michael Ellerman 
Cc: linuxppc-dev@lists.ozlabs.org
Cc: linux-ker...@vger.kernel.org
---
v3->v2: set ret to zero explicitly.
v2->v1: add a Fixes tag.  

 arch/powerpc/platforms/8xx/pic.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/arch/powerpc/platforms/8xx/pic.c b/arch/powerpc/platforms/8xx/pic.c
index 8d5a25d..9993998 100644
--- a/arch/powerpc/platforms/8xx/pic.c
+++ b/arch/powerpc/platforms/8xx/pic.c
@@ -153,9 +153,9 @@ int mpc8xx_pic_init(void)
if (mpc8xx_pic_host == NULL) {
printk(KERN_ERR "MPC8xx PIC: failed to allocate irq host!\n");
ret = -ENOMEM;
-   goto out;
}
-   return 0;
+
+   ret = 0;
 
 out:
of_node_put(np);
-- 
2.9.5



答复: Re: [PATCH v2] powerpc/8xx: fix possible object reference leak

2019-03-25 Thread wen.yang99
>> The call to of_find_compatible_node returns a node pointer with refcount
>> incremented thus it must be explicitly decremented after the last
>> usage.
>> irq_domain_add_linear also calls of_node_get to increase refcount,
>> so irq_domain will not be affected when it is released.
>>
>> Detected by coccinelle with the following warnings:
>> ./arch/powerpc/platforms/8xx/pic.c:158:1-7: ERROR: missing of_node_put; 
>> acquired a node pointer with refcount incremented on line 136, but without a 
>> corresponding object release within this function.
>>
>> Fixes: a8db8cf0d894 ("irq_domain: Replace irq_alloc_host() with
>> revmap-specific initializers")
>> Signed-off-by: Wen Yang 
>> Suggested-by: Christophe Leroy 
>> Reviewed-by: Peng Hao 
>> Cc: Vitaly Bordug 
>> Cc: Benjamin Herrenschmidt 
>> Cc: Paul Mackerras 
>> Cc: Michael Ellerman 
>> Cc: linuxppc-dev@lists.ozlabs.org
>> Cc: linux-ker...@vger.kernel.org
>> ---
>>  arch/powerpc/platforms/8xx/pic.c | 2 --
>>  1 file changed, 2 deletions(-)
>>
>> diff --git a/arch/powerpc/platforms/8xx/pic.c 
>> b/arch/powerpc/platforms/8xx/pic.c
>> index 8d5a25d..4453df6 100644
>> --- a/arch/powerpc/platforms/8xx/pic.c
>> +++ b/arch/powerpc/platforms/8xx/pic.c
>> @@ -153,9 +153,7 @@ int mpc8xx_pic_init(void)
>>  if (mpc8xx_pic_host == NULL) {
>>  printk(KERN_ERR "MPC8xx PIC: failed to allocate irq host!\n");
>>  ret = -ENOMEM;
>> -goto out;
>>  }
>> -return 0;
> 
> It's fragile to rely on ret being equal to zero here. If the code
> further up is changed it's easy enough to miss that ret is expected to
> be zero.
> 
> Better to set it to zero here explicitly, this is the success path after
> all, eg:
> 
> ret = 0;
> 

Thank you for your comments.
I'll fix it soon.

Thanks and regards,
Wen

Re: [PATCH] powerpc: vmlinux.lds: Drop Binutils 2.18 workarounds

2019-03-25 Thread Michael Ellerman
Joel Stanley  writes:
> Segher added some workarounds for GCC 4.2 and bintuils 2.18. We now set
> 4.6 and 2.20 as the minimum, so they can be dropped.
>
> This is mostly a revert of c6995fe4 ("powerpc: Fix build bug with
> binutils < 2.18 and GCC < 4.2").
>
> Signed-off-by: Joel Stanley 
> ---
>  arch/powerpc/kernel/vmlinux.lds.S | 35 ---
>  1 file changed, 4 insertions(+), 31 deletions(-)

Seems this breaks some toolchains, at least the one from kernel.org:

  
/opt/cross/kisskb/korg/gcc-8.1.0-nolibc/powerpc64-linux/bin/powerpc64-linux-ld: 
.tmp_vmlinux1: Not enough room for program headers, try linking with -N
  
/opt/cross/kisskb/korg/gcc-8.1.0-nolibc/powerpc64-linux/bin/powerpc64-linux-ld: 
final link failed: Bad value
  make[1]: *** [/kisskb/src/Makefile:1024: vmlinux] Error 1

http://kisskb.ellerman.id.au/kisskb/buildresult/13743374/

Not sure why.

That's binutils 2.30.

cheers


Re: Bad file pattern in MAINTAINERS section 'IBM Power Virtual Accelerator Switchboard'

2019-03-25 Thread Joe Perches
On Mon, 2019-03-25 at 16:35 -0700, Joe Perches wrote:
> A file pattern line in this section of the MAINTAINERS file in linux-next
> does not have a match in the linux source files.
> 
> This could occur because a matching filename was never added, was deleted
> or renamed in some other commit.
> 
> The commits that added and if found renamed or removed the file pattern
> are shown below.
> 
> Please fix this defect appropriately.
> 
> 1: ---
> 
> linux-next MAINTAINERS section:
> 
>   7396IBM Power Virtual Accelerator Switchboard
>   7397M:  Sukadev Bhattiprolu

btw: Your name needs an email address here too.

This should be:

M:  Sukadev Bhattiprolu 




Bad file pattern in MAINTAINERS section 'IBM Power Virtual Accelerator Switchboard'

2019-03-25 Thread Joe Perches
A file pattern line in this section of the MAINTAINERS file in linux-next
does not have a match in the linux source files.

This could occur because a matching filename was never added, was deleted
or renamed in some other commit.

The commits that added and if found renamed or removed the file pattern
are shown below.

Please fix this defect appropriately.

1: ---

linux-next MAINTAINERS section:

7396IBM Power Virtual Accelerator Switchboard
7397M:  Sukadev Bhattiprolu
7398L:  linuxppc-dev@lists.ozlabs.org
7399S:  Supported
7400F:  arch/powerpc/platforms/powernv/vas*
7401F:  arch/powerpc/platforms/powernv/copy-paste.h
7402F:  arch/powerpc/include/asm/vas.h
--> 7403F:  arch/powerpc/include/uapi/asm/vas.h

2: ---

The most recent commit that added or modified file pattern 
'arch/powerpc/include/uapi/asm/vas.h':

commit 4dea2d1a927c61114a168d4509b56329ea6effb7
Author: Sukadev Bhattiprolu 
Date:   Mon Aug 28 23:23:33 2017 -0700

powerpc/powernv/vas: Define vas_init() and vas_exit()

Implement vas_init() and vas_exit() functions for a new VAS module.
This VAS module is essentially a library for other device drivers
and kernel users of the NX coprocessors like NX-842 and NX-GZIP.
In the future this will be extended to add support for user space
to access the NX coprocessors.

VAS is currently only supported with 64K page size.

Signed-off-by: Sukadev Bhattiprolu 
Signed-off-by: Michael Ellerman 

 .../devicetree/bindings/powerpc/ibm,vas.txt|  22 +++
 MAINTAINERS|   8 ++
 arch/powerpc/platforms/powernv/Kconfig |  14 ++
 arch/powerpc/platforms/powernv/Makefile|   1 +
 arch/powerpc/platforms/powernv/vas-window.c|  19 +++
 arch/powerpc/platforms/powernv/vas.c   | 151 +
 arch/powerpc/platforms/powernv/vas.h   |   2 +
 7 files changed, 217 insertions(+)

3: ---

No commit with file pattern 'arch/powerpc/include/uapi/asm/vas.h' was found


Re: [RESEND 4/7] mm/gup: Add FOLL_LONGTERM capability to GUP fast

2019-03-25 Thread Ira Weiny
On Mon, Mar 25, 2019 at 03:36:28PM -0700, Dan Williams wrote:
> On Mon, Mar 25, 2019 at 3:22 PM Ira Weiny  wrote:
> [..]
> > FWIW this thread is making me think my original patch which simply 
> > implemented
> > get_user_pages_fast_longterm() would be more clear.  There is some evidence
> > that the GUP API was trending that way (see get_user_pages_remote).  That 
> > seems
> > wrong but I don't know how to ensure users don't specify the wrong flag.
> 
> What about just making the existing get_user_pages_longterm() have a
> fast path option?

That would work but was not the direction we agreed upon before.[1]

At this point I would rather see this patch set applied, focus on fixing the
filesystem issues, and once that is done determine if FOLL_LONGTERM is needed
in any GUP calls.

Ira

[1] https://lkml.org/lkml/2019/2/11/2038



Re: [RESEND 4/7] mm/gup: Add FOLL_LONGTERM capability to GUP fast

2019-03-25 Thread Dan Williams
On Mon, Mar 25, 2019 at 3:22 PM Ira Weiny  wrote:
[..]
> FWIW this thread is making me think my original patch which simply implemented
> get_user_pages_fast_longterm() would be more clear.  There is some evidence
> that the GUP API was trending that way (see get_user_pages_remote).  That 
> seems
> wrong but I don't know how to ensure users don't specify the wrong flag.

What about just making the existing get_user_pages_longterm() have a
fast path option?


Re: [RESEND 4/7] mm/gup: Add FOLL_LONGTERM capability to GUP fast

2019-03-25 Thread Ira Weiny
On Mon, Mar 25, 2019 at 02:51:50PM -0300, Jason Gunthorpe wrote:
> On Mon, Mar 25, 2019 at 02:23:15AM -0700, Ira Weiny wrote:
> > > > Unfortunately holding the lock is required to support FOLL_LONGTERM (to 
> > > > check
> > > > the VMAs) but we don't want to hold the lock to be optimal 
> > > > (specifically allow
> > > > FAULT_FOLL_ALLOW_RETRY).  So I'm maintaining the optimization for 
> > > > *_fast users
> > > > who do not specify FOLL_LONGTERM.
> > > > 
> > > > Another way to do this would have been to define 
> > > > __gup_longterm_unlocked with
> > > > the above logic, but that seemed overkill at this point.
> > > 
> > > get_user_pages_unlocked() is an exported symbol, shouldn't it work
> > > with the FOLL_LONGTERM flag?
> > > 
> > > I think it should even though we have no user..
> > > 
> > > Otherwise the GUP API just gets more confusing.
> > 
> > I agree WRT to the API.  But I think callers of get_user_pages_unlocked() 
> > are
> > not going to get the behavior they want if they specify FOLL_LONGTERM.
> 
> Oh? Isn't the only thing FOLL_LONGTERM does is block the call on DAX?

>From an API yes.

> Why does the locking mode matter to this test?

DAX checks for VMA's being Filesystem DAX.  Therefore, it requires collection
of VMA's as the GUP code executes.  The unlocked version can drop the lock and
therefore the VMAs may become invalid.  Therefore, the 2 code paths are
incompatible.

Users of GUP unlocked are going to want the benefit of FAULT_FOLL_ALLOW_RETRY.
So I don't anticipate anyone using FOLL_LONGTERM with
get_user_pages_unlocked().

FWIW this thread is making me think my original patch which simply implemented
get_user_pages_fast_longterm() would be more clear.  There is some evidence
that the GUP API was trending that way (see get_user_pages_remote).  That seems
wrong but I don't know how to ensure users don't specify the wrong flag.

> 
> > What I could do is BUG_ON (or just WARN_ON) if unlocked is called with
> > FOLL_LONGTERM similar to the code in get_user_pages_locked() which does not
> > allow locked and vmas to be passed together:
> 
> The GUP call should fail if you are doing something like this. But I'd
> rather not see confusing specialc cases in code without a clear
> comment explaining why it has to be there.

Code comment would be necessary, sure.  Was just throwing ideas out there.

Ira



RE: [PATCH v4 0/4] ocxl: OpenCAPI Cleanup

2019-03-25 Thread Alastair D'Silva
> -Original Message-
> From: Frederic Barrat 
> Sent: Tuesday, 26 March 2019 4:34 AM
> To: Greg Kurz ; Alastair D'Silva 
> Cc: alast...@d-silva.org; Arnd Bergmann ; Greg Kroah-
> Hartman ; linux-ker...@vger.kernel.org;
> Andrew Donnellan ; linuxppc-
> d...@lists.ozlabs.org
> Subject: Re: [PATCH v4 0/4] ocxl: OpenCAPI Cleanup
> 
> 
> 
> Le 25/03/2019 à 17:49, Greg Kurz a écrit :
> > Hi Alastair,
> >
> > I forgot to mention it during v3 but please don't link new version of
> > a patchset to the previous one with --in-reply-to. This is to ensure I
> > can see them in my email client without having to scroll back many
> > days in the past (which likely means a fair number of e-mails on
> > linuxppc-dev).
> 
> 
> I'm also seeing the other series (ocxl refactoring) somehow under the same
> thread. I haven't checked why, and there may be some mail client bug in the
> way, I just mention it in case you see a reason for it in the way you submit
> the patch set.
> 

I probably grabbed the wrong message-id.

-- 
Alastair D'Silva   mob: 0423 762 819
skype: alastair_dsilva msn: alast...@d-silva.org
blog: http://alastair.d-silva.orgTwitter: @EvilDeece



Re: powerpc/mm: Only define MAX_PHYSMEM_BITS in SPARSEMEM configurations

2019-03-25 Thread Andreas Schwab
On Mär 25 2019, Michael Ellerman  wrote:

> So I'm inclined to just switch to always using SPARSEMEM on 64-bit
> Book3S, because that's what's well tested and we hardly need more code
> paths to test. Unless anyone has a strong objection, I haven't actually
> benchmarked FLATMEM vs SPARSEMEM on a G5.

Configuring with SPARSEMEM saves about 32Mb of memory.

Andreas.

-- 
Andreas Schwab, sch...@linux-m68k.org
GPG Key fingerprint = 7578 EB47 D4E5 4D69 2510  2552 DF73 E780 A9DA AEC1
"And now for something completely different."


Re: [RESEND 1/7] mm/gup: Replace get_user_pages_longterm() with FOLL_LONGTERM

2019-03-25 Thread Ira Weiny
On Fri, Mar 22, 2019 at 02:24:40PM -0700, Dan Williams wrote:
> On Sun, Mar 17, 2019 at 7:36 PM  wrote:

[snip]

> > + * __gup_longterm_locked() is a wrapper for __get_uer_pages_locked which
> 
> s/uer/user/
> 
> > + * allows us to process the FOLL_LONGTERM flag if present.
> > + *
> > + * FOLL_LONGTERM Checks for either DAX VMAs or PPC CMA regions and either 
> > fails
> > + * the pin or attempts to migrate the page as appropriate.
> > + *
> > + * In the filesystem-dax case mappings are subject to the lifetime 
> > enforced by
> > + * the filesystem and we need guarantees that longterm users like RDMA and 
> > V4L2
> > + * only establish mappings that have a kernel enforced revocation 
> > mechanism.
> > + *
> > + * In the CMA case pages can't be pinned in a CMA region as this would
> > + * unnecessarily fragment that region.  So CMA attempts to migrate the page
> > + * before pinning.
> >   *
> >   * "longterm" == userspace controlled elevated page count lifetime.
> >   * Contrast this to iov_iter_get_pages() usages which are transient.
> 
> Ah, here's the longterm documentation, but if I was a developer
> considering whether to use FOLL_LONGTERM or not I would expect to find
> the documentation at the flag definition site.
> 
> I think it has become more clear since get_user_pages_longterm() was
> initially merged that we need to warn people not to use it, or at
> least seriously reconsider whether they want an interface to support
> indefinite pins.

I will move the comment to the flag definition but...

In reviewing this comment it occurs to me that the addition of special casing
CMA regions via FOLL_LONGTERM has made it less experimental/temporary and now
simply implies intent to the GUP code as to the use of the pages.

As I'm not super familiar with the CMA use case I can't say for certain but it
seems that it is not a temporary solution.

So I'm not going to refrain from a FIXME WRT removing the flag.

New suggested text below.

diff --git a/include/linux/mm.h b/include/linux/mm.h
index 6831077d126c..5db9d8e894aa 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -2596,7 +2596,28 @@ struct page *follow_page(struct vm_area_struct *vma, 
unsigned long address,
 #define FOLL_REMOTE0x2000  /* we are working on non-current tsk/mm */
 #define FOLL_COW   0x4000  /* internal GUP flag */
 #define FOLL_ANON  0x8000  /* don't do file mappings */
-#define FOLL_LONGTERM  0x1 /* mapping is intended for a long term pin */
+#define FOLL_LONGTERM  0x1 /* mapping lifetime is indefinite: see below */
+
+/*
+ * NOTE on FOLL_LONGTERM:
+ *
+ * FOLL_LONGTERM indicates that the page will be held for an indefinite time
+ * period _often_ under userspace control.  This is contrasted with
+ * iov_iter_get_pages() where usages which are transient.
+ *
+ * FIXME: For pages which are part of a filesystem, mappings are subject to the
+ * lifetime enforced by the filesystem and we need guarantees that longterm
+ * users like RDMA and V4L2 only establish mappings which coordinate usage with
+ * the filesystem.  Ideas for this coordination include revoking the longterm
+ * pin, delaying writeback, bounce buffer page writeback, etc.  As FS DAX was
+ * added after the problem with filesystems was found FS DAX VMAs are
+ * specifically failed.  Filesystem pages are still subject to bugs and use of
+ * FOLL_LONGTERM should be avoided on those pages.
+ *
+ * In the CMA case: longterm pins in a CMA region would unnecessarily fragment
+ * that region.  And so CMA attempts to migrate the page before pinning when
+ * FOLL_LONGTERM is specified.
+ */
 
 static inline int vm_fault_to_errno(vm_fault_t vm_fault, int foll_flags)
 {



Re: [PATCH v3] powerpc/64: Fix memcmp reading past the end of src/dest

2019-03-25 Thread Segher Boessenkool
On Mon, Mar 25, 2019 at 11:33:56PM +1100, Michael Ellerman wrote:
> Segher Boessenkool  writes:
> > On Fri, Mar 22, 2019 at 11:37:24PM +1100, Michael Ellerman wrote:
> >> +  clrldi  r6,r4,(64-12)   // r6 = r4 & 0xfff
> >
> > You can just write
> >   rlwinm r6,r4,0,0x0fff
> 
> > if that is clearer?  Or do you still want a comment with that :-)
> 
> I don't think it's clearer doing a rotate of zero bits :)
> 
> And yeah I'd probably still leave the comment, so I'm inclined to stick
> with the clrldi?

I always have to think what the clrldi etc. do exactly, while with rlwinm
it is obvious.  But yeah this may be different for other people who are
used to different idiom.

> Would be nice if the assembler could support:
> 
>   andir6, r4, 0x0fff
> 
> And turn it into the rlwinm, or rldicl :)

The extended mnemonics are *simple*, *one-to-one* mappings.  Having
"andi. 6,4,0x0f0f" a valid insn, but an extended mnemonic "andi 6,4,0x0f0f"
that is not (and the other way around for say 0xffff) would violate that.

You could do some assembler macro, that can also expand to multiple insns
where that is useful.  Also one for loading constants, etc.  The downside
to that is you often do care how many insns are generated.

Instead you could do a macro for only those cases that can be done with *one*
insn.  But that then is pretty restricted in use, and people have to learn
what values are valid.

I don't see a perfect solution.


Segher


Re: [RESEND 4/7] mm/gup: Add FOLL_LONGTERM capability to GUP fast

2019-03-25 Thread Jason Gunthorpe
On Mon, Mar 25, 2019 at 02:23:15AM -0700, Ira Weiny wrote:
> > > Unfortunately holding the lock is required to support FOLL_LONGTERM (to 
> > > check
> > > the VMAs) but we don't want to hold the lock to be optimal (specifically 
> > > allow
> > > FAULT_FOLL_ALLOW_RETRY).  So I'm maintaining the optimization for *_fast 
> > > users
> > > who do not specify FOLL_LONGTERM.
> > > 
> > > Another way to do this would have been to define __gup_longterm_unlocked 
> > > with
> > > the above logic, but that seemed overkill at this point.
> > 
> > get_user_pages_unlocked() is an exported symbol, shouldn't it work
> > with the FOLL_LONGTERM flag?
> > 
> > I think it should even though we have no user..
> > 
> > Otherwise the GUP API just gets more confusing.
> 
> I agree WRT to the API.  But I think callers of get_user_pages_unlocked() are
> not going to get the behavior they want if they specify FOLL_LONGTERM.

Oh? Isn't the only thing FOLL_LONGTERM does is block the call on DAX?
Why does the locking mode matter to this test?

> What I could do is BUG_ON (or just WARN_ON) if unlocked is called with
> FOLL_LONGTERM similar to the code in get_user_pages_locked() which does not
> allow locked and vmas to be passed together:

The GUP call should fail if you are doing something like this. But I'd
rather not see confusing specialc cases in code without a clear
comment explaining why it has to be there.

Jason


Re: [PATCH 2/2] arch: add pidfd and io_uring syscalls everywhere

2019-03-25 Thread Paul Burton
Hi Arnd,

On Mon, Mar 25, 2019 at 03:47:37PM +0100, Arnd Bergmann wrote:
> Add the io_uring and pidfd_send_signal system calls to all architectures.
> 
> These system calls are designed to handle both native and compat tasks,
> so all entries are the same across architectures, only arm-compat and
> the generic tale still use an old format.
> 
> Signed-off-by: Arnd Bergmann 
> ---
>%
> diff --git a/arch/mips/kernel/syscalls/syscall_n64.tbl 
> b/arch/mips/kernel/syscalls/syscall_n64.tbl
> index c85502e67b44..c4a49f7d57bb 100644
> --- a/arch/mips/kernel/syscalls/syscall_n64.tbl
> +++ b/arch/mips/kernel/syscalls/syscall_n64.tbl
> @@ -338,3 +338,7 @@
>  327  n64 rseqsys_rseq
>  328  n64 io_pgetevents   sys_io_pgetevents
>  # 329 through 423 are reserved to sync up with other architectures
> +424  common  pidfd_send_signal   sys_pidfd_send_signal
> +425  common  io_uring_setup  sys_io_uring_setup
> +426  common  io_uring_enter  sys_io_uring_enter
> +427  common  io_uring_register   sys_io_uring_register

Shouldn't these declare the ABI as "n64"?

I don't see anywhere that it would actually change the generated code,
but a comment at the top of the file says that every entry should use
"n64" and so far they all do. Did you have something else in mind here?

Thanks,
Paul


Re: [PATCH v4 0/4] ocxl: OpenCAPI Cleanup

2019-03-25 Thread Frederic Barrat




Le 25/03/2019 à 17:49, Greg Kurz a écrit :

Hi Alastair,

I forgot to mention it during v3 but please don't link new version
of a patchset to the previous one with --in-reply-to. This is to
ensure I can see them in my email client without having to scroll
back many days in the past (which likely means a fair number of
e-mails on linuxppc-dev).



I'm also seeing the other series (ocxl refactoring) somehow under the 
same thread. I haven't checked why, and there may be some mail client 
bug in the way, I just mention it in case you see a reason for it in the 
way you submit the patch set.


  Fred




Cheers,

--
Greg

On Mon, 25 Mar 2019 16:34:51 +1100
"Alastair D'Silva"  wrote:


From: Alastair D'Silva 

Some minor cleanups for the OpenCAPI driver as a prerequisite
for an ocxl driver refactoring to allow the driver core to
be utilised by external drivers.

Changelog:
V4:
   - Drop printf format changes as they omit the format indicator for '0'
V3:
   - Add missed header in 'ocxl: Remove some unused exported symbols'.
 This addresses the introduced sparse warnings
V2:
   - remove intermediate assignment of 'link' var in
 'Rename struct link to ocxl_link'
   - Don't shift definition of ocxl_context_attach in
 'Remove some unused exported symbols'


Alastair D'Silva (4):
   ocxl: Rename struct link to ocxl_link
   ocxl: read_pasid never returns an error, so make it void
   ocxl: Remove superfluous 'extern' from headers
   ocxl: Remove some unused exported symbols

  drivers/misc/ocxl/config.c| 13 ++---
  drivers/misc/ocxl/file.c  |  5 +-
  drivers/misc/ocxl/link.c  | 36 ++---
  drivers/misc/ocxl/ocxl_internal.h | 85 +++
  include/misc/ocxl.h   | 53 ++-
  5 files changed, 91 insertions(+), 101 deletions(-)







Re: [RESEND 4/7] mm/gup: Add FOLL_LONGTERM capability to GUP fast

2019-03-25 Thread Ira Weiny
On Mon, Mar 25, 2019 at 01:47:13PM -0300, Jason Gunthorpe wrote:
> On Mon, Mar 25, 2019 at 01:42:26AM -0700, Ira Weiny wrote:
> > On Fri, Mar 22, 2019 at 03:12:55PM -0700, Dan Williams wrote:
> > > On Sun, Mar 17, 2019 at 7:36 PM  wrote:
> > > >
> > > > From: Ira Weiny 
> > > >
> > > > DAX pages were previously unprotected from longterm pins when users
> > > > called get_user_pages_fast().
> > > >
> > > > Use the new FOLL_LONGTERM flag to check for DEVMAP pages and fall
> > > > back to regular GUP processing if a DEVMAP page is encountered.
> > > >
> > > > Signed-off-by: Ira Weiny 
> > > >  mm/gup.c | 29 +
> > > >  1 file changed, 25 insertions(+), 4 deletions(-)
> > > >
> > > > diff --git a/mm/gup.c b/mm/gup.c
> > > > index 0684a9536207..173db0c44678 100644
> > > > +++ b/mm/gup.c
> > > > @@ -1600,6 +1600,9 @@ static int gup_pte_range(pmd_t pmd, unsigned long 
> > > > addr, unsigned long end,
> > > > goto pte_unmap;
> > > >
> > > > if (pte_devmap(pte)) {
> > > > +   if (unlikely(flags & FOLL_LONGTERM))
> > > > +   goto pte_unmap;
> > > > +
> > > > pgmap = get_dev_pagemap(pte_pfn(pte), pgmap);
> > > > if (unlikely(!pgmap)) {
> > > > undo_dev_pagemap(nr, nr_start, pages);
> > > > @@ -1739,8 +1742,11 @@ static int gup_huge_pmd(pmd_t orig, pmd_t *pmdp, 
> > > > unsigned long addr,
> > > > if (!pmd_access_permitted(orig, flags & FOLL_WRITE))
> > > > return 0;
> > > >
> > > > -   if (pmd_devmap(orig))
> > > > +   if (pmd_devmap(orig)) {
> > > > +   if (unlikely(flags & FOLL_LONGTERM))
> > > > +   return 0;
> > > > return __gup_device_huge_pmd(orig, pmdp, addr, end, 
> > > > pages, nr);
> > > > +   }
> > > >
> > > > refs = 0;
> > > > page = pmd_page(orig) + ((addr & ~PMD_MASK) >> PAGE_SHIFT);
> > > > @@ -1777,8 +1783,11 @@ static int gup_huge_pud(pud_t orig, pud_t *pudp, 
> > > > unsigned long addr,
> > > > if (!pud_access_permitted(orig, flags & FOLL_WRITE))
> > > > return 0;
> > > >
> > > > -   if (pud_devmap(orig))
> > > > +   if (pud_devmap(orig)) {
> > > > +   if (unlikely(flags & FOLL_LONGTERM))
> > > > +   return 0;
> > > > return __gup_device_huge_pud(orig, pudp, addr, end, 
> > > > pages, nr);
> > > > +   }
> > > >
> > > > refs = 0;
> > > > page = pud_page(orig) + ((addr & ~PUD_MASK) >> PAGE_SHIFT);
> > > > @@ -2066,8 +2075,20 @@ int get_user_pages_fast(unsigned long start, int 
> > > > nr_pages,
> > > > start += nr << PAGE_SHIFT;
> > > > pages += nr;
> > > >
> > > > -   ret = get_user_pages_unlocked(start, nr_pages - nr, 
> > > > pages,
> > > > - gup_flags);
> > > > +   if (gup_flags & FOLL_LONGTERM) {
> > > > +   down_read(¤t->mm->mmap_sem);
> > > > +   ret = __gup_longterm_locked(current, 
> > > > current->mm,
> > > > +   start, nr_pages - 
> > > > nr,
> > > > +   pages, NULL, 
> > > > gup_flags);
> > > > +   up_read(¤t->mm->mmap_sem);
> > > > +   } else {
> > > > +   /*
> > > > +* retain FAULT_FOLL_ALLOW_RETRY optimization if
> > > > +* possible
> > > > +*/
> > > > +   ret = get_user_pages_unlocked(start, nr_pages - 
> > > > nr,
> > > > + pages, gup_flags);
> > > 
> > > I couldn't immediately grok why this path needs to branch on
> > > FOLL_LONGTERM? Won't get_user_pages_unlocked(..., FOLL_LONGTERM) do
> > > the right thing?
> > 
> > Unfortunately holding the lock is required to support FOLL_LONGTERM (to 
> > check
> > the VMAs) but we don't want to hold the lock to be optimal (specifically 
> > allow
> > FAULT_FOLL_ALLOW_RETRY).  So I'm maintaining the optimization for *_fast 
> > users
> > who do not specify FOLL_LONGTERM.
> > 
> > Another way to do this would have been to define __gup_longterm_unlocked 
> > with
> > the above logic, but that seemed overkill at this point.
> 
> get_user_pages_unlocked() is an exported symbol, shouldn't it work
> with the FOLL_LONGTERM flag?
> 
> I think it should even though we have no user..
> 
> Otherwise the GUP API just gets more confusing.

I agree WRT to the API.  But I think callers of get_user_pages_unlocked() are
not going to get the behavior they want if they specify FOLL_LONGTERM.

What I could do is BUG_ON (or just WARN_ON) if unlocked is called with
FOLL_LONGTERM similar to the code in get_user_pages_locked() which does not
allow locked and

Re: [PATCH v4 4/4] ocxl: Remove some unused exported symbols

2019-03-25 Thread Greg Kurz
On Mon, 25 Mar 2019 16:34:55 +1100
"Alastair D'Silva"  wrote:

> From: Alastair D'Silva 
> 
> Remove some unused exported symbols.
> 
> Signed-off-by: Alastair D'Silva 
> ---

Reviewed-by: Greg Kurz 

>  drivers/misc/ocxl/config.c|  4 +---
>  drivers/misc/ocxl/ocxl_internal.h | 23 +++
>  include/misc/ocxl.h   | 23 ---
>  3 files changed, 24 insertions(+), 26 deletions(-)
> 
> diff --git a/drivers/misc/ocxl/config.c b/drivers/misc/ocxl/config.c
> index 4dc11897237d..5e65acb8e134 100644
> --- a/drivers/misc/ocxl/config.c
> +++ b/drivers/misc/ocxl/config.c
> @@ -2,8 +2,8 @@
>  // Copyright 2017 IBM Corp.
>  #include 
>  #include 
> -#include 
>  #include 
> +#include "ocxl_internal.h"
>  
>  #define EXTRACT_BIT(val, bit) (!!(val & BIT(bit)))
>  #define EXTRACT_BITS(val, s, e) ((val & GENMASK(e, s)) >> s)
> @@ -299,7 +299,6 @@ int ocxl_config_check_afu_index(struct pci_dev *dev,
>   }
>   return 1;
>  }
> -EXPORT_SYMBOL_GPL(ocxl_config_check_afu_index);
>  
>  static int read_afu_name(struct pci_dev *dev, struct ocxl_fn_config *fn,
>   struct ocxl_afu_config *afu)
> @@ -535,7 +534,6 @@ int ocxl_config_get_pasid_info(struct pci_dev *dev, int 
> *count)
>  {
>   return pnv_ocxl_get_pasid_count(dev, count);
>  }
> -EXPORT_SYMBOL_GPL(ocxl_config_get_pasid_info);
>  
>  void ocxl_config_set_afu_pasid(struct pci_dev *dev, int pos, int pasid_base,
>   u32 pasid_count_log)
> diff --git a/drivers/misc/ocxl/ocxl_internal.h 
> b/drivers/misc/ocxl/ocxl_internal.h
> index 321b29e77f45..06fd98c989c8 100644
> --- a/drivers/misc/ocxl/ocxl_internal.h
> +++ b/drivers/misc/ocxl/ocxl_internal.h
> @@ -107,6 +107,29 @@ void ocxl_pasid_afu_free(struct ocxl_fn *fn, u32 start, 
> u32 size);
>  int ocxl_actag_afu_alloc(struct ocxl_fn *fn, u32 size);
>  void ocxl_actag_afu_free(struct ocxl_fn *fn, u32 start, u32 size);
>  
> +/*
> + * Get the max PASID value that can be used by the function
> + */
> +int ocxl_config_get_pasid_info(struct pci_dev *dev, int *count);
> +
> +/*
> + * Check if an AFU index is valid for the given function.
> + *
> + * AFU indexes can be sparse, so a driver should check all indexes up
> + * to the maximum found in the function description
> + */
> +int ocxl_config_check_afu_index(struct pci_dev *dev,
> + struct ocxl_fn_config *fn, int afu_idx);
> +
> +/**
> + * Update values within a Process Element
> + *
> + * link_handle: the link handle associated with the process element
> + * pasid: the PASID for the AFU context
> + * tid: the new thread id for the process element
> + */
> +int ocxl_link_update_pe(void *link_handle, int pasid, __u16 tid);
> +
>  struct ocxl_context *ocxl_context_alloc(void);
>  int ocxl_context_init(struct ocxl_context *ctx, struct ocxl_afu *afu,
>   struct address_space *mapping);
> diff --git a/include/misc/ocxl.h b/include/misc/ocxl.h
> index 4544573cc93c..9530d3be1b30 100644
> --- a/include/misc/ocxl.h
> +++ b/include/misc/ocxl.h
> @@ -56,15 +56,6 @@ struct ocxl_fn_config {
>  int ocxl_config_read_function(struct pci_dev *dev,
>   struct ocxl_fn_config *fn);
>  
> -/*
> - * Check if an AFU index is valid for the given function.
> - *
> - * AFU indexes can be sparse, so a driver should check all indexes up
> - * to the maximum found in the function description
> - */
> -int ocxl_config_check_afu_index(struct pci_dev *dev,
> - struct ocxl_fn_config *fn, int afu_idx);
> -
>  /*
>   * Read the configuration space of a function for the AFU specified by
>   * the index 'afu_idx'. Fills in a ocxl_afu_config structure
> @@ -74,11 +65,6 @@ int ocxl_config_read_afu(struct pci_dev *dev,
>   struct ocxl_afu_config *afu,
>   u8 afu_idx);
>  
> -/*
> - * Get the max PASID value that can be used by the function
> - */
> -int ocxl_config_get_pasid_info(struct pci_dev *dev, int *count);
> -
>  /*
>   * Tell an AFU, by writing in the configuration space, the PASIDs that
>   * it can use. Range starts at 'pasid_base' and its size is a multiple
> @@ -188,15 +174,6 @@ int ocxl_link_add_pe(void *link_handle, int pasid, u32 
> pidr, u32 tidr,
>   void (*xsl_err_cb)(void *data, u64 addr, u64 dsisr),
>   void *xsl_err_data);
>  
> -/**
> - * Update values within a Process Element
> - *
> - * link_handle: the link handle associated with the process element
> - * pasid: the PASID for the AFU context
> - * tid: the new thread id for the process element
> - */
> -int ocxl_link_update_pe(void *link_handle, int pasid, __u16 tid);
> -
>  /*
>   * Remove a Process Element from the Shared Process Area for a link
>   */



Re: [PATCH v4 0/4] ocxl: OpenCAPI Cleanup

2019-03-25 Thread Greg Kurz
Hi Alastair,

I forgot to mention it during v3 but please don't link new version
of a patchset to the previous one with --in-reply-to. This is to
ensure I can see them in my email client without having to scroll
back many days in the past (which likely means a fair number of
e-mails on linuxppc-dev).

Cheers,

--
Greg

On Mon, 25 Mar 2019 16:34:51 +1100
"Alastair D'Silva"  wrote:

> From: Alastair D'Silva 
> 
> Some minor cleanups for the OpenCAPI driver as a prerequisite
> for an ocxl driver refactoring to allow the driver core to
> be utilised by external drivers.
> 
> Changelog:
> V4:
>   - Drop printf format changes as they omit the format indicator for '0'
> V3:
>   - Add missed header in 'ocxl: Remove some unused exported symbols'.
> This addresses the introduced sparse warnings
> V2:
>   - remove intermediate assignment of 'link' var in
> 'Rename struct link to ocxl_link'
>   - Don't shift definition of ocxl_context_attach in
> 'Remove some unused exported symbols'
> 
> 
> Alastair D'Silva (4):
>   ocxl: Rename struct link to ocxl_link
>   ocxl: read_pasid never returns an error, so make it void
>   ocxl: Remove superfluous 'extern' from headers
>   ocxl: Remove some unused exported symbols
> 
>  drivers/misc/ocxl/config.c| 13 ++---
>  drivers/misc/ocxl/file.c  |  5 +-
>  drivers/misc/ocxl/link.c  | 36 ++---
>  drivers/misc/ocxl/ocxl_internal.h | 85 +++
>  include/misc/ocxl.h   | 53 ++-
>  5 files changed, 91 insertions(+), 101 deletions(-)
> 



Re: [PATCH v4 3/4] ocxl: Remove superfluous 'extern' from headers

2019-03-25 Thread Greg Kurz
On Mon, 25 Mar 2019 16:34:54 +1100
"Alastair D'Silva"  wrote:

> From: Alastair D'Silva 
> 
> The 'extern' keyword adds no value here.
> 
> Signed-off-by: Alastair D'Silva 
> ---

Reviewed-by: Greg Kurz 

>  drivers/misc/ocxl/ocxl_internal.h | 54 +++
>  include/misc/ocxl.h   | 36 ++---
>  2 files changed, 44 insertions(+), 46 deletions(-)
> 
> diff --git a/drivers/misc/ocxl/ocxl_internal.h 
> b/drivers/misc/ocxl/ocxl_internal.h
> index a32f2151029f..321b29e77f45 100644
> --- a/drivers/misc/ocxl/ocxl_internal.h
> +++ b/drivers/misc/ocxl/ocxl_internal.h
> @@ -16,7 +16,6 @@
>  
>  extern struct pci_driver ocxl_pci_driver;
>  
> -
>  struct ocxl_fn {
>   struct device dev;
>   int bar_used[3];
> @@ -92,41 +91,40 @@ struct ocxl_process_element {
>   __be32 software_state;
>  };
>  
> +struct ocxl_afu *ocxl_afu_get(struct ocxl_afu *afu);
> +void ocxl_afu_put(struct ocxl_afu *afu);
>  
> -extern struct ocxl_afu *ocxl_afu_get(struct ocxl_afu *afu);
> -extern void ocxl_afu_put(struct ocxl_afu *afu);
> -
> -extern int ocxl_create_cdev(struct ocxl_afu *afu);
> -extern void ocxl_destroy_cdev(struct ocxl_afu *afu);
> -extern int ocxl_register_afu(struct ocxl_afu *afu);
> -extern void ocxl_unregister_afu(struct ocxl_afu *afu);
> +int ocxl_create_cdev(struct ocxl_afu *afu);
> +void ocxl_destroy_cdev(struct ocxl_afu *afu);
> +int ocxl_register_afu(struct ocxl_afu *afu);
> +void ocxl_unregister_afu(struct ocxl_afu *afu);
>  
> -extern int ocxl_file_init(void);
> -extern void ocxl_file_exit(void);
> +int ocxl_file_init(void);
> +void ocxl_file_exit(void);
>  
> -extern int ocxl_pasid_afu_alloc(struct ocxl_fn *fn, u32 size);
> -extern void ocxl_pasid_afu_free(struct ocxl_fn *fn, u32 start, u32 size);
> -extern int ocxl_actag_afu_alloc(struct ocxl_fn *fn, u32 size);
> -extern void ocxl_actag_afu_free(struct ocxl_fn *fn, u32 start, u32 size);
> +int ocxl_pasid_afu_alloc(struct ocxl_fn *fn, u32 size);
> +void ocxl_pasid_afu_free(struct ocxl_fn *fn, u32 start, u32 size);
> +int ocxl_actag_afu_alloc(struct ocxl_fn *fn, u32 size);
> +void ocxl_actag_afu_free(struct ocxl_fn *fn, u32 start, u32 size);
>  
> -extern struct ocxl_context *ocxl_context_alloc(void);
> -extern int ocxl_context_init(struct ocxl_context *ctx, struct ocxl_afu *afu,
> +struct ocxl_context *ocxl_context_alloc(void);
> +int ocxl_context_init(struct ocxl_context *ctx, struct ocxl_afu *afu,
>   struct address_space *mapping);
> -extern int ocxl_context_attach(struct ocxl_context *ctx, u64 amr);
> -extern int ocxl_context_mmap(struct ocxl_context *ctx,
> +int ocxl_context_attach(struct ocxl_context *ctx, u64 amr);
> +int ocxl_context_mmap(struct ocxl_context *ctx,
>   struct vm_area_struct *vma);
> -extern int ocxl_context_detach(struct ocxl_context *ctx);
> -extern void ocxl_context_detach_all(struct ocxl_afu *afu);
> -extern void ocxl_context_free(struct ocxl_context *ctx);
> +int ocxl_context_detach(struct ocxl_context *ctx);
> +void ocxl_context_detach_all(struct ocxl_afu *afu);
> +void ocxl_context_free(struct ocxl_context *ctx);
>  
> -extern int ocxl_sysfs_add_afu(struct ocxl_afu *afu);
> -extern void ocxl_sysfs_remove_afu(struct ocxl_afu *afu);
> +int ocxl_sysfs_add_afu(struct ocxl_afu *afu);
> +void ocxl_sysfs_remove_afu(struct ocxl_afu *afu);
>  
> -extern int ocxl_afu_irq_alloc(struct ocxl_context *ctx, u64 *irq_offset);
> -extern int ocxl_afu_irq_free(struct ocxl_context *ctx, u64 irq_offset);
> -extern void ocxl_afu_irq_free_all(struct ocxl_context *ctx);
> -extern int ocxl_afu_irq_set_fd(struct ocxl_context *ctx, u64 irq_offset,
> +int ocxl_afu_irq_alloc(struct ocxl_context *ctx, u64 *irq_offset);
> +int ocxl_afu_irq_free(struct ocxl_context *ctx, u64 irq_offset);
> +void ocxl_afu_irq_free_all(struct ocxl_context *ctx);
> +int ocxl_afu_irq_set_fd(struct ocxl_context *ctx, u64 irq_offset,
>   int eventfd);
> -extern u64 ocxl_afu_irq_get_addr(struct ocxl_context *ctx, u64 irq_offset);
> +u64 ocxl_afu_irq_get_addr(struct ocxl_context *ctx, u64 irq_offset);
>  
>  #endif /* _OCXL_INTERNAL_H_ */
> diff --git a/include/misc/ocxl.h b/include/misc/ocxl.h
> index 9ff6ddc28e22..4544573cc93c 100644
> --- a/include/misc/ocxl.h
> +++ b/include/misc/ocxl.h
> @@ -53,7 +53,7 @@ struct ocxl_fn_config {
>   * Read the configuration space of a function and fill in a
>   * ocxl_fn_config structure with all the function details
>   */
> -extern int ocxl_config_read_function(struct pci_dev *dev,
> +int ocxl_config_read_function(struct pci_dev *dev,
>   struct ocxl_fn_config *fn);
>  
>  /*
> @@ -62,14 +62,14 @@ extern int ocxl_config_read_function(struct pci_dev *dev,
>   * AFU indexes can be sparse, so a driver should check all indexes up
>   * to the maximum found in the function description
>   */
> -extern int ocxl_config_check_afu_index(struct pci_dev *dev,
> +int ocxl_config_check_afu_index(struct pci_dev *dev,
>

Re: [RESEND 1/7] mm/gup: Replace get_user_pages_longterm() with FOLL_LONGTERM

2019-03-25 Thread Ira Weiny
On Mon, Mar 25, 2019 at 09:45:12AM -0700, Dan Williams wrote:
> On Mon, Mar 25, 2019 at 7:21 AM Ira Weiny  wrote:
> [..]
> > > > @@ -1268,10 +1246,14 @@ static long 
> > > > check_and_migrate_cma_pages(unsigned long start, long nr_pages,
> > > > putback_movable_pages(&cma_page_list);
> > > > }
> > > > /*
> > > > -* We did migrate all the pages, Try to get the page 
> > > > references again
> > > > -* migrating any new CMA pages which we failed to 
> > > > isolate earlier.
> > > > +* We did migrate all the pages, Try to get the page 
> > > > references
> > > > +* again migrating any new CMA pages which we failed to 
> > > > isolate
> > > > +* earlier.
> > > >  */
> > > > -   nr_pages = get_user_pages(start, nr_pages, gup_flags, 
> > > > pages, vmas);
> > > > +   nr_pages = __get_user_pages_locked(tsk, mm, start, 
> > > > nr_pages,
> > > > +  pages, vmas, NULL,
> > > > +  gup_flags);
> > > > +
> > >
> > > Why did this need to change to __get_user_pages_locked?
> >
> > __get_uer_pages_locked() is now the "internal call" for get_user_pages.
> >
> > Technically it did not _have_ to change but there is no need to call
> > get_user_pages() again because the FOLL_TOUCH flags is already set.  Also 
> > this
> > call then matches the __get_user_pages_locked() which was called on the 
> > pages
> > we migrated from.  Mostly this keeps the code "symmetrical" in that we 
> > called
> > __get_user_pages_locked() on the pages we are migrating from and the same 
> > call
> > on the pages we migrated to.
> >
> > While the change here looks funny I think the final code is better.
> 
> Agree, but I think that either needs to be noted in the changelog so
> it's not a surprise, or moved to a follow-on cleanup patch.
> 

Can do...

Thanks!
Ira



Re: [RESEND 4/7] mm/gup: Add FOLL_LONGTERM capability to GUP fast

2019-03-25 Thread Jason Gunthorpe
On Mon, Mar 25, 2019 at 01:42:26AM -0700, Ira Weiny wrote:
> On Fri, Mar 22, 2019 at 03:12:55PM -0700, Dan Williams wrote:
> > On Sun, Mar 17, 2019 at 7:36 PM  wrote:
> > >
> > > From: Ira Weiny 
> > >
> > > DAX pages were previously unprotected from longterm pins when users
> > > called get_user_pages_fast().
> > >
> > > Use the new FOLL_LONGTERM flag to check for DEVMAP pages and fall
> > > back to regular GUP processing if a DEVMAP page is encountered.
> > >
> > > Signed-off-by: Ira Weiny 
> > >  mm/gup.c | 29 +
> > >  1 file changed, 25 insertions(+), 4 deletions(-)
> > >
> > > diff --git a/mm/gup.c b/mm/gup.c
> > > index 0684a9536207..173db0c44678 100644
> > > +++ b/mm/gup.c
> > > @@ -1600,6 +1600,9 @@ static int gup_pte_range(pmd_t pmd, unsigned long 
> > > addr, unsigned long end,
> > > goto pte_unmap;
> > >
> > > if (pte_devmap(pte)) {
> > > +   if (unlikely(flags & FOLL_LONGTERM))
> > > +   goto pte_unmap;
> > > +
> > > pgmap = get_dev_pagemap(pte_pfn(pte), pgmap);
> > > if (unlikely(!pgmap)) {
> > > undo_dev_pagemap(nr, nr_start, pages);
> > > @@ -1739,8 +1742,11 @@ static int gup_huge_pmd(pmd_t orig, pmd_t *pmdp, 
> > > unsigned long addr,
> > > if (!pmd_access_permitted(orig, flags & FOLL_WRITE))
> > > return 0;
> > >
> > > -   if (pmd_devmap(orig))
> > > +   if (pmd_devmap(orig)) {
> > > +   if (unlikely(flags & FOLL_LONGTERM))
> > > +   return 0;
> > > return __gup_device_huge_pmd(orig, pmdp, addr, end, 
> > > pages, nr);
> > > +   }
> > >
> > > refs = 0;
> > > page = pmd_page(orig) + ((addr & ~PMD_MASK) >> PAGE_SHIFT);
> > > @@ -1777,8 +1783,11 @@ static int gup_huge_pud(pud_t orig, pud_t *pudp, 
> > > unsigned long addr,
> > > if (!pud_access_permitted(orig, flags & FOLL_WRITE))
> > > return 0;
> > >
> > > -   if (pud_devmap(orig))
> > > +   if (pud_devmap(orig)) {
> > > +   if (unlikely(flags & FOLL_LONGTERM))
> > > +   return 0;
> > > return __gup_device_huge_pud(orig, pudp, addr, end, 
> > > pages, nr);
> > > +   }
> > >
> > > refs = 0;
> > > page = pud_page(orig) + ((addr & ~PUD_MASK) >> PAGE_SHIFT);
> > > @@ -2066,8 +2075,20 @@ int get_user_pages_fast(unsigned long start, int 
> > > nr_pages,
> > > start += nr << PAGE_SHIFT;
> > > pages += nr;
> > >
> > > -   ret = get_user_pages_unlocked(start, nr_pages - nr, pages,
> > > - gup_flags);
> > > +   if (gup_flags & FOLL_LONGTERM) {
> > > +   down_read(¤t->mm->mmap_sem);
> > > +   ret = __gup_longterm_locked(current, current->mm,
> > > +   start, nr_pages - nr,
> > > +   pages, NULL, 
> > > gup_flags);
> > > +   up_read(¤t->mm->mmap_sem);
> > > +   } else {
> > > +   /*
> > > +* retain FAULT_FOLL_ALLOW_RETRY optimization if
> > > +* possible
> > > +*/
> > > +   ret = get_user_pages_unlocked(start, nr_pages - 
> > > nr,
> > > + pages, gup_flags);
> > 
> > I couldn't immediately grok why this path needs to branch on
> > FOLL_LONGTERM? Won't get_user_pages_unlocked(..., FOLL_LONGTERM) do
> > the right thing?
> 
> Unfortunately holding the lock is required to support FOLL_LONGTERM (to check
> the VMAs) but we don't want to hold the lock to be optimal (specifically allow
> FAULT_FOLL_ALLOW_RETRY).  So I'm maintaining the optimization for *_fast users
> who do not specify FOLL_LONGTERM.
> 
> Another way to do this would have been to define __gup_longterm_unlocked with
> the above logic, but that seemed overkill at this point.

get_user_pages_unlocked() is an exported symbol, shouldn't it work
with the FOLL_LONGTERM flag?

I think it should even though we have no user..

Otherwise the GUP API just gets more confusing.

Jason


Re: [RESEND 1/7] mm/gup: Replace get_user_pages_longterm() with FOLL_LONGTERM

2019-03-25 Thread Dan Williams
On Mon, Mar 25, 2019 at 7:21 AM Ira Weiny  wrote:
[..]
> > > @@ -1268,10 +1246,14 @@ static long check_and_migrate_cma_pages(unsigned 
> > > long start, long nr_pages,
> > > putback_movable_pages(&cma_page_list);
> > > }
> > > /*
> > > -* We did migrate all the pages, Try to get the page 
> > > references again
> > > -* migrating any new CMA pages which we failed to isolate 
> > > earlier.
> > > +* We did migrate all the pages, Try to get the page 
> > > references
> > > +* again migrating any new CMA pages which we failed to 
> > > isolate
> > > +* earlier.
> > >  */
> > > -   nr_pages = get_user_pages(start, nr_pages, gup_flags, 
> > > pages, vmas);
> > > +   nr_pages = __get_user_pages_locked(tsk, mm, start, 
> > > nr_pages,
> > > +  pages, vmas, NULL,
> > > +  gup_flags);
> > > +
> >
> > Why did this need to change to __get_user_pages_locked?
>
> __get_uer_pages_locked() is now the "internal call" for get_user_pages.
>
> Technically it did not _have_ to change but there is no need to call
> get_user_pages() again because the FOLL_TOUCH flags is already set.  Also this
> call then matches the __get_user_pages_locked() which was called on the pages
> we migrated from.  Mostly this keeps the code "symmetrical" in that we called
> __get_user_pages_locked() on the pages we are migrating from and the same call
> on the pages we migrated to.
>
> While the change here looks funny I think the final code is better.

Agree, but I think that either needs to be noted in the changelog so
it's not a surprise, or moved to a follow-on cleanup patch.


Re: [RESEND 5/7] IB/hfi1: Use the new FOLL_LONGTERM flag to get_user_pages_fast()

2019-03-25 Thread Ira Weiny
On Fri, Mar 22, 2019 at 03:14:26PM -0700, Dan Williams wrote:
> On Sun, Mar 17, 2019 at 7:36 PM  wrote:
> >
> > From: Ira Weiny 
> >
> > Use the new FOLL_LONGTERM to get_user_pages_fast() to protect against
> > FS DAX pages being mapped.
> >
> > Signed-off-by: Ira Weiny 
> > ---
> >  drivers/infiniband/hw/hfi1/user_pages.c | 6 --
> >  1 file changed, 4 insertions(+), 2 deletions(-)
> >
> > diff --git a/drivers/infiniband/hw/hfi1/user_pages.c 
> > b/drivers/infiniband/hw/hfi1/user_pages.c
> > index 78ccacaf97d0..6a7f9cd5a94e 100644
> > --- a/drivers/infiniband/hw/hfi1/user_pages.c
> > +++ b/drivers/infiniband/hw/hfi1/user_pages.c
> > @@ -104,9 +104,11 @@ int hfi1_acquire_user_pages(struct mm_struct *mm, 
> > unsigned long vaddr, size_t np
> > bool writable, struct page **pages)
> >  {
> > int ret;
> > +   unsigned int gup_flags = writable ? FOLL_WRITE : 0;
> 
> Maybe:
> 
> unsigned int gup_flags = FOLL_LONGTERM | (writable ? FOLL_WRITE : 0);
> 
> ?

Sure looks good.

Ira

> 
> >
> > -   ret = get_user_pages_fast(vaddr, npages, writable ? FOLL_WRITE : 0,
> > - pages);
> > +   gup_flags |= FOLL_LONGTERM;
> > +
> > +   ret = get_user_pages_fast(vaddr, npages, gup_flags, pages);
> > if (ret < 0)
> > return ret;
> >
> > --
> > 2.20.1
> >
> 


Re: [RESEND 4/7] mm/gup: Add FOLL_LONGTERM capability to GUP fast

2019-03-25 Thread Ira Weiny
On Fri, Mar 22, 2019 at 03:12:55PM -0700, Dan Williams wrote:
> On Sun, Mar 17, 2019 at 7:36 PM  wrote:
> >
> > From: Ira Weiny 
> >
> > DAX pages were previously unprotected from longterm pins when users
> > called get_user_pages_fast().
> >
> > Use the new FOLL_LONGTERM flag to check for DEVMAP pages and fall
> > back to regular GUP processing if a DEVMAP page is encountered.
> >
> > Signed-off-by: Ira Weiny 
> > ---
> >  mm/gup.c | 29 +
> >  1 file changed, 25 insertions(+), 4 deletions(-)
> >
> > diff --git a/mm/gup.c b/mm/gup.c
> > index 0684a9536207..173db0c44678 100644
> > --- a/mm/gup.c
> > +++ b/mm/gup.c
> > @@ -1600,6 +1600,9 @@ static int gup_pte_range(pmd_t pmd, unsigned long 
> > addr, unsigned long end,
> > goto pte_unmap;
> >
> > if (pte_devmap(pte)) {
> > +   if (unlikely(flags & FOLL_LONGTERM))
> > +   goto pte_unmap;
> > +
> > pgmap = get_dev_pagemap(pte_pfn(pte), pgmap);
> > if (unlikely(!pgmap)) {
> > undo_dev_pagemap(nr, nr_start, pages);
> > @@ -1739,8 +1742,11 @@ static int gup_huge_pmd(pmd_t orig, pmd_t *pmdp, 
> > unsigned long addr,
> > if (!pmd_access_permitted(orig, flags & FOLL_WRITE))
> > return 0;
> >
> > -   if (pmd_devmap(orig))
> > +   if (pmd_devmap(orig)) {
> > +   if (unlikely(flags & FOLL_LONGTERM))
> > +   return 0;
> > return __gup_device_huge_pmd(orig, pmdp, addr, end, pages, 
> > nr);
> > +   }
> >
> > refs = 0;
> > page = pmd_page(orig) + ((addr & ~PMD_MASK) >> PAGE_SHIFT);
> > @@ -1777,8 +1783,11 @@ static int gup_huge_pud(pud_t orig, pud_t *pudp, 
> > unsigned long addr,
> > if (!pud_access_permitted(orig, flags & FOLL_WRITE))
> > return 0;
> >
> > -   if (pud_devmap(orig))
> > +   if (pud_devmap(orig)) {
> > +   if (unlikely(flags & FOLL_LONGTERM))
> > +   return 0;
> > return __gup_device_huge_pud(orig, pudp, addr, end, pages, 
> > nr);
> > +   }
> >
> > refs = 0;
> > page = pud_page(orig) + ((addr & ~PUD_MASK) >> PAGE_SHIFT);
> > @@ -2066,8 +2075,20 @@ int get_user_pages_fast(unsigned long start, int 
> > nr_pages,
> > start += nr << PAGE_SHIFT;
> > pages += nr;
> >
> > -   ret = get_user_pages_unlocked(start, nr_pages - nr, pages,
> > - gup_flags);
> > +   if (gup_flags & FOLL_LONGTERM) {
> > +   down_read(¤t->mm->mmap_sem);
> > +   ret = __gup_longterm_locked(current, current->mm,
> > +   start, nr_pages - nr,
> > +   pages, NULL, gup_flags);
> > +   up_read(¤t->mm->mmap_sem);
> > +   } else {
> > +   /*
> > +* retain FAULT_FOLL_ALLOW_RETRY optimization if
> > +* possible
> > +*/
> > +   ret = get_user_pages_unlocked(start, nr_pages - nr,
> > + pages, gup_flags);
> 
> I couldn't immediately grok why this path needs to branch on
> FOLL_LONGTERM? Won't get_user_pages_unlocked(..., FOLL_LONGTERM) do
> the right thing?

Unfortunately holding the lock is required to support FOLL_LONGTERM (to check
the VMAs) but we don't want to hold the lock to be optimal (specifically allow
FAULT_FOLL_ALLOW_RETRY).  So I'm maintaining the optimization for *_fast users
who do not specify FOLL_LONGTERM.

Another way to do this would have been to define __gup_longterm_unlocked with
the above logic, but that seemed overkill at this point.

Ira



Re: [RESEND 3/7] mm/gup: Change GUP fast to use flags rather than a write 'bool'

2019-03-25 Thread Ira Weiny
On Fri, Mar 22, 2019 at 03:05:53PM -0700, Dan Williams wrote:
> On Sun, Mar 17, 2019 at 7:36 PM  wrote:
> >
> > From: Ira Weiny 
> >
> > To facilitate additional options to get_user_pages_fast() change the
> > singular write parameter to be gup_flags.
> >
> > This patch does not change any functionality.  New functionality will
> > follow in subsequent patches.
> >
> > Some of the get_user_pages_fast() call sites were unchanged because they
> > already passed FOLL_WRITE or 0 for the write parameter.
> >
> > Signed-off-by: Ira Weiny 
> >
> > ---
> > Changes from V1:
> > Rebase to current merge tree
> > arch/powerpc/mm/mmu_context_iommu.c no longer calls gup_fast
> > The gup_longterm was converted in patch 1
> >
> >  arch/mips/mm/gup.c | 11 ++-
> >  arch/powerpc/kvm/book3s_64_mmu_hv.c|  4 ++--
> >  arch/powerpc/kvm/e500_mmu.c|  2 +-
> >  arch/s390/kvm/interrupt.c  |  2 +-
> >  arch/s390/mm/gup.c | 12 ++--
> >  arch/sh/mm/gup.c   | 11 ++-
> >  arch/sparc/mm/gup.c|  9 +
> >  arch/x86/kvm/paging_tmpl.h |  2 +-
> >  arch/x86/kvm/svm.c |  2 +-
> >  drivers/fpga/dfl-afu-dma-region.c  |  2 +-
> >  drivers/gpu/drm/via/via_dmablit.c  |  3 ++-
> >  drivers/infiniband/hw/hfi1/user_pages.c|  3 ++-
> >  drivers/misc/genwqe/card_utils.c   |  2 +-
> >  drivers/misc/vmw_vmci/vmci_host.c  |  2 +-
> >  drivers/misc/vmw_vmci/vmci_queue_pair.c|  6 --
> >  drivers/platform/goldfish/goldfish_pipe.c  |  3 ++-
> >  drivers/rapidio/devices/rio_mport_cdev.c   |  4 +++-
> >  drivers/sbus/char/oradax.c |  2 +-
> >  drivers/scsi/st.c  |  3 ++-
> >  drivers/staging/gasket/gasket_page_table.c |  4 ++--
> >  drivers/tee/tee_shm.c  |  2 +-
> >  drivers/vfio/vfio_iommu_spapr_tce.c|  3 ++-
> >  drivers/vhost/vhost.c  |  2 +-
> >  drivers/video/fbdev/pvr2fb.c   |  2 +-
> >  drivers/virt/fsl_hypervisor.c  |  2 +-
> >  drivers/xen/gntdev.c   |  2 +-
> >  fs/orangefs/orangefs-bufmap.c  |  2 +-
> >  include/linux/mm.h |  4 ++--
> >  kernel/futex.c |  2 +-
> >  lib/iov_iter.c |  7 +--
> >  mm/gup.c   | 10 +-
> >  mm/util.c  |  8 
> >  net/ceph/pagevec.c |  2 +-
> >  net/rds/info.c |  2 +-
> >  net/rds/rdma.c |  3 ++-
> >  35 files changed, 79 insertions(+), 63 deletions(-)
> 
> 
> >
> > diff --git a/arch/mips/mm/gup.c b/arch/mips/mm/gup.c
> > index 0d14e0d8eacf..4c2b4483683c 100644
> > --- a/arch/mips/mm/gup.c
> > +++ b/arch/mips/mm/gup.c
> > @@ -235,7 +235,7 @@ int __get_user_pages_fast(unsigned long start, int 
> > nr_pages, int write,
> >   * get_user_pages_fast() - pin user pages in memory
> >   * @start: starting user address
> >   * @nr_pages:  number of pages from start to pin
> > - * @write: whether pages will be written to
> > + * @gup_flags: flags modifying pin behaviour
> >   * @pages: array that receives pointers to the pages pinned.
> >   * Should be at least nr_pages long.
> >   *
> > @@ -247,8 +247,8 @@ int __get_user_pages_fast(unsigned long start, int 
> > nr_pages, int write,
> >   * requested. If nr_pages is 0 or negative, returns 0. If no pages
> >   * were pinned, returns -errno.
> >   */
> > -int get_user_pages_fast(unsigned long start, int nr_pages, int write,
> > -   struct page **pages)
> > +int get_user_pages_fast(unsigned long start, int nr_pages,
> > +   unsigned int gup_flags, struct page **pages)
> 
> This looks a tad scary given all related thrash especially when it's
> only 1 user that wants to do get_user_page_fast_longterm, right?

Agreed but the discussion back in Feb agreed that it would be better to make
get_user_pages_fast() use flags rather than add another *_longterm call, and I
agree.

> Maybe
> something like the following. Note I explicitly moved the flags to the
> end so that someone half paying attention that calls
> __get_user_pages_fast will get a compile error if they specify the
> args in the same order.

I did this to remain consistent with the other public calls.  They put the
"return" pages parameter at the end.  For example get_user_pages() is defined
this way with pages and vmas at the end.

long get_user_pages(unsigned long start, unsigned long nr_pages,
unsigned int gup_flags, struct page **pages,
struct vm_area_struct **vmas)

I'm pretty sure I got all the callers.  Is this worth making the signature
"non-standard" WRT the other calls?

Ir

Re: [PATCH] kbuild: strip whitespace in cmd_record_mcount findstring

2019-03-25 Thread Steven Rostedt
On Mon, 25 Mar 2019 12:04:38 -0400
Joe Lawrence  wrote:

> CC_FLAGS_FTRACE may contain trailing whitespace that interferes with
> findstring.
> 
> For example, commit 6977f95e63b9 ("powerpc: avoid -mno-sched-epilog on
> GCC 4.9 and newer") introduced a change such that on my ppc64le box,
> CC_FLAGS_FTRACE="-pg -mprofile-kernel ".  (Note the trailing space.)
> When cmd_record_mcount is now invoked, findstring fails as the ftrace
> flags were found at very end of _c_flags, without the trailing space.
> 
>   _c_flags=" ... -pg -mprofile-kernel"
>   CC_FLAGS_FTRACE="-pg -mprofile-kernel "
>^
> findstring is looking for this extra space

Wow, what a bug.

> 
> Remove the redundant whitespaces from CC_FLAGS_FTRACE in
> cmd_record_mcount to avoid this problem.
> 
> Fixes: 6977f95e63b9 ("powerpc: avoid -mno-sched-epilog on GCC 4.9 and newer").
> Signed-off-by: Joe Lawrence 

Acked-by: Steven Rostedt (VMware) 

-- Steve

> ---
> 
> Standard disclaimer: I'm not a kbuild expert, but this works around the
> problem I reported where ftrace and livepatch self-tests were failing as
> specified object files were not run through the recordmcount.pl script:
> 
> ppc64le: ftrace self-tests and $(CC_FLAGS_FTRACE) broken?
> https://lists.ozlabs.org/pipermail/linuxppc-dev/2019-March/187298.html
> 
>  scripts/Makefile.build | 8 
>  1 file changed, 4 insertions(+), 4 deletions(-)
> 
> diff --git a/scripts/Makefile.build b/scripts/Makefile.build
> index 2554a15ecf2b..74d402b5aa3c 100644
> --- a/scripts/Makefile.build
> +++ b/scripts/Makefile.build
> @@ -199,10 +199,10 @@ sub_cmd_record_mcount = perl 
> $(srctree)/scripts/recordmcount.pl "$(ARCH)" \
>   "$(if $(part-of-module),1,0)" "$(@)";
>  recordmcount_source := $(srctree)/scripts/recordmcount.pl
>  endif # BUILD_C_RECORDMCOUNT
> -cmd_record_mcount =  \
> - if [ "$(findstring $(CC_FLAGS_FTRACE),$(_c_flags))" =   \
> -  "$(CC_FLAGS_FTRACE)" ]; then   \
> - $(sub_cmd_record_mcount)\
> +cmd_record_mcount =  \
> + if [ "$(findstring $(strip $(CC_FLAGS_FTRACE)),$(_c_flags))" =  \
> +  "$(strip $(CC_FLAGS_FTRACE))" ]; then  \
> + $(sub_cmd_record_mcount)\
>   fi
>  endif # CC_USING_RECORD_MCOUNT
>  endif # CONFIG_FTRACE_MCOUNT_RECORD



Re: Regression in 5.1.0-rc2: PowerBook G4 Aluminum fails to boot - bisected to commit 0df977eafc792

2019-03-25 Thread Larry Finger

On 3/25/19 3:46 AM, Christophe Leroy wrote:



Le 25/03/2019 à 01:49, Larry Finger a écrit :
A build of kernel 5.1.0-rc2 resulted in a failure to boot on my PowerBook G4 
Aluminum. The bootstrap loads the initial kernel and issues the appropriate 
start, but the machine hangs at that point.


The problem does not depend on the choice of PPC32 processor type. This 
machine has a 7447A according to /proc/cpuinfo.


The problem was bisected to the following:

commit 0df977eafc792a5365a7f81d8d5920132e03afad
Author: Christophe Leroy 
Date:   Thu Feb 21 10:37:54 2019 +

 powerpc/6xx: Don't use SPRN_SPRG2 for storing stack pointer while in RTAS

 When calling RTAS, the stack pointer is stored in SPRN_SPRG2
 in order to be able to restore it in case of machine check in RTAS.

 As machine check is not a perfomance critical path, this patch
 frees SPRN_SPRG2 by using a field in thread struct instead.

 Signed-off-by: Christophe Leroy 
 Signed-off-by: Michael Ellerman 

I reverted this patch and found that the system began execution, and then 
failed, likely due to the reassignment of SPRN_SPRG2.


I had found this problem with 5.1.0-rc1, but -rc2 was out by the time I 
finished the bisection. Unfortunately, none of the changes in -rc2 fixed the 
problem.


I think I identified the issue, see https://patchwork.ozlabs.org/patch/1063962/

It is now booting properly under QEMU with your .config

Could you please test it on your target ?


Thanks. That patch fixes the issue.

Larry





Re: [PATCH] powerpc/rtas: fix early boot failure.

2019-03-25 Thread Larry Finger

On 3/25/19 3:43 AM, Christophe Leroy wrote:

Commit 0df977eafc79 ("powerpc/6xx: Don't use SPRN_SPRG2 for storing
stack pointer while in RTAS") changes the code to use a field in
thread struct to store the stack pointer while in RTAS instead of
using SPRN_SPRG2. It therefore converts all places which were
manipulating SPRN_SPRG2 to use that field. During early startup,
the zeroing of SPRN_SPRG2 has been replaced by a zeroing of that
field in thread struct. But at least in start_here, that's done
wrongly because it used the physical address of the fields while
MMU is on at that time.

So the virtual address of the field should be used instead, but in
the meantime, thread struct has already been zeroised and initialised
so we can just drop this initialisation.

Reported-by: Larry Finger 
Fixes: 0df977eafc79 ("powerpc/6xx: Don't use SPRN_SPRG2 for storing stack pointer 
while in RTAS")
Signed-off-by: Christophe Leroy 


Tested-by: Larry Finger 

My PPC box now boots OK.

Larry



Re: ppc64le: ftrace self-tests and $(CC_FLAGS_FTRACE) broken?

2019-03-25 Thread Joe Lawrence

On 3/23/19 1:27 PM, Joe Lawrence wrote:

On 03/23/2019 12:17 PM, Steven Rostedt wrote:

On Sat, 23 Mar 2019 09:19:50 -0400
Joe Lawrence  wrote:


Perhaps this is gcc version specific?  I didn't see any other reports,
so maybe its specific to my config.  If I run make V=1, I can see that
gcc is called with '-pg -mprofile-kernel', but then the record_mcount
script is skipped.  Any ideas?


But you see it running the script if you remove that commit? Do you
happen to see -mrecord-mcount at all in a V=1 make?



See entire make output from the two runs here:
http://people.redhat.com/~jolawren/ppc64le-mprofile/


Mystery solved!  That other commit inadvertently added a trailing space 
to CC_FLAGS_FTRACE, which then confused the findstring call in the 
cmd_record_mcount Makefile function.


Workaround patch just posted ... kbuild experts can suggest better fixes 
if that one is sub optimal.


Thanks,

-- Joe


[PATCH] kbuild: strip whitespace in cmd_record_mcount findstring

2019-03-25 Thread Joe Lawrence
CC_FLAGS_FTRACE may contain trailing whitespace that interferes with
findstring.

For example, commit 6977f95e63b9 ("powerpc: avoid -mno-sched-epilog on
GCC 4.9 and newer") introduced a change such that on my ppc64le box,
CC_FLAGS_FTRACE="-pg -mprofile-kernel ".  (Note the trailing space.)
When cmd_record_mcount is now invoked, findstring fails as the ftrace
flags were found at very end of _c_flags, without the trailing space.

  _c_flags=" ... -pg -mprofile-kernel"
  CC_FLAGS_FTRACE="-pg -mprofile-kernel "
   ^
findstring is looking for this extra space

Remove the redundant whitespaces from CC_FLAGS_FTRACE in
cmd_record_mcount to avoid this problem.

Fixes: 6977f95e63b9 ("powerpc: avoid -mno-sched-epilog on GCC 4.9 and newer").
Signed-off-by: Joe Lawrence 
---

Standard disclaimer: I'm not a kbuild expert, but this works around the
problem I reported where ftrace and livepatch self-tests were failing as
specified object files were not run through the recordmcount.pl script:

ppc64le: ftrace self-tests and $(CC_FLAGS_FTRACE) broken?
https://lists.ozlabs.org/pipermail/linuxppc-dev/2019-March/187298.html

 scripts/Makefile.build | 8 
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/scripts/Makefile.build b/scripts/Makefile.build
index 2554a15ecf2b..74d402b5aa3c 100644
--- a/scripts/Makefile.build
+++ b/scripts/Makefile.build
@@ -199,10 +199,10 @@ sub_cmd_record_mcount = perl 
$(srctree)/scripts/recordmcount.pl "$(ARCH)" \
"$(if $(part-of-module),1,0)" "$(@)";
 recordmcount_source := $(srctree)/scripts/recordmcount.pl
 endif # BUILD_C_RECORDMCOUNT
-cmd_record_mcount =\
-   if [ "$(findstring $(CC_FLAGS_FTRACE),$(_c_flags))" =   \
-"$(CC_FLAGS_FTRACE)" ]; then   \
-   $(sub_cmd_record_mcount)\
+cmd_record_mcount =\
+   if [ "$(findstring $(strip $(CC_FLAGS_FTRACE)),$(_c_flags))" =  \
+"$(strip $(CC_FLAGS_FTRACE))" ]; then  \
+   $(sub_cmd_record_mcount)\
fi
 endif # CC_USING_RECORD_MCOUNT
 endif # CONFIG_FTRACE_MCOUNT_RECORD
-- 
2.20.1



Re: Regression in 5.1.0-rc2: PowerBook G4 Aluminum fails to boot - bisected to commit 0df977eafc792

2019-03-25 Thread Larry Finger

On 3/25/19 1:53 AM, Christophe Leroy wrote:



Le 25/03/2019 à 01:49, Larry Finger a écrit :
A build of kernel 5.1.0-rc2 resulted in a failure to boot on my PowerBook G4 
Aluminum. The bootstrap loads the initial kernel and issues the appropriate 
start, but the machine hangs at that point.


Can you please be more explicit ? What do you mean by "issues the appropriate 
start" ? What is "that point" ? Any messages on the console ?


Sorry to be unclear. The bootstrap code prints a line saying "Booting Linux via 
__start() @ 0x00c0" and then hangs.


Your idea of confusion between physical and virtual address sounds right. I am 
building a kernel with that patch applied now. As it seems to be going quickly, 
I should have the answer fairly quickly.


Thanks,

Larry




Re: [PATCH v3 7/7] ocxl: Provide global MMIO accessors for external drivers

2019-03-25 Thread Frederic Barrat




Le 25/03/2019 à 06:44, Alastair D'Silva a écrit :

From: Alastair D'Silva 

External drivers that communicate via OpenCAPI will need to make
MMIO calls to interact with the devices.

Signed-off-by: Alastair D'Silva 
Reviewed-by: Greg Kurz 
---



Acked-by: Frederic Barrat 



  drivers/misc/ocxl/Makefile |   2 +-
  drivers/misc/ocxl/mmio.c   | 234 +
  include/misc/ocxl.h| 110 +
  3 files changed, 345 insertions(+), 1 deletion(-)
  create mode 100644 drivers/misc/ocxl/mmio.c

diff --git a/drivers/misc/ocxl/Makefile b/drivers/misc/ocxl/Makefile
index bc4e39bfda7b..d07d1bb8e8d4 100644
--- a/drivers/misc/ocxl/Makefile
+++ b/drivers/misc/ocxl/Makefile
@@ -1,7 +1,7 @@
  # SPDX-License-Identifier: GPL-2.0+
  ccflags-$(CONFIG_PPC_WERROR)  += -Werror
  
-ocxl-y+= main.o pci.o config.o file.o pasid.o

+ocxl-y += main.o pci.o config.o file.o pasid.o mmio.o
  ocxl-y+= link.o context.o afu_irq.o sysfs.o 
trace.o
  ocxl-y+= core.o
  obj-$(CONFIG_OCXL)+= ocxl.o
diff --git a/drivers/misc/ocxl/mmio.c b/drivers/misc/ocxl/mmio.c
new file mode 100644
index ..aae713db4ebe
--- /dev/null
+++ b/drivers/misc/ocxl/mmio.c
@@ -0,0 +1,234 @@
+// SPDX-License-Identifier: GPL-2.0+
+// Copyright 2019 IBM Corp.
+#include 
+#include "trace.h"
+#include "ocxl_internal.h"
+
+int ocxl_global_mmio_read32(struct ocxl_afu *afu, size_t offset,
+   enum ocxl_endian endian, u32 *val)
+{
+   if (offset > afu->config.global_mmio_size - 4)
+   return -EINVAL;
+
+#ifdef __BIG_ENDIAN__
+   if (endian == OCXL_HOST_ENDIAN)
+   endian = OCXL_BIG_ENDIAN;
+#endif
+
+   switch (endian) {
+   case OCXL_BIG_ENDIAN:
+   *val = readl_be((char *)afu->global_mmio_ptr + offset);
+   break;
+
+   default:
+   *val = readl((char *)afu->global_mmio_ptr + offset);
+   break;
+   }
+
+   return 0;
+}
+EXPORT_SYMBOL_GPL(ocxl_global_mmio_read32);
+
+int ocxl_global_mmio_read64(struct ocxl_afu *afu, size_t offset,
+   enum ocxl_endian endian, u64 *val)
+{
+   if (offset > afu->config.global_mmio_size - 8)
+   return -EINVAL;
+
+#ifdef __BIG_ENDIAN__
+   if (endian == OCXL_HOST_ENDIAN)
+   endian = OCXL_BIG_ENDIAN;
+#endif
+
+   switch (endian) {
+   case OCXL_BIG_ENDIAN:
+   *val = readq_be((char *)afu->global_mmio_ptr + offset);
+   break;
+
+   default:
+   *val = readq((char *)afu->global_mmio_ptr + offset);
+   break;
+   }
+
+   return 0;
+}
+EXPORT_SYMBOL_GPL(ocxl_global_mmio_read64);
+
+int ocxl_global_mmio_write32(struct ocxl_afu *afu, size_t offset,
+   enum ocxl_endian endian, u32 val)
+{
+   if (offset > afu->config.global_mmio_size - 4)
+   return -EINVAL;
+
+#ifdef __BIG_ENDIAN__
+   if (endian == OCXL_HOST_ENDIAN)
+   endian = OCXL_BIG_ENDIAN;
+#endif
+
+   switch (endian) {
+   case OCXL_BIG_ENDIAN:
+   writel_be(val, (char *)afu->global_mmio_ptr + offset);
+   break;
+
+   default:
+   writel(val, (char *)afu->global_mmio_ptr + offset);
+   break;
+   }
+
+
+   return 0;
+}
+EXPORT_SYMBOL_GPL(ocxl_global_mmio_write32);
+
+int ocxl_global_mmio_write64(struct ocxl_afu *afu, size_t offset,
+   enum ocxl_endian endian, u64 val)
+{
+   if (offset > afu->config.global_mmio_size - 8)
+   return -EINVAL;
+
+#ifdef __BIG_ENDIAN__
+   if (endian == OCXL_HOST_ENDIAN)
+   endian = OCXL_BIG_ENDIAN;
+#endif
+
+   switch (endian) {
+   case OCXL_BIG_ENDIAN:
+   writeq_be(val, (char *)afu->global_mmio_ptr + offset);
+   break;
+
+   default:
+   writeq(val, (char *)afu->global_mmio_ptr + offset);
+   break;
+   }
+
+
+   return 0;
+}
+EXPORT_SYMBOL_GPL(ocxl_global_mmio_write64);
+
+int ocxl_global_mmio_set32(struct ocxl_afu *afu, size_t offset,
+   enum ocxl_endian endian, u32 mask)
+{
+   u32 tmp;
+
+   if (offset > afu->config.global_mmio_size - 4)
+   return -EINVAL;
+
+#ifdef __BIG_ENDIAN__
+   if (endian == OCXL_HOST_ENDIAN)
+   endian = OCXL_BIG_ENDIAN;
+#endif
+
+   switch (endian) {
+   case OCXL_BIG_ENDIAN:
+   tmp = readl_be((char *)afu->global_mmio_ptr + offset);
+   tmp |= mask;
+   writel_be(tmp, (char *)afu->global_mmio_ptr + offset);
+   break;
+
+   default:
+   tmp = readl((char *)afu->global_mmio_ptr + offset);
+   tmp |= mask;
+   writel(tmp, (char *)afu->global_mmio_ptr + offset);
+   break;
+   }
+
+  

Re: [PATCH v3 6/7] ocxl: move event_fd handling to frontend

2019-03-25 Thread Frederic Barrat




Le 25/03/2019 à 06:44, Alastair D'Silva a écrit :

From: Alastair D'Silva 

Event_fd is only used in the driver frontend, so it does not
need to exist in the backend code. Relocate it to the frontend
and provide an opaque mechanism for consumers instead.

Signed-off-by: Alastair D'Silva 
---
  drivers/misc/ocxl/afu_irq.c   | 76 ++-
  drivers/misc/ocxl/file.c  | 22 -
  drivers/misc/ocxl/ocxl_internal.h |  5 --
  include/misc/ocxl.h   | 46 +++
  4 files changed, 111 insertions(+), 38 deletions(-)

diff --git a/drivers/misc/ocxl/afu_irq.c b/drivers/misc/ocxl/afu_irq.c
index 2d410cd6f817..d71e62df7d31 100644
--- a/drivers/misc/ocxl/afu_irq.c
+++ b/drivers/misc/ocxl/afu_irq.c
@@ -1,7 +1,7 @@
  // SPDX-License-Identifier: GPL-2.0+
  // Copyright 2017 IBM Corp.
  #include 
-#include 
+#include 
  #include "ocxl_internal.h"
  #include "trace.h"
  
@@ -11,7 +11,9 @@ struct afu_irq {

unsigned int virq;
char *name;
u64 trigger_page;
-   struct eventfd_ctx *ev_ctx;
+   irqreturn_t (*handler)(void *private);
+   void (*free_private)(void *private);
+   void *private;
  };
  
  int ocxl_irq_offset_to_id(struct ocxl_context *ctx, u64 offset)

@@ -24,14 +26,43 @@ u64 ocxl_irq_id_to_offset(struct ocxl_context *ctx, int 
irq_id)
return ctx->afu->irq_base_offset + (irq_id << PAGE_SHIFT);
  }
  
+int ocxl_irq_set_handler(struct ocxl_context *ctx, int irq_id,

+   irqreturn_t (*handler)(void *private),
+   void (*free_private)(void *private),
+   void *private)
+{
+   struct afu_irq *irq;
+   int rc;
+
+   mutex_lock(&ctx->irq_lock);
+   irq = idr_find(&ctx->irq_idr, irq_id);
+   if (!irq) {
+   rc = -EINVAL;
+   goto unlock;
+   }
+
+   irq->handler = handler;
+   irq->private = private;
+
+   rc = 0;
+   goto unlock;



That goto is too much.



+
+unlock:
+   mutex_unlock(&ctx->irq_lock);
+   return rc;
+}
+EXPORT_SYMBOL_GPL(ocxl_irq_set_handler);
+
  static irqreturn_t afu_irq_handler(int virq, void *data)
  {
struct afu_irq *irq = (struct afu_irq *) data;
  
  	trace_ocxl_afu_irq_receive(virq);

-   if (irq->ev_ctx)
-   eventfd_signal(irq->ev_ctx, 1);
-   return IRQ_HANDLED;
+
+   if (irq->handler)
+   return irq->handler(irq->private);
+
+   return IRQ_HANDLED; // Just drop it on the ground
  }
  
  static int setup_afu_irq(struct ocxl_context *ctx, struct afu_irq *irq)

@@ -118,6 +149,8 @@ int ocxl_afu_irq_alloc(struct ocxl_context *ctx, int 
*irq_id)
return rc;
  }
  
+EXPORT_SYMBOL_GPL(ocxl_afu_irq_alloc);



nit: checkpatch doesn't like the empty line between the function and its 
symbol export. Also true for a few other symbols in that file.





diff --git a/include/misc/ocxl.h b/include/misc/ocxl.h
index a8fe0ce4ea67..1b48e9d63abb 100644
--- a/include/misc/ocxl.h
+++ b/include/misc/ocxl.h
@@ -161,6 +161,52 @@ int ocxl_context_attach(struct ocxl_context *ctx, u64 amr,
   */
  int ocxl_context_detach(struct ocxl_context *ctx);
  
+// AFU IRQs

+
+/**
+ * Allocate an IRQ associated with an AFU context
+ * @ctx: the AFU context
+ * @irq_id: out, the IRQ ID
+ *
+ * Returns 0 on success, negative on failure
+ */
+extern int ocxl_afu_irq_alloc(struct ocxl_context *ctx, int *irq_id);
+
+/**
+ * Frees an IRQ associated with an AFU context
+ * @ctx: the AFU context
+ * @irq_id: the IRQ ID
+ *
+ * Returns 0 on success, negative on failure
+ */
+extern int ocxl_afu_irq_free(struct ocxl_context *ctx, int irq_id);
+
+/**
+ * Gets the address of the trigger page for an IRQ
+ * This can then be provided to an AFU which will write to that
+ * page to trigger the IRQ.
+ * @ctx: The AFU context that the IRQ is associated with
+ * @irq_id: The IRQ ID
+ *
+ * returns the trigger page address, or 0 if the IRQ is not valid
+ */
+extern u64 ocxl_afu_irq_get_addr(struct ocxl_context *ctx, int irq_id);
+
+/**
+ * Provide a callback to be called when an IRQ is triggered
+ * @ctx: The AFU context that the IRQ is associated with
+ * @irq_id: The IRQ ID
+ * @handler: the callback to be called when the IRQ is triggered
+ * @free_private: the callback to be called when the IRQ is freed


We should mention free_private can be NULL, in which case it is ignored.

  Fred



Re: [PATCH v5 3/3] locking/rwsem: Optimize down_read_trylock()

2019-03-25 Thread Christophe Leroy

Hi,

Could you share the microbenchmark you are using ?

I'd like to test the series on powerpc.

Thanks
Christophe

Le 22/03/2019 à 15:30, Waiman Long a écrit :

Modify __down_read_trylock() to optimize for an unlocked rwsem and make
it generate slightly better code.

Before this patch, down_read_trylock:

0x <+0>: callq  0x5 
0x0005 <+5>: jmp0x18 
0x0007 <+7>: lea0x1(%rdx),%rcx
0x000b <+11>:mov%rdx,%rax
0x000e <+14>:lock cmpxchg %rcx,(%rdi)
0x0013 <+19>:cmp%rax,%rdx
0x0016 <+22>:je 0x23 
0x0018 <+24>:mov(%rdi),%rdx
0x001b <+27>:test   %rdx,%rdx
0x001e <+30>:jns0x7 
0x0020 <+32>:xor%eax,%eax
0x0022 <+34>:retq
0x0023 <+35>:mov%gs:0x0,%rax
0x002c <+44>:or $0x3,%rax
0x0030 <+48>:mov%rax,0x20(%rdi)
0x0034 <+52>:mov$0x1,%eax
0x0039 <+57>:retq

After patch, down_read_trylock:

0x <+0>:  callq  0x5 
0x0005 <+5>:  xor%eax,%eax
0x0007 <+7>:  lea0x1(%rax),%rdx
0x000b <+11>: lock cmpxchg %rdx,(%rdi)
0x0010 <+16>: jne0x29 
0x0012 <+18>: mov%gs:0x0,%rax
0x001b <+27>: or $0x3,%rax
0x001f <+31>: mov%rax,0x20(%rdi)
0x0023 <+35>: mov$0x1,%eax
0x0028 <+40>: retq
0x0029 <+41>: test   %rax,%rax
0x002c <+44>: jns0x7 
0x002e <+46>: xor%eax,%eax
0x0030 <+48>: retq

By using a rwsem microbenchmark, the down_read_trylock() rate (with a
load of 10 to lengthen the lock critical section) on a x86-64 system
before and after the patch were:

  Before PatchAfter Patch
# of Threads rlock   rlock
 -   -
 1   14,496  14,716
 28,644   8,453
46,799   6,983
85,664   7,190

On a ARM64 system, the performance results were:

  Before PatchAfter Patch
# of Threads rlock   rlock
 -   -
 1   23,676  24,488
 27,697   9,502
 44,945   3,440
 82,641   1,603

For the uncontended case (1 thread), the new down_read_trylock() is a
little bit faster. For the contended cases, the new down_read_trylock()
perform pretty well in x86-64, but performance degrades at high
contention level on ARM64.

Suggested-by: Linus Torvalds 
Signed-off-by: Waiman Long 
---
  kernel/locking/rwsem.h | 13 -
  1 file changed, 8 insertions(+), 5 deletions(-)

diff --git a/kernel/locking/rwsem.h b/kernel/locking/rwsem.h
index 45ee00236e03..1f5775aa6a1d 100644
--- a/kernel/locking/rwsem.h
+++ b/kernel/locking/rwsem.h
@@ -174,14 +174,17 @@ static inline int __down_read_killable(struct 
rw_semaphore *sem)
  
  static inline int __down_read_trylock(struct rw_semaphore *sem)

  {
-   long tmp;
+   /*
+* Optimize for the case when the rwsem is not locked at all.
+*/
+   long tmp = RWSEM_UNLOCKED_VALUE;
  
-	while ((tmp = atomic_long_read(&sem->count)) >= 0) {

-   if (tmp == atomic_long_cmpxchg_acquire(&sem->count, tmp,
-  tmp + RWSEM_ACTIVE_READ_BIAS)) {
+   do {
+   if (atomic_long_try_cmpxchg_acquire(&sem->count, &tmp,
+   tmp + RWSEM_ACTIVE_READ_BIAS)) {
return 1;
}
-   }
+   } while (tmp >= 0);
return 0;
  }
  



Re: [PATCH v3 5/7] ocxl: afu_irq only deals with IRQ IDs, not offsets

2019-03-25 Thread Frederic Barrat




Le 25/03/2019 à 06:44, Alastair D'Silva a écrit :

From: Alastair D'Silva 

The use of offsets is required only in the frontend, so alter
the IRQ API to only work with IRQ IDs in the backend.

Signed-off-by: Alastair D'Silva 
---



Acked-by: Frederic Barrat 



  drivers/misc/ocxl/afu_irq.c   | 34 +++
  drivers/misc/ocxl/context.c   |  7 +--
  drivers/misc/ocxl/file.c  | 13 +++-
  drivers/misc/ocxl/ocxl_internal.h | 10 +
  drivers/misc/ocxl/trace.h | 12 ---
  5 files changed, 39 insertions(+), 37 deletions(-)

diff --git a/drivers/misc/ocxl/afu_irq.c b/drivers/misc/ocxl/afu_irq.c
index 11ab996657a2..2d410cd6f817 100644
--- a/drivers/misc/ocxl/afu_irq.c
+++ b/drivers/misc/ocxl/afu_irq.c
@@ -14,14 +14,14 @@ struct afu_irq {
struct eventfd_ctx *ev_ctx;
  };
  
-static int irq_offset_to_id(struct ocxl_context *ctx, u64 offset)

+int ocxl_irq_offset_to_id(struct ocxl_context *ctx, u64 offset)
  {
return (offset - ctx->afu->irq_base_offset) >> PAGE_SHIFT;
  }
  
-static u64 irq_id_to_offset(struct ocxl_context *ctx, int id)

+u64 ocxl_irq_id_to_offset(struct ocxl_context *ctx, int irq_id)
  {
-   return ctx->afu->irq_base_offset + (id << PAGE_SHIFT);
+   return ctx->afu->irq_base_offset + (irq_id << PAGE_SHIFT);
  }
  
  static irqreturn_t afu_irq_handler(int virq, void *data)

@@ -69,7 +69,7 @@ static void release_afu_irq(struct afu_irq *irq)
kfree(irq->name);
  }
  
-int ocxl_afu_irq_alloc(struct ocxl_context *ctx, u64 *irq_offset)

+int ocxl_afu_irq_alloc(struct ocxl_context *ctx, int *irq_id)
  {
struct afu_irq *irq;
int rc;
@@ -101,11 +101,11 @@ int ocxl_afu_irq_alloc(struct ocxl_context *ctx, u64 
*irq_offset)
if (rc)
goto err_alloc;
  
-	*irq_offset = irq_id_to_offset(ctx, irq->id);

-
-   trace_ocxl_afu_irq_alloc(ctx->pasid, irq->id, irq->virq, irq->hw_irq,
-   *irq_offset);
+   trace_ocxl_afu_irq_alloc(ctx->pasid, irq->id, irq->virq, irq->hw_irq);
mutex_unlock(&ctx->irq_lock);
+
+   *irq_id = irq->id;
+
return 0;
  
  err_alloc:

@@ -123,7 +123,7 @@ static void afu_irq_free(struct afu_irq *irq, struct 
ocxl_context *ctx)
trace_ocxl_afu_irq_free(ctx->pasid, irq->id);
if (ctx->mapping)
unmap_mapping_range(ctx->mapping,
-   irq_id_to_offset(ctx, irq->id),
+   ocxl_irq_id_to_offset(ctx, irq->id),
1 << PAGE_SHIFT, 1);
release_afu_irq(irq);
if (irq->ev_ctx)
@@ -132,14 +132,13 @@ static void afu_irq_free(struct afu_irq *irq, struct 
ocxl_context *ctx)
kfree(irq);
  }
  
-int ocxl_afu_irq_free(struct ocxl_context *ctx, u64 irq_offset)

+int ocxl_afu_irq_free(struct ocxl_context *ctx, int irq_id)
  {
struct afu_irq *irq;
-   int id = irq_offset_to_id(ctx, irq_offset);
  
  	mutex_lock(&ctx->irq_lock);
  
-	irq = idr_find(&ctx->irq_idr, id);

+   irq = idr_find(&ctx->irq_idr, irq_id);
if (!irq) {
mutex_unlock(&ctx->irq_lock);
return -EINVAL;
@@ -161,14 +160,14 @@ void ocxl_afu_irq_free_all(struct ocxl_context *ctx)
mutex_unlock(&ctx->irq_lock);
  }
  
-int ocxl_afu_irq_set_fd(struct ocxl_context *ctx, u64 irq_offset, int eventfd)

+int ocxl_afu_irq_set_fd(struct ocxl_context *ctx, int irq_id, int eventfd)
  {
struct afu_irq *irq;
struct eventfd_ctx *ev_ctx;
-   int rc = 0, id = irq_offset_to_id(ctx, irq_offset);
+   int rc = 0;
  
  	mutex_lock(&ctx->irq_lock);

-   irq = idr_find(&ctx->irq_idr, id);
+   irq = idr_find(&ctx->irq_idr, irq_id);
if (!irq) {
rc = -EINVAL;
goto unlock;
@@ -186,14 +185,13 @@ int ocxl_afu_irq_set_fd(struct ocxl_context *ctx, u64 
irq_offset, int eventfd)
return rc;
  }
  
-u64 ocxl_afu_irq_get_addr(struct ocxl_context *ctx, u64 irq_offset)

+u64 ocxl_afu_irq_get_addr(struct ocxl_context *ctx, int irq_id)
  {
struct afu_irq *irq;
-   int id = irq_offset_to_id(ctx, irq_offset);
u64 addr = 0;
  
  	mutex_lock(&ctx->irq_lock);

-   irq = idr_find(&ctx->irq_idr, id);
+   irq = idr_find(&ctx->irq_idr, irq_id);
if (irq)
addr = irq->trigger_page;
mutex_unlock(&ctx->irq_lock);
diff --git a/drivers/misc/ocxl/context.c b/drivers/misc/ocxl/context.c
index 8b97b0f19db8..d6056883b85d 100644
--- a/drivers/misc/ocxl/context.c
+++ b/drivers/misc/ocxl/context.c
@@ -93,8 +93,9 @@ static vm_fault_t map_afu_irq(struct vm_area_struct *vma, 
unsigned long address,
u64 offset, struct ocxl_context *ctx)
  {
u64 trigger_addr;
+   int irq_id = ocxl_irq_offset_to_id(ctx, offset);
  
-	trigger_addr = ocxl_afu_irq_get_addr(ctx, offset);

+   trigger_addr = ocxl_afu_irq_get_addr(ctx, irq_id);
if (!trigger_addr)

Re: [PATCH v3 4/7] ocxl: Allow external drivers to use OpenCAPI contexts

2019-03-25 Thread Frederic Barrat




Le 25/03/2019 à 06:44, Alastair D'Silva a écrit :

From: Alastair D'Silva 

Most OpenCAPI operations require a valid context, so
exposing these functions to external drivers is necessary.

Signed-off-by: Alastair D'Silva 
Reviewed-by: Greg Kurz 
---


See comment on previous patch regarding merging ocxl_context_alloc() and 
ocxl_context_init(), it would impact the exported symbols.


  Fred



  drivers/misc/ocxl/context.c   |  9 +--
  drivers/misc/ocxl/file.c  |  2 +-
  drivers/misc/ocxl/ocxl_internal.h |  6 -
  include/misc/ocxl.h   | 45 +++
  4 files changed, 53 insertions(+), 9 deletions(-)

diff --git a/drivers/misc/ocxl/context.c b/drivers/misc/ocxl/context.c
index c73a859d2224..8b97b0f19db8 100644
--- a/drivers/misc/ocxl/context.c
+++ b/drivers/misc/ocxl/context.c
@@ -8,6 +8,7 @@ struct ocxl_context *ocxl_context_alloc(void)
  {
return kzalloc(sizeof(struct ocxl_context), GFP_KERNEL);
  }
+EXPORT_SYMBOL_GPL(ocxl_context_alloc);
  
  int ocxl_context_init(struct ocxl_context *ctx, struct ocxl_afu *afu,

struct address_space *mapping)
@@ -43,6 +44,7 @@ int ocxl_context_init(struct ocxl_context *ctx, struct 
ocxl_afu *afu,
ocxl_afu_get(afu);
return 0;
  }
+EXPORT_SYMBOL_GPL(ocxl_context_init);
  
  /*

   * Callback for when a translation fault triggers an error
@@ -63,7 +65,7 @@ static void xsl_fault_error(void *data, u64 addr, u64 dsisr)
wake_up_all(&ctx->events_wq);
  }
  
-int ocxl_context_attach(struct ocxl_context *ctx, u64 amr)

+int ocxl_context_attach(struct ocxl_context *ctx, u64 amr, struct mm_struct 
*mm)
  {
int rc;
  
@@ -75,7 +77,7 @@ int ocxl_context_attach(struct ocxl_context *ctx, u64 amr)

}
  
  	rc = ocxl_link_add_pe(ctx->afu->fn->link, ctx->pasid,

-   current->mm->context.id, ctx->tidr, amr, current->mm,
+   mm->context.id, ctx->tidr, amr, mm,
xsl_fault_error, ctx);
if (rc)
goto out;
@@ -85,6 +87,7 @@ int ocxl_context_attach(struct ocxl_context *ctx, u64 amr)
mutex_unlock(&ctx->status_mutex);
return rc;
  }
+EXPORT_SYMBOL_GPL(ocxl_context_attach);
  
  static vm_fault_t map_afu_irq(struct vm_area_struct *vma, unsigned long address,

u64 offset, struct ocxl_context *ctx)
@@ -243,6 +246,7 @@ int ocxl_context_detach(struct ocxl_context *ctx)
}
return 0;
  }
+EXPORT_SYMBOL_GPL(ocxl_context_detach);
  
  void ocxl_context_detach_all(struct ocxl_afu *afu)

  {
@@ -280,3 +284,4 @@ void ocxl_context_free(struct ocxl_context *ctx)
ocxl_afu_put(ctx->afu);
kfree(ctx);
  }
+EXPORT_SYMBOL_GPL(ocxl_context_free);
diff --git a/drivers/misc/ocxl/file.c b/drivers/misc/ocxl/file.c
index e6e6121cd9a3..e51578186fd4 100644
--- a/drivers/misc/ocxl/file.c
+++ b/drivers/misc/ocxl/file.c
@@ -94,7 +94,7 @@ static long afu_ioctl_attach(struct ocxl_context *ctx,
return -EINVAL;
  
  	amr = arg.amr & mfspr(SPRN_UAMOR);

-   rc = ocxl_context_attach(ctx, amr);
+   rc = ocxl_context_attach(ctx, amr, current->mm);
return rc;
  }
  
diff --git a/drivers/misc/ocxl/ocxl_internal.h b/drivers/misc/ocxl/ocxl_internal.h

index e04e547df29e..cda1e7667fc8 100644
--- a/drivers/misc/ocxl/ocxl_internal.h
+++ b/drivers/misc/ocxl/ocxl_internal.h
@@ -130,15 +130,9 @@ int ocxl_config_check_afu_index(struct pci_dev *dev,
   */
  int ocxl_link_update_pe(void *link_handle, int pasid, __u16 tid);
  
-struct ocxl_context *ocxl_context_alloc(void);

-int ocxl_context_init(struct ocxl_context *ctx, struct ocxl_afu *afu,
-   struct address_space *mapping);
-int ocxl_context_attach(struct ocxl_context *ctx, u64 amr);
  int ocxl_context_mmap(struct ocxl_context *ctx,
struct vm_area_struct *vma);
-int ocxl_context_detach(struct ocxl_context *ctx);
  void ocxl_context_detach_all(struct ocxl_afu *afu);
-void ocxl_context_free(struct ocxl_context *ctx);
  
  int ocxl_sysfs_register_afu(struct ocxl_afu *afu);

  void ocxl_sysfs_unregister_afu(struct ocxl_afu *afu);
diff --git a/include/misc/ocxl.h b/include/misc/ocxl.h
index 8bafd748e380..a8fe0ce4ea67 100644
--- a/include/misc/ocxl.h
+++ b/include/misc/ocxl.h
@@ -116,6 +116,51 @@ const struct ocxl_fn_config *ocxl_function_config(struct 
ocxl_fn *fn);
   */
  void ocxl_function_close(struct ocxl_fn *fn);
  
+// Context allocation

+
+/**
+ * Allocate space for a new OpenCAPI context
+ *
+ * Returns NULL on failure
+ */
+struct ocxl_context *ocxl_context_alloc(void);
+
+/**
+ * Initialize an OpenCAPI context
+ *
+ * @ctx: The OpenCAPI context to initialize
+ * @afu: The AFU the context belongs to
+ * @mapping: The mapping to unmap when the context is closed (may be NULL)
+ */
+int ocxl_context_init(struct ocxl_context *ctx, struct ocxl_afu *afu,
+   struct address_space *mapping);
+
+/**
+ * Free an OpenCAPI context
+ *
+ * 

Re: [PATCH v3 3/7] ocxl: Create a clear delineation between ocxl backend & frontend

2019-03-25 Thread Frederic Barrat
This is a huge patch, there are probably ways to split it in smaller 
pieces to make the review easier. However, considering how much time 
we've already invested discussing and reviewing it, it's with by me to 
keep it as is.
The ref-counting and device management look good to me now. A few 
details below.




--- a/drivers/misc/ocxl/file.c
+++ b/drivers/misc/ocxl/file.c
@@ -17,12 +17,10 @@ static struct class *ocxl_class;
  static struct mutex minors_idr_lock;
  static struct idr minors_idr;
  
-static struct ocxl_afu *find_and_get_afu(dev_t devno)

+static struct ocxl_file_info *find_file_info(dev_t devno)
  {
-   struct ocxl_afu *afu;
-   int afu_minor;
+   struct ocxl_file_info *info;
  
-	afu_minor = MINOR(devno);

/*
 * We don't declare an RCU critical section here, as our AFU
 * is protected by a reference counter on the device. By the time the
@@ -30,56 +28,52 @@ static struct ocxl_afu *find_and_get_afu(dev_t devno)
 * the device is already at 0, so no user API will access that AFU and
 * this function can't return it.
 */



The comment is still true, but needs tuning. Something like:
"We don't declare an RCU critical section here, as our info structure is 
protected by a reference counter on the device. By the time the info 
reference is removed from the idr, the ref count of the device is 
already at 0, so no user API will access the corresponding AFU and this 
function can't return it."




-   afu = idr_find(&minors_idr, afu_minor);
-   if (afu)
-   ocxl_afu_get(afu);
-   return afu;
+   info = idr_find(&minors_idr, MINOR(devno));
+   return info;
  }
  
-static int allocate_afu_minor(struct ocxl_afu *afu)

+static int allocate_minor(struct ocxl_file_info *info)
  {
int minor;
  
  	mutex_lock(&minors_idr_lock);

-   minor = idr_alloc(&minors_idr, afu, 0, OCXL_NUM_MINORS, GFP_KERNEL);
+   minor = idr_alloc(&minors_idr, info, 0, OCXL_NUM_MINORS, GFP_KERNEL);
mutex_unlock(&minors_idr_lock);
return minor;
  }
  
-static void free_afu_minor(struct ocxl_afu *afu)

+static void free_minor(struct ocxl_file_info *info)
  {
mutex_lock(&minors_idr_lock);
-   idr_remove(&minors_idr, MINOR(afu->dev.devt));
+   idr_remove(&minors_idr, MINOR(info->dev.devt));
mutex_unlock(&minors_idr_lock);
  }
  
  static int afu_open(struct inode *inode, struct file *file)

  {
-   struct ocxl_afu *afu;
+   struct ocxl_file_info *info;
struct ocxl_context *ctx;
int rc;
  
  	pr_debug("%s for device %x\n", __func__, inode->i_rdev);
  
-	afu = find_and_get_afu(inode->i_rdev);

-   if (!afu)
+   info = find_file_info(inode->i_rdev);
+   if (!info)
return -ENODEV;
  
  	ctx = ocxl_context_alloc();

if (!ctx) {
rc = -ENOMEM;
-   goto put_afu;
+   goto err;
}
  
-	rc = ocxl_context_init(ctx, afu, inode->i_mapping);

+   rc = ocxl_context_init(ctx, info->afu, inode->i_mapping);
if (rc)
-   goto put_afu;
+   goto err;
file->private_data = ctx;
-   ocxl_afu_put(afu);
return 0;
  
-put_afu:

-   ocxl_afu_put(afu);
+err:
return rc;



The error path with goto is here useless. However, if 
ocxl_context_init() fails, the memory for the context is never released.
You may consider either getting rid of ocxl_context_alloc(), which is 
just a simple wrapper around kzalloc(), or merging the allocation in 
ocxl_context_init(). It would impact the external API, but having 2 
calls (alloc and init) feels like there's one too many.





+static int ocxl_file_make_visible(struct ocxl_afu *afu)
  {
int rc;
+   struct ocxl_file_info *info = ocxl_afu_get_private(afu);
  
-	cdev_init(&afu->cdev, &ocxl_afu_fops);

-   rc = cdev_add(&afu->cdev, afu->dev.devt, 1);
+   cdev_init(&info->cdev, &ocxl_afu_fops);
+   rc = cdev_add(&info->cdev, info->dev.devt, 1);
if (rc) {
-   dev_err(&afu->dev, "Unable to add afu char device: %d\n", rc);
+   dev_err(&info->dev, "Unable to add afu char device: %d\n", rc);
return rc;
}
+
return 0;
  }
  
-void ocxl_destroy_cdev(struct ocxl_afu *afu)

+void ocxl_file_make_invisible(struct ocxl_afu *afu)



This function is not called anywhere?




-void ocxl_unregister_afu(struct ocxl_afu *afu)
+void ocxl_file_unregister_afu(struct ocxl_afu *afu)
  {
-   free_afu_minor(afu);
+   struct ocxl_file_info *info = ocxl_afu_get_private(afu);
+
+   if (!info)
+   return;
+



So that's likely where we miss the "make invisible" call. However, is it 
enough to rely on the private data to be set on the AFU?





diff --git a/drivers/misc/ocxl/ocxl_internal.h 
b/drivers/misc/ocxl/ocxl_internal.h
index 81086534dab5..e04e547df29e 100644
--- a/drivers/misc/ocxl/ocxl_internal.h
+++ b/drivers/misc/ocxl/ocxl_intern

[PATCH 2/2] arch: add pidfd and io_uring syscalls everywhere

2019-03-25 Thread Arnd Bergmann
Add the io_uring and pidfd_send_signal system calls to all architectures.

These system calls are designed to handle both native and compat tasks,
so all entries are the same across architectures, only arm-compat and
the generic tale still use an old format.

Signed-off-by: Arnd Bergmann 
---
 arch/alpha/kernel/syscalls/syscall.tbl  | 4 
 arch/arm/tools/syscall.tbl  | 4 
 arch/arm64/include/asm/unistd.h | 2 +-
 arch/arm64/include/asm/unistd32.h   | 8 
 arch/ia64/kernel/syscalls/syscall.tbl   | 4 
 arch/m68k/kernel/syscalls/syscall.tbl   | 4 
 arch/microblaze/kernel/syscalls/syscall.tbl | 4 
 arch/mips/kernel/syscalls/syscall_n32.tbl   | 4 
 arch/mips/kernel/syscalls/syscall_n64.tbl   | 4 
 arch/mips/kernel/syscalls/syscall_o32.tbl   | 4 
 arch/parisc/kernel/syscalls/syscall.tbl | 4 
 arch/powerpc/kernel/syscalls/syscall.tbl| 4 
 arch/s390/kernel/syscalls/syscall.tbl   | 4 
 arch/sh/kernel/syscalls/syscall.tbl | 4 
 arch/sparc/kernel/syscalls/syscall.tbl  | 4 
 arch/xtensa/kernel/syscalls/syscall.tbl | 4 
 16 files changed, 65 insertions(+), 1 deletion(-)

diff --git a/arch/alpha/kernel/syscalls/syscall.tbl 
b/arch/alpha/kernel/syscalls/syscall.tbl
index 63ed39cbd3bd..165f268beafc 100644
--- a/arch/alpha/kernel/syscalls/syscall.tbl
+++ b/arch/alpha/kernel/syscalls/syscall.tbl
@@ -463,3 +463,7 @@
 532common  getppid sys_getppid
 # all other architectures have common numbers for new syscall, alpha
 # is the exception.
+534common  pidfd_send_signal   sys_pidfd_send_signal
+535common  io_uring_setup  sys_io_uring_setup
+536common  io_uring_enter  sys_io_uring_enter
+537common  io_uring_register   sys_io_uring_register
diff --git a/arch/arm/tools/syscall.tbl b/arch/arm/tools/syscall.tbl
index 9016f4081bb9..0393917eaa57 100644
--- a/arch/arm/tools/syscall.tbl
+++ b/arch/arm/tools/syscall.tbl
@@ -437,3 +437,7 @@
 421common  rt_sigtimedwait_time64  sys_rt_sigtimedwait
 422common  futex_time64sys_futex
 423common  sched_rr_get_interval_time64sys_sched_rr_get_interval
+424common  pidfd_send_signal   sys_pidfd_send_signal
+425common  io_uring_setup  sys_io_uring_setup
+426common  io_uring_enter  sys_io_uring_enter
+427common  io_uring_register   sys_io_uring_register
diff --git a/arch/arm64/include/asm/unistd.h b/arch/arm64/include/asm/unistd.h
index 310d8f1cae7a..c6946fe640e6 100644
--- a/arch/arm64/include/asm/unistd.h
+++ b/arch/arm64/include/asm/unistd.h
@@ -49,7 +49,7 @@
 #define __ARM_NR_compat_set_tls(__ARM_NR_COMPAT_BASE + 5)
 #define __ARM_NR_COMPAT_END(__ARM_NR_COMPAT_BASE + 0x800)
 
-#define __NR_compat_syscalls   424
+#define __NR_compat_syscalls   428
 #endif
 
 #define __ARCH_WANT_SYS_CLONE
diff --git a/arch/arm64/include/asm/unistd32.h 
b/arch/arm64/include/asm/unistd32.h
index 5590f2623690..23f1a44acada 100644
--- a/arch/arm64/include/asm/unistd32.h
+++ b/arch/arm64/include/asm/unistd32.h
@@ -866,6 +866,14 @@ __SYSCALL(__NR_rt_sigtimedwait_time64, 
compat_sys_rt_sigtimedwait_time64)
 __SYSCALL(__NR_futex_time64, sys_futex)
 #define __NR_sched_rr_get_interval_time64 423
 __SYSCALL(__NR_sched_rr_get_interval_time64, sys_sched_rr_get_interval)
+#define __NR_pidfd_send_signal 424
+__SYSCALL(__NR_pidfd_send_signal, sys_pidfd_send_signal)
+#define __NR_io_uring_setup 425
+__SYSCALL(__NR_io_uring_setup, sys_io_uring_setup)
+#define __NR_io_uring_enter 426
+__SYSCALL(__NR_io_uring_enter, sys_io_uring_enter)
+#define __NR_io_uring_register 427
+__SYSCALL(__NR_io_uring_register, sys_io_uring_register)
 
 /*
  * Please add new compat syscalls above this comment and update
diff --git a/arch/ia64/kernel/syscalls/syscall.tbl 
b/arch/ia64/kernel/syscalls/syscall.tbl
index ab9cda5f6136..56e3d0b685e1 100644
--- a/arch/ia64/kernel/syscalls/syscall.tbl
+++ b/arch/ia64/kernel/syscalls/syscall.tbl
@@ -344,3 +344,7 @@
 332common  pkey_free   sys_pkey_free
 333common  rseqsys_rseq
 # 334 through 423 are reserved to sync up with other architectures
+424common  pidfd_send_signal   sys_pidfd_send_signal
+425common  io_uring_setup  sys_io_uring_setup
+426common  io_uring_enter  sys_io_uring_enter
+427common  io_uring_register   sys_io_uring_register
diff --git a/arch/m68k/kernel/syscalls/syscall.tbl 
b/arch/m68k/kernel/syscalls/syscall.tbl
index 125c14178979..df4ec3ec71d1 100644
--- a/arch/m68k/kernel/syscalls/syscall.tbl
+++ b/arch/m68k/kernel/syscalls/syscall.tbl
@@ -423,3 +423,7 @@
 421common  rt_sigtimedwait_time64  sys_rt_sigtimedwait
 422common  futex_time64sys_futex
 4

Re: [RESEND 1/7] mm/gup: Replace get_user_pages_longterm() with FOLL_LONGTERM

2019-03-25 Thread Ira Weiny
On Fri, Mar 22, 2019 at 02:24:40PM -0700, Dan Williams wrote:
> On Sun, Mar 17, 2019 at 7:36 PM  wrote:
> >
> > From: Ira Weiny 
> >
> > Rather than have a separate get_user_pages_longterm() call,
> > introduce FOLL_LONGTERM and change the longterm callers to use
> > it.
> >
> > This patch does not change any functionality.
> >
> > FOLL_LONGTERM can only be supported with get_user_pages() as it
> > requires vmas to determine if DAX is in use.
> >
> > CC: Aneesh Kumar K.V 
> > CC: Andrew Morton 
> > CC: Michal Hocko 
> > Signed-off-by: Ira Weiny 
> [..]
> > diff --git a/include/linux/mm.h b/include/linux/mm.h
> > index 2d483dbdffc0..6831077d126c 100644
> > --- a/include/linux/mm.h
> > +++ b/include/linux/mm.h
> [..]
> > @@ -2609,6 +2596,7 @@ struct page *follow_page(struct vm_area_struct *vma, 
> > unsigned long address,
> >  #define FOLL_REMOTE0x2000  /* we are working on non-current tsk/mm */
> >  #define FOLL_COW   0x4000  /* internal GUP flag */
> >  #define FOLL_ANON  0x8000  /* don't do file mappings */
> > +#define FOLL_LONGTERM  0x1 /* mapping is intended for a long term pin 
> > */
> 
> Let's change this comment to say something like /* mapping lifetime is
> indefinite / at the discretion of userspace */, since "longterm is not
> well defined.
> 
> I think it should also include a /* FIXME: */ to say something about
> the havoc a long term pin might wreak on fs and mm code paths.

Will do.

> 
> >  static inline int vm_fault_to_errno(vm_fault_t vm_fault, int foll_flags)
> >  {
> > diff --git a/mm/gup.c b/mm/gup.c
> > index f84e22685aaa..8cb4cff067bc 100644
> > --- a/mm/gup.c
> > +++ b/mm/gup.c
> > @@ -1112,26 +1112,7 @@ long get_user_pages_remote(struct task_struct *tsk, 
> > struct mm_struct *mm,
> >  }
> >  EXPORT_SYMBOL(get_user_pages_remote);
> >
> > -/*
> > - * This is the same as get_user_pages_remote(), just with a
> > - * less-flexible calling convention where we assume that the task
> > - * and mm being operated on are the current task's and don't allow
> > - * passing of a locked parameter.  We also obviously don't pass
> > - * FOLL_REMOTE in here.
> > - */
> > -long get_user_pages(unsigned long start, unsigned long nr_pages,
> > -   unsigned int gup_flags, struct page **pages,
> > -   struct vm_area_struct **vmas)
> > -{
> > -   return __get_user_pages_locked(current, current->mm, start, 
> > nr_pages,
> > -  pages, vmas, NULL,
> > -  gup_flags | FOLL_TOUCH);
> > -}
> > -EXPORT_SYMBOL(get_user_pages);
> > -
> >  #if defined(CONFIG_FS_DAX) || defined (CONFIG_CMA)
> > -
> > -#ifdef CONFIG_FS_DAX
> >  static bool check_dax_vmas(struct vm_area_struct **vmas, long nr_pages)
> >  {
> > long i;
> > @@ -1150,12 +1131,6 @@ static bool check_dax_vmas(struct vm_area_struct 
> > **vmas, long nr_pages)
> > }
> > return false;
> >  }
> > -#else
> > -static inline bool check_dax_vmas(struct vm_area_struct **vmas, long 
> > nr_pages)
> > -{
> > -   return false;
> > -}
> > -#endif
> >
> >  #ifdef CONFIG_CMA
> >  static struct page *new_non_cma_page(struct page *page, unsigned long 
> > private)
> > @@ -1209,10 +1184,13 @@ static struct page *new_non_cma_page(struct page 
> > *page, unsigned long private)
> > return __alloc_pages_node(nid, gfp_mask, 0);
> >  }
> >
> > -static long check_and_migrate_cma_pages(unsigned long start, long nr_pages,
> > -   unsigned int gup_flags,
> > +static long check_and_migrate_cma_pages(struct task_struct *tsk,
> > +   struct mm_struct *mm,
> > +   unsigned long start,
> > +   unsigned long nr_pages,
> > struct page **pages,
> > -   struct vm_area_struct **vmas)
> > +   struct vm_area_struct **vmas,
> > +   unsigned int gup_flags)
> >  {
> > long i;
> > bool drain_allow = true;
> > @@ -1268,10 +1246,14 @@ static long check_and_migrate_cma_pages(unsigned 
> > long start, long nr_pages,
> > putback_movable_pages(&cma_page_list);
> > }
> > /*
> > -* We did migrate all the pages, Try to get the page 
> > references again
> > -* migrating any new CMA pages which we failed to isolate 
> > earlier.
> > +* We did migrate all the pages, Try to get the page 
> > references
> > +* again migrating any new CMA pages which we failed to 
> > isolate
> > +* earlier.
> >  */
> > -   nr_pages = get_user_pages(start, nr_pages, gup_flags, 
> > pages, vmas);
> > +   nr_pages = __get_user_pages_locked(tsk, mm, start, nr_pages,
> > +  

Re: [PATCH v3] powerpc/64: Fix memcmp reading past the end of src/dest

2019-03-25 Thread Michael Ellerman
Segher Boessenkool  writes:
> On Fri, Mar 22, 2019 at 11:37:24PM +1100, Michael Ellerman wrote:
>>  .Lcmp_rest_lt8bytes:
>> -/* Here we have only less than 8 bytes to compare with. at least s1
>> - * Address is aligned with 8 bytes.
>> - * The next double words are load and shift right with appropriate
>> - * bits.
>> +/*
>> + * Here we have less than 8 bytes to compare. At least s1 is aligned to
>> + * 8 bytes, but s2 may not be. We must make sure s2 + 8 doesn't cross a
>
> "s2 + 7"?  The code is fine though (bgt, not bge).

Duh, thanks for catching it.

>> + * page boundary, otherwise we might read past the end of the buffer and
>> + * trigger a page fault. We use 4K as the conservative minimum page
>> + * size. If we detect that case we go to the byte-by-byte loop.
>> + *
>> + * Otherwise the next double word is loaded from s1 and s2, and shifted
>> + * right to compare the appropriate bits.
>>   */
>> +clrldi  r6,r4,(64-12)   // r6 = r4 & 0xfff
>
> You can just write
>   rlwinm r6,r4,0,0x0fff

> if that is clearer?  Or do you still want a comment with that :-)

I don't think it's clearer doing a rotate of zero bits :)

And yeah I'd probably still leave the comment, so I'm inclined to stick
with the clrldi?

Would be nice if the assembler could support:

andir6, r4, 0x0fff

And turn it into the rlwinm, or rldicl :)

>> +cmpdi   r6,0xff8
>> +bgt .Lshort
>
> Reviewed-by: Segher Boessenkool 

I'll fixup the comment. Thanks.

cheers


PowerPC fixes 5.1-3: CONFIG_SPARSEMEM doesn't exist in the kernel source code

2019-03-25 Thread Christian Zigotzky
I was able to activate CONFIG_SPARSEMEM in the kernel configuration. But 
does the P.A. Semi Nemo board need this option?


-- Christian

On 25 March 2019 at 12:00PM, Christian Zigotzky wrote:

Hi All,

I wasn't able to compile the RC2 today because of the following error 
messages:


  CC  arch/powerpc/mm/slb.o
In file included from ./arch/powerpc/include/asm/book3s/64/mmu.h:39:0,
 from ./arch/powerpc/include/asm/mmu.h:360,
 from ./arch/powerpc/include/asm/lppaca.h:36,
 from ./arch/powerpc/include/asm/paca.h:21,
 from ./arch/powerpc/include/asm/current.h:16,
 from ./include/linux/thread_info.h:21,
 from ./include/asm-generic/preempt.h:5,
 from ./arch/powerpc/include/generated/asm/preempt.h:1,
 from ./include/linux/preempt.h:78,
 from ./include/linux/spinlock.h:51,
 from ./include/linux/mmzone.h:8,
 from ./include/linux/gfp.h:6,
 from ./include/linux/mm.h:10,
 from ./arch/powerpc/include/asm/cacheflush.h:12,
 from ./arch/powerpc/include/asm/asm-prototypes.h:16,
 from arch/powerpc/mm/slb.c:17:
./arch/powerpc/include/asm/book3s/64/mmu-hash.h:584:6: warning: 
"MAX_PHYSMEM_BITS" is not defined [-Wundef]

 #if (MAX_PHYSMEM_BITS > MAX_EA_BITS_PER_CONTEXT)
  ^
arch/powerpc/mm/slb.c: In function 'slb_allocate_kernel':
arch/powerpc/mm/slb.c:697:37: error: 'MAX_PHYSMEM_BITS' undeclared 
(first use in this function)

   if ((ea & ~REGION_MASK) > (1UL << MAX_PHYSMEM_BITS))
 ^
arch/powerpc/mm/slb.c:697:37: note: each undeclared identifier is 
reported only once for each function it appears in
scripts/Makefile.build:278: recipe for target 'arch/powerpc/mm/slb.o' 
failed

make[3]: *** [arch/powerpc/mm/slb.o] Error 1
scripts/Makefile.build:489: recipe for target 'arch/powerpc/mm' failed
make[2]: *** [arch/powerpc/mm] Error 2
/home/christian/Downloads/a/Makefile:1046: recipe for target 
'arch/powerpc' failed

make[1]: *** [arch/powerpc] Error 2
Makefile:170: recipe for target 'sub-make' failed
make: *** [sub-make] Error 2

-

The variable MAX_PHYSMEM_BITS isn't defined. The problem is in the 
last PowerPC fixes 5.1-3: 
https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?h=v5.1-rc2&id=a5ed1e96cafde5ba48638f486bfca0685dc6ddc9


Commit log: powerpc/mm: Only define MAX_PHYSMEM_BITS in SPARSEMEM 
configurations


Problematic fix:

diff --git a/arch/powerpc/include/asm/mmu.h 
b/arch/powerpc/include/asm/mmu.h

index d34ad1657d7b..598cdcdd1355 100644
--- a/arch/powerpc/include/asm/mmu.h
+++ b/arch/powerpc/include/asm/mmu.h
@@ -352,7 +352,7 @@ static inline bool strict_kernel_rwx_enabled(void)
 #if defined(CONFIG_SPARSEMEM_VMEMMAP) && 
defined(CONFIG_SPARSEMEM_EXTREME) &&   \

    defined (CONFIG_PPC_64K_PAGES)
 #define MAX_PHYSMEM_BITS    51
-#else
+#elif defined(CONFIG_SPARSEMEM)
 #define MAX_PHYSMEM_BITS    46
 #endif

The first if statement isn't successfull because the variables 
CONFIG_SPARSEMEM_EXTREME and CONFIG_PPC_64K_PAGES aren't activated in 
our kernel configuration. Therefore we need MAX_PHYSMEM_BITS 46 for 
our Nemo board. Unfortunately CONFIG_SPARSEMEM doesn't exist in the 
kernel source code so we can't activate it in the kernel config.


I replaced '#elif defined(CONFIG_SPARSEMEM)' with '#else' and after 
that the compiling of the RC2 works again.


Please check if CONFIG_SPARSEMEM exists.

Thanks,
Christian




Re: powerpc/mm: Only define MAX_PHYSMEM_BITS in SPARSEMEM configurations

2019-03-25 Thread Christophe Leroy




Le 25/03/2019 à 12:35, Michael Ellerman a écrit :

Ben Hutchings  writes:

On Mon, 2019-03-25 at 01:03 +0100, Andreas Schwab wrote:

On Mär 24 2019, Ben Hutchings  wrote:


Presumably you have CONFIG_PPC_BOOK3S_64 enabled and
CONFIG_SPARSEMEM
disabled?  Was this configuration actually usable?


Why not?


I assume that CONFIG_SPARSEMEM is the default for a good reason.
What I don't know is how strong that reason is (I am not a Power expert
at all).  Looking a bit further, it seems to be related to CONFIG_NUMA
in that you can enable CONFIG_FLATMEM if and only if that's disabled.
So I suppose the configuration you used works for non-NUMA systems.


Aneesh pointed out this fix would break FLATMEM after I'd merged it, but
it didn't break any of our defconfigs so I wondered if anyone would
notice.

I checked today and a G5 will boot with FLATMEM, which I assume is what
Andreas is using.

I guess we should fix this build break for now.

Even some G5's have discontiguous memory, so FLATMEM is not clearly a
good choice even for all G5's, and actually a fresh g5_defconfig uses
SPARSEMEM.

So I'm inclined to just switch to always using SPARSEMEM on 64-bit
Book3S, because that's what's well tested and we hardly need more code
paths to test. Unless anyone has a strong objection, I haven't actually
benchmarked FLATMEM vs SPARSEMEM on a G5.



Christian Zigotzky reported a build failure with this patch.

Christophe


Re: powerpc/mm: Only define MAX_PHYSMEM_BITS in SPARSEMEM configurations

2019-03-25 Thread Michael Ellerman
Ben Hutchings  writes:
> On Mon, 2019-03-25 at 01:03 +0100, Andreas Schwab wrote:
>> On Mär 24 2019, Ben Hutchings  wrote:
>> 
>> > Presumably you have CONFIG_PPC_BOOK3S_64 enabled and
>> > CONFIG_SPARSEMEM
>> > disabled?  Was this configuration actually usable?
>> 
>> Why not?
>
> I assume that CONFIG_SPARSEMEM is the default for a good reason.
> What I don't know is how strong that reason is (I am not a Power expert
> at all).  Looking a bit further, it seems to be related to CONFIG_NUMA
> in that you can enable CONFIG_FLATMEM if and only if that's disabled. 
> So I suppose the configuration you used works for non-NUMA systems.

Aneesh pointed out this fix would break FLATMEM after I'd merged it, but
it didn't break any of our defconfigs so I wondered if anyone would
notice.

I checked today and a G5 will boot with FLATMEM, which I assume is what
Andreas is using.

I guess we should fix this build break for now.

Even some G5's have discontiguous memory, so FLATMEM is not clearly a
good choice even for all G5's, and actually a fresh g5_defconfig uses
SPARSEMEM.

So I'm inclined to just switch to always using SPARSEMEM on 64-bit
Book3S, because that's what's well tested and we hardly need more code
paths to test. Unless anyone has a strong objection, I haven't actually
benchmarked FLATMEM vs SPARSEMEM on a G5.

cheers


PowerPC fixes 5.1-3: CONFIG_SPARSEMEM doesn't exist in the kernel source code

2019-03-25 Thread Christian Zigotzky

Hi All,

I wasn't able to compile the RC2 today because of the following error 
messages:


  CC  arch/powerpc/mm/slb.o
In file included from ./arch/powerpc/include/asm/book3s/64/mmu.h:39:0,
 from ./arch/powerpc/include/asm/mmu.h:360,
 from ./arch/powerpc/include/asm/lppaca.h:36,
 from ./arch/powerpc/include/asm/paca.h:21,
 from ./arch/powerpc/include/asm/current.h:16,
 from ./include/linux/thread_info.h:21,
 from ./include/asm-generic/preempt.h:5,
 from ./arch/powerpc/include/generated/asm/preempt.h:1,
 from ./include/linux/preempt.h:78,
 from ./include/linux/spinlock.h:51,
 from ./include/linux/mmzone.h:8,
 from ./include/linux/gfp.h:6,
 from ./include/linux/mm.h:10,
 from ./arch/powerpc/include/asm/cacheflush.h:12,
 from ./arch/powerpc/include/asm/asm-prototypes.h:16,
 from arch/powerpc/mm/slb.c:17:
./arch/powerpc/include/asm/book3s/64/mmu-hash.h:584:6: warning: 
"MAX_PHYSMEM_BITS" is not defined [-Wundef]

 #if (MAX_PHYSMEM_BITS > MAX_EA_BITS_PER_CONTEXT)
  ^
arch/powerpc/mm/slb.c: In function 'slb_allocate_kernel':
arch/powerpc/mm/slb.c:697:37: error: 'MAX_PHYSMEM_BITS' undeclared 
(first use in this function)

   if ((ea & ~REGION_MASK) > (1UL << MAX_PHYSMEM_BITS))
 ^
arch/powerpc/mm/slb.c:697:37: note: each undeclared identifier is 
reported only once for each function it appears in

scripts/Makefile.build:278: recipe for target 'arch/powerpc/mm/slb.o' failed
make[3]: *** [arch/powerpc/mm/slb.o] Error 1
scripts/Makefile.build:489: recipe for target 'arch/powerpc/mm' failed
make[2]: *** [arch/powerpc/mm] Error 2
/home/christian/Downloads/a/Makefile:1046: recipe for target 
'arch/powerpc' failed

make[1]: *** [arch/powerpc] Error 2
Makefile:170: recipe for target 'sub-make' failed
make: *** [sub-make] Error 2

-

The variable MAX_PHYSMEM_BITS isn't defined. The problem is in the last 
PowerPC fixes 5.1-3: 
https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?h=v5.1-rc2&id=a5ed1e96cafde5ba48638f486bfca0685dc6ddc9


Commit log: powerpc/mm: Only define MAX_PHYSMEM_BITS in SPARSEMEM 
configurations


Problematic fix:

diff --git a/arch/powerpc/include/asm/mmu.h b/arch/powerpc/include/asm/mmu.h
index d34ad1657d7b..598cdcdd1355 100644
--- a/arch/powerpc/include/asm/mmu.h
+++ b/arch/powerpc/include/asm/mmu.h
@@ -352,7 +352,7 @@ static inline bool strict_kernel_rwx_enabled(void)
 #if defined(CONFIG_SPARSEMEM_VMEMMAP) && 
defined(CONFIG_SPARSEMEM_EXTREME) &&   \

    defined (CONFIG_PPC_64K_PAGES)
 #define MAX_PHYSMEM_BITS    51
-#else
+#elif defined(CONFIG_SPARSEMEM)
 #define MAX_PHYSMEM_BITS    46
 #endif

The first if statement isn't successfull because the variables 
CONFIG_SPARSEMEM_EXTREME and CONFIG_PPC_64K_PAGES aren't activated in 
our kernel configuration. Therefore we need MAX_PHYSMEM_BITS 46 for our 
Nemo board. Unfortunately CONFIG_SPARSEMEM doesn't exist in the kernel 
source code so we can't activate it in the kernel config.


I replaced '#elif defined(CONFIG_SPARSEMEM)' with '#else' and after that 
the compiling of the RC2 works again.


Please check if CONFIG_SPARSEMEM exists.

Thanks,
Christian


Re: [PATCH v2] powerpc/8xx: fix possible object reference leak

2019-03-25 Thread Michael Ellerman
Peng Hao  writes:

> From: Wen Yang 
>
> The call to of_find_compatible_node returns a node pointer with refcount
> incremented thus it must be explicitly decremented after the last
> usage.
> irq_domain_add_linear also calls of_node_get to increase refcount,
> so irq_domain will not be affected when it is released.
>
> Detected by coccinelle with the following warnings:
> ./arch/powerpc/platforms/8xx/pic.c:158:1-7: ERROR: missing of_node_put; 
> acquired a node pointer with refcount incremented on line 136, but without a 
> corresponding object release within this function.
>
> Fixes: a8db8cf0d894 ("irq_domain: Replace irq_alloc_host() with
> revmap-specific initializers")
> Signed-off-by: Wen Yang 
> Suggested-by: Christophe Leroy 
> Reviewed-by: Peng Hao 
> Cc: Vitaly Bordug 
> Cc: Benjamin Herrenschmidt 
> Cc: Paul Mackerras 
> Cc: Michael Ellerman 
> Cc: linuxppc-dev@lists.ozlabs.org
> Cc: linux-ker...@vger.kernel.org
> ---
>  arch/powerpc/platforms/8xx/pic.c | 2 --
>  1 file changed, 2 deletions(-)
>
> diff --git a/arch/powerpc/platforms/8xx/pic.c 
> b/arch/powerpc/platforms/8xx/pic.c
> index 8d5a25d..4453df6 100644
> --- a/arch/powerpc/platforms/8xx/pic.c
> +++ b/arch/powerpc/platforms/8xx/pic.c
> @@ -153,9 +153,7 @@ int mpc8xx_pic_init(void)
>   if (mpc8xx_pic_host == NULL) {
>   printk(KERN_ERR "MPC8xx PIC: failed to allocate irq host!\n");
>   ret = -ENOMEM;
> - goto out;
>   }
> - return 0;

It's fragile to rely on ret being equal to zero here. If the code
further up is changed it's easy enough to miss that ret is expected to
be zero.

Better to set it to zero here explicitly, this is the success path after
all, eg:

ret = 0;

>  out:
>   of_node_put(np);


cheers


Re: [PATCH] compiler: allow all arches to enable CONFIG_OPTIMIZE_INLINING

2019-03-25 Thread Arnd Bergmann
On Mon, Mar 25, 2019 at 8:55 AM Masahiro Yamada
 wrote:
>
> On Mon, Mar 25, 2019 at 4:33 PM Arnd Bergmann  wrote:
> >
> > On Mon, Mar 25, 2019 at 7:11 AM Masahiro Yamada
> >  wrote:
> > > I do not know why to reproduce it,
> > > but is "__init __noreturn" more sensible than
> > > "__always_inline" here?
> >
> > It's in a header file, so it has to be 'inline'. We could make it
> > static inline __init __noreturn,
>
> Yes, I like 'static inline __init __noreturn'
>
> > but I don't see an advantage over
> > __always_inline there.
>
> __always_inline takes away the compiler's freedom.
>
> I'd like to leave it up to the compiler where possible.

Ok, fair enough.

  Arnd


Re: [PATCH] compiler: allow all arches to enable CONFIG_OPTIMIZE_INLINING

2019-03-25 Thread Arnd Bergmann
On Wed, Mar 20, 2019 at 2:34 PM Arnd Bergmann  wrote:
>
> On Wed, Mar 20, 2019 at 10:41 AM Arnd Bergmann  wrote:
> >
> > I've added your patch to my randconfig test setup and will let you
> > know if I see anything noticeable. I'm currently testing clang-arm32,
> > clang-arm64 and gcc-x86.
>
> This is the only additional bug that has come up so far:
>
> `.exit.text' referenced in section `.alt.smp.init' of
> drivers/char/ipmi/ipmi_msghandler.o: defined in discarded section
> `exit.text' of drivers/char/ipmi/ipmi_msghandler.o

FWIW, the message up there does not match the patch anyway,
that was an unrelated bug.

 Arnd


Re: [PATCH v3 2/7] ocxl: Don't pass pci_dev around

2019-03-25 Thread Frederic Barrat




Le 25/03/2019 à 06:44, Alastair D'Silva a écrit :

From: Alastair D'Silva 

This data is already available in a struct

Signed-off-by: Alastair D'Silva 
---


Acked-by: Frederic Barrat 



  drivers/misc/ocxl/core.c | 38 +-
  1 file changed, 21 insertions(+), 17 deletions(-)

diff --git a/drivers/misc/ocxl/core.c b/drivers/misc/ocxl/core.c
index 1a4411b72d35..2f2fe12eac1e 100644
--- a/drivers/misc/ocxl/core.c
+++ b/drivers/misc/ocxl/core.c
@@ -66,10 +66,11 @@ static int set_afu_device(struct ocxl_afu *afu, const char 
*location)
return rc;
  }
  
-static int assign_afu_actag(struct ocxl_afu *afu, struct pci_dev *dev)

+static int assign_afu_actag(struct ocxl_afu *afu)
  {
struct ocxl_fn *fn = afu->fn;
int actag_count, actag_offset;
+   struct pci_dev *pci_dev = to_pci_dev(fn->dev.parent);
  
  	/*

 * if there were not enough actags for the function, each afu
@@ -79,16 +80,16 @@ static int assign_afu_actag(struct ocxl_afu *afu, struct 
pci_dev *dev)
fn->actag_enabled / fn->actag_supported;
actag_offset = ocxl_actag_afu_alloc(fn, actag_count);
if (actag_offset < 0) {
-   dev_err(&afu->dev, "Can't allocate %d actags for AFU: %d\n",
+   dev_err(&pci_dev->dev, "Can't allocate %d actags for AFU: %d\n",
actag_count, actag_offset);
return actag_offset;
}
afu->actag_base = fn->actag_base + actag_offset;
afu->actag_enabled = actag_count;
  
-	ocxl_config_set_afu_actag(dev, afu->config.dvsec_afu_control_pos,

+   ocxl_config_set_afu_actag(pci_dev, afu->config.dvsec_afu_control_pos,
afu->actag_base, afu->actag_enabled);
-   dev_dbg(&afu->dev, "actag base=%d enabled=%d\n",
+   dev_dbg(&pci_dev->dev, "actag base=%d enabled=%d\n",
afu->actag_base, afu->actag_enabled);
return 0;
  }
@@ -103,10 +104,11 @@ static void reclaim_afu_actag(struct ocxl_afu *afu)
ocxl_actag_afu_free(afu->fn, start_offset, size);
  }
  
-static int assign_afu_pasid(struct ocxl_afu *afu, struct pci_dev *dev)

+static int assign_afu_pasid(struct ocxl_afu *afu)
  {
struct ocxl_fn *fn = afu->fn;
int pasid_count, pasid_offset;
+   struct pci_dev *pci_dev = to_pci_dev(fn->dev.parent);
  
  	/*

 * We only support the case where the function configuration
@@ -115,7 +117,7 @@ static int assign_afu_pasid(struct ocxl_afu *afu, struct 
pci_dev *dev)
pasid_count = 1 << afu->config.pasid_supported_log;
pasid_offset = ocxl_pasid_afu_alloc(fn, pasid_count);
if (pasid_offset < 0) {
-   dev_err(&afu->dev, "Can't allocate %d PASIDs for AFU: %d\n",
+   dev_err(&pci_dev->dev, "Can't allocate %d PASIDs for AFU: %d\n",
pasid_count, pasid_offset);
return pasid_offset;
}
@@ -123,10 +125,10 @@ static int assign_afu_pasid(struct ocxl_afu *afu, struct 
pci_dev *dev)
afu->pasid_count = 0;
afu->pasid_max = pasid_count;
  
-	ocxl_config_set_afu_pasid(dev, afu->config.dvsec_afu_control_pos,

+   ocxl_config_set_afu_pasid(pci_dev, afu->config.dvsec_afu_control_pos,
afu->pasid_base,
afu->config.pasid_supported_log);
-   dev_dbg(&afu->dev, "PASID base=%d, enabled=%d\n",
+   dev_dbg(&pci_dev->dev, "PASID base=%d, enabled=%d\n",
afu->pasid_base, pasid_count);
return 0;
  }
@@ -172,9 +174,10 @@ static void release_fn_bar(struct ocxl_fn *fn, int bar)
WARN_ON(fn->bar_used[idx] < 0);
  }
  
-static int map_mmio_areas(struct ocxl_afu *afu, struct pci_dev *dev)

+static int map_mmio_areas(struct ocxl_afu *afu)
  {
int rc;
+   struct pci_dev *pci_dev = to_pci_dev(afu->fn->dev.parent);
  
  	rc = reserve_fn_bar(afu->fn, afu->config.global_mmio_bar);

if (rc)
@@ -187,10 +190,10 @@ static int map_mmio_areas(struct ocxl_afu *afu, struct 
pci_dev *dev)
}
  
  	afu->global_mmio_start =

-   pci_resource_start(dev, afu->config.global_mmio_bar) +
+   pci_resource_start(pci_dev, afu->config.global_mmio_bar) +
afu->config.global_mmio_offset;
afu->pp_mmio_start =
-   pci_resource_start(dev, afu->config.pp_mmio_bar) +
+   pci_resource_start(pci_dev, afu->config.pp_mmio_bar) +
afu->config.pp_mmio_offset;
  
  	afu->global_mmio_ptr = ioremap(afu->global_mmio_start,

@@ -198,7 +201,7 @@ static int map_mmio_areas(struct ocxl_afu *afu, struct 
pci_dev *dev)
if (!afu->global_mmio_ptr) {
release_fn_bar(afu->fn, afu->config.pp_mmio_bar);
release_fn_bar(afu->fn, afu->config.global_mmio_bar);
-   dev_err(&dev->dev, "Error mapping global mmio area\n");
+   dev_err(&pci_dev->dev, "Error mapping global mmio area\n");
 

Re: [PATCH v3 1/7] ocxl: Split pci.c

2019-03-25 Thread Frederic Barrat




Le 25/03/2019 à 06:44, Alastair D'Silva a écrit :

From: Alastair D'Silva 

In preparation for making core code available for external drivers,
move the core code out of pci.c and into core.c

Signed-off-by: Alastair D'Silva 
---


Acked-by: Frederic Barrat 



  drivers/misc/ocxl/Makefile|   1 +
  drivers/misc/ocxl/core.c  | 517 +
  drivers/misc/ocxl/ocxl_internal.h |   5 +
  drivers/misc/ocxl/pci.c   | 519 +-
  4 files changed, 524 insertions(+), 518 deletions(-)
  create mode 100644 drivers/misc/ocxl/core.c

diff --git a/drivers/misc/ocxl/Makefile b/drivers/misc/ocxl/Makefile
index 5229dcda8297..bc4e39bfda7b 100644
--- a/drivers/misc/ocxl/Makefile
+++ b/drivers/misc/ocxl/Makefile
@@ -3,6 +3,7 @@ ccflags-$(CONFIG_PPC_WERROR)+= -Werror
  
  ocxl-y+= main.o pci.o config.o file.o pasid.o

  ocxl-y+= link.o context.o afu_irq.o sysfs.o 
trace.o
+ocxl-y += core.o
  obj-$(CONFIG_OCXL)+= ocxl.o
  
  # For tracepoints to include our trace.h from tracepoint infrastructure:

diff --git a/drivers/misc/ocxl/core.c b/drivers/misc/ocxl/core.c
new file mode 100644
index ..1a4411b72d35
--- /dev/null
+++ b/drivers/misc/ocxl/core.c
@@ -0,0 +1,517 @@
+// SPDX-License-Identifier: GPL-2.0+
+// Copyright 2019 IBM Corp.
+#include 
+#include "ocxl_internal.h"
+
+static struct ocxl_fn *ocxl_fn_get(struct ocxl_fn *fn)
+{
+   return (get_device(&fn->dev) == NULL) ? NULL : fn;
+}
+
+static void ocxl_fn_put(struct ocxl_fn *fn)
+{
+   put_device(&fn->dev);
+}
+
+struct ocxl_afu *ocxl_afu_get(struct ocxl_afu *afu)
+{
+   return (get_device(&afu->dev) == NULL) ? NULL : afu;
+}
+
+void ocxl_afu_put(struct ocxl_afu *afu)
+{
+   put_device(&afu->dev);
+}
+
+static struct ocxl_afu *alloc_afu(struct ocxl_fn *fn)
+{
+   struct ocxl_afu *afu;
+
+   afu = kzalloc(sizeof(struct ocxl_afu), GFP_KERNEL);
+   if (!afu)
+   return NULL;
+
+   mutex_init(&afu->contexts_lock);
+   mutex_init(&afu->afu_control_lock);
+   idr_init(&afu->contexts_idr);
+   afu->fn = fn;
+   ocxl_fn_get(fn);
+   return afu;
+}
+
+static void free_afu(struct ocxl_afu *afu)
+{
+   idr_destroy(&afu->contexts_idr);
+   ocxl_fn_put(afu->fn);
+   kfree(afu);
+}
+
+static void free_afu_dev(struct device *dev)
+{
+   struct ocxl_afu *afu = to_ocxl_afu(dev);
+
+   ocxl_unregister_afu(afu);
+   free_afu(afu);
+}
+
+static int set_afu_device(struct ocxl_afu *afu, const char *location)
+{
+   struct ocxl_fn *fn = afu->fn;
+   int rc;
+
+   afu->dev.parent = &fn->dev;
+   afu->dev.release = free_afu_dev;
+   rc = dev_set_name(&afu->dev, "%s.%s.%hhu", afu->config.name, location,
+   afu->config.idx);
+   return rc;
+}
+
+static int assign_afu_actag(struct ocxl_afu *afu, struct pci_dev *dev)
+{
+   struct ocxl_fn *fn = afu->fn;
+   int actag_count, actag_offset;
+
+   /*
+* if there were not enough actags for the function, each afu
+* reduces its count as well
+*/
+   actag_count = afu->config.actag_supported *
+   fn->actag_enabled / fn->actag_supported;
+   actag_offset = ocxl_actag_afu_alloc(fn, actag_count);
+   if (actag_offset < 0) {
+   dev_err(&afu->dev, "Can't allocate %d actags for AFU: %d\n",
+   actag_count, actag_offset);
+   return actag_offset;
+   }
+   afu->actag_base = fn->actag_base + actag_offset;
+   afu->actag_enabled = actag_count;
+
+   ocxl_config_set_afu_actag(dev, afu->config.dvsec_afu_control_pos,
+   afu->actag_base, afu->actag_enabled);
+   dev_dbg(&afu->dev, "actag base=%d enabled=%d\n",
+   afu->actag_base, afu->actag_enabled);
+   return 0;
+}
+
+static void reclaim_afu_actag(struct ocxl_afu *afu)
+{
+   struct ocxl_fn *fn = afu->fn;
+   int start_offset, size;
+
+   start_offset = afu->actag_base - fn->actag_base;
+   size = afu->actag_enabled;
+   ocxl_actag_afu_free(afu->fn, start_offset, size);
+}
+
+static int assign_afu_pasid(struct ocxl_afu *afu, struct pci_dev *dev)
+{
+   struct ocxl_fn *fn = afu->fn;
+   int pasid_count, pasid_offset;
+
+   /*
+* We only support the case where the function configuration
+* requested enough PASIDs to cover all AFUs.
+*/
+   pasid_count = 1 << afu->config.pasid_supported_log;
+   pasid_offset = ocxl_pasid_afu_alloc(fn, pasid_count);
+   if (pasid_offset < 0) {
+   dev_err(&afu->dev, "Can't allocate %d PASIDs for AFU: %d\n",
+   pasid_count, pasid_offset);
+   return pasid_offset;
+   }
+   afu->pasid_base = fn->pasid_base + pasid_offset;
+   afu->pasid_count = 0;
+   afu->pasid_max = pasid_count;
+
+   ocxl_config_set_afu_pasid(d

[PATCH v2] powerpc/8xx: fix possible object reference leak

2019-03-25 Thread Peng Hao
From: Wen Yang 

The call to of_find_compatible_node returns a node pointer with refcount
incremented thus it must be explicitly decremented after the last
usage.
irq_domain_add_linear also calls of_node_get to increase refcount,
so irq_domain will not be affected when it is released.

Detected by coccinelle with the following warnings:
./arch/powerpc/platforms/8xx/pic.c:158:1-7: ERROR: missing of_node_put; 
acquired a node pointer with refcount incremented on line 136, but without a 
corresponding object release within this function.

Fixes: a8db8cf0d894 ("irq_domain: Replace irq_alloc_host() with
revmap-specific initializers")
Signed-off-by: Wen Yang 
Suggested-by: Christophe Leroy 
Reviewed-by: Peng Hao 
Cc: Vitaly Bordug 
Cc: Benjamin Herrenschmidt 
Cc: Paul Mackerras 
Cc: Michael Ellerman 
Cc: linuxppc-dev@lists.ozlabs.org
Cc: linux-ker...@vger.kernel.org
---
 arch/powerpc/platforms/8xx/pic.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/arch/powerpc/platforms/8xx/pic.c b/arch/powerpc/platforms/8xx/pic.c
index 8d5a25d..4453df6 100644
--- a/arch/powerpc/platforms/8xx/pic.c
+++ b/arch/powerpc/platforms/8xx/pic.c
@@ -153,9 +153,7 @@ int mpc8xx_pic_init(void)
if (mpc8xx_pic_host == NULL) {
printk(KERN_ERR "MPC8xx PIC: failed to allocate irq host!\n");
ret = -ENOMEM;
-   goto out;
}
-   return 0;
 
 out:
of_node_put(np);
-- 
2.9.5



Re: powerpc/mm: Only define MAX_PHYSMEM_BITS in SPARSEMEM configurations

2019-03-25 Thread Rui Salvaterra
Hi, Ben

> Presumably you have CONFIG_PPC_BOOK3S_64 enabled and CONFIG_SPARSEMEM 
> disabled?
> Was this configuration actually usable?

I just noticed these build warnings too. FWIW, I've been using
CONFIG_FLATMEM on my Power Mac G5 for about three years, so I guess
the answer is "yes" (for this specific machine, at least).


Re: [PATCH v2] powerpc/8xx: fix possible object reference leak

2019-03-25 Thread Christophe Leroy




Le 25/03/2019 à 18:11, Peng Hao a écrit :

From: Wen Yang 

The call to of_find_compatible_node returns a node pointer with refcount
incremented thus it must be explicitly decremented after the last
usage.
irq_domain_add_linear also calls of_node_get to increase refcount,
so irq_domain will not be affected when it is released.

Detected by coccinelle with the following warnings:
./arch/powerpc/platforms/8xx/pic.c:158:1-7: ERROR: missing of_node_put; 
acquired a node pointer with refcount incremented on line 136, but without a 
corresponding object release within this function.

Fixes: a8db8cf0d894 ("irq_domain: Replace irq_alloc_host() with
revmap-specific initializers")
Signed-off-by: Wen Yang 
Suggested-by: Christophe Leroy 
Reviewed-by: Peng Hao 


Reviewed-by: Christophe Leroy 


Cc: Vitaly Bordug 
Cc: Benjamin Herrenschmidt 
Cc: Paul Mackerras 
Cc: Michael Ellerman 
Cc: linuxppc-dev@lists.ozlabs.org
Cc: linux-ker...@vger.kernel.org
---
  arch/powerpc/platforms/8xx/pic.c | 2 --
  1 file changed, 2 deletions(-)

diff --git a/arch/powerpc/platforms/8xx/pic.c b/arch/powerpc/platforms/8xx/pic.c
index 8d5a25d..4453df6 100644
--- a/arch/powerpc/platforms/8xx/pic.c
+++ b/arch/powerpc/platforms/8xx/pic.c
@@ -153,9 +153,7 @@ int mpc8xx_pic_init(void)
if (mpc8xx_pic_host == NULL) {
printk(KERN_ERR "MPC8xx PIC: failed to allocate irq host!\n");
ret = -ENOMEM;
-   goto out;
}
-   return 0;
  
  out:

of_node_put(np);



Re: Regression in 5.1.0-rc2: PowerBook G4 Aluminum fails to boot - bisected to commit 0df977eafc792

2019-03-25 Thread Christophe Leroy




Le 25/03/2019 à 01:49, Larry Finger a écrit :
A build of kernel 5.1.0-rc2 resulted in a failure to boot on my 
PowerBook G4 Aluminum. The bootstrap loads the initial kernel and issues 
the appropriate start, but the machine hangs at that point.


The problem does not depend on the choice of PPC32 processor type. This 
machine has a 7447A according to /proc/cpuinfo.


The problem was bisected to the following:

commit 0df977eafc792a5365a7f81d8d5920132e03afad
Author: Christophe Leroy 
Date:   Thu Feb 21 10:37:54 2019 +

     powerpc/6xx: Don't use SPRN_SPRG2 for storing stack pointer while 
in RTAS


     When calling RTAS, the stack pointer is stored in SPRN_SPRG2
     in order to be able to restore it in case of machine check in RTAS.

     As machine check is not a perfomance critical path, this patch
     frees SPRN_SPRG2 by using a field in thread struct instead.

     Signed-off-by: Christophe Leroy 
     Signed-off-by: Michael Ellerman 

I reverted this patch and found that the system began execution, and 
then failed, likely due to the reassignment of SPRN_SPRG2.


I had found this problem with 5.1.0-rc1, but -rc2 was out by the time I 
finished the bisection. Unfortunately, none of the changes in -rc2 fixed 
the problem.


I think I identified the issue, see 
https://patchwork.ozlabs.org/patch/1063962/


It is now booting properly under QEMU with your .config

Could you please test it on your target ?

Thanks
Christophe



Attached is the .config that I used.

Thanks,

Larry


[PATCH] powerpc/rtas: fix early boot failure.

2019-03-25 Thread Christophe Leroy
Commit 0df977eafc79 ("powerpc/6xx: Don't use SPRN_SPRG2 for storing
stack pointer while in RTAS") changes the code to use a field in
thread struct to store the stack pointer while in RTAS instead of
using SPRN_SPRG2. It therefore converts all places which were
manipulating SPRN_SPRG2 to use that field. During early startup,
the zeroing of SPRN_SPRG2 has been replaced by a zeroing of that
field in thread struct. But at least in start_here, that's done
wrongly because it used the physical address of the fields while
MMU is on at that time.

So the virtual address of the field should be used instead, but in
the meantime, thread struct has already been zeroised and initialised
so we can just drop this initialisation.

Reported-by: Larry Finger 
Fixes: 0df977eafc79 ("powerpc/6xx: Don't use SPRN_SPRG2 for storing stack 
pointer while in RTAS")
Signed-off-by: Christophe Leroy 
---
 arch/powerpc/kernel/head_32.S | 8 
 1 file changed, 8 deletions(-)

diff --git a/arch/powerpc/kernel/head_32.S b/arch/powerpc/kernel/head_32.S
index 48051c8977c5..e25b615e9f9e 100644
--- a/arch/powerpc/kernel/head_32.S
+++ b/arch/powerpc/kernel/head_32.S
@@ -851,10 +851,6 @@ __secondary_start:
tophys(r4,r2)
addir4,r4,THREAD/* phys address of our thread_struct */
mtspr   SPRN_SPRG_THREAD,r4
-#ifdef CONFIG_PPC_RTAS
-   li  r3,0
-   stw r3, RTAS_SP(r4) /* 0 => not in RTAS */
-#endif
lis r4, (swapper_pg_dir - PAGE_OFFSET)@h
ori r4, r4, (swapper_pg_dir - PAGE_OFFSET)@l
mtspr   SPRN_SPRG_PGDIR, r4
@@ -941,10 +937,6 @@ start_here:
tophys(r4,r2)
addir4,r4,THREAD/* init task's THREAD */
mtspr   SPRN_SPRG_THREAD,r4
-#ifdef CONFIG_PPC_RTAS
-   li  r3,0
-   stw r3, RTAS_SP(r4) /* 0 => not in RTAS */
-#endif
lis r4, (swapper_pg_dir - PAGE_OFFSET)@h
ori r4, r4, (swapper_pg_dir - PAGE_OFFSET)@l
mtspr   SPRN_SPRG_PGDIR, r4
-- 
2.13.3



Re: [PATCH] compiler: allow all arches to enable CONFIG_OPTIMIZE_INLINING

2019-03-25 Thread Masahiro Yamada
On Mon, Mar 25, 2019 at 4:33 PM Arnd Bergmann  wrote:
>
> On Mon, Mar 25, 2019 at 7:11 AM Masahiro Yamada
>  wrote:
> > On Wed, Mar 20, 2019 at 10:34 PM Arnd Bergmann  wrote:
> > >
> > > On Wed, Mar 20, 2019 at 10:41 AM Arnd Bergmann  wrote:
> > > >
> > > > I've added your patch to my randconfig test setup and will let you
> > > > know if I see anything noticeable. I'm currently testing clang-arm32,
> > > > clang-arm64 and gcc-x86.
> > >
> > > This is the only additional bug that has come up so far:
> > >
> > > `.exit.text' referenced in section `.alt.smp.init' of
> > > drivers/char/ipmi/ipmi_msghandler.o: defined in discarded section
> > > `exit.text' of drivers/char/ipmi/ipmi_msghandler.o
> > >
> > > diff --git a/arch/arm/kernel/atags.h b/arch/arm/kernel/atags.h
> > > index 201100226301..84b12e33104d 100644
> > > --- a/arch/arm/kernel/atags.h
> > > +++ b/arch/arm/kernel/atags.h
> > > @@ -5,7 +5,7 @@ void convert_to_tag_list(struct tag *tags);
> > >  const struct machine_desc *setup_machine_tags(phys_addr_t 
> > > __atags_pointer,
> > > unsigned int machine_nr);
> > >  #else
> > > -static inline const struct machine_desc *
> > > +static __always_inline const struct machine_desc *
> > >  setup_machine_tags(phys_addr_t __atags_pointer, unsigned int machine_nr)
> > >  {
> > > early_print("no ATAGS support: can't continue\n");
> > >
> >
> >
> > I do not know why to reproduce it,
> > but is "__init __noreturn" more sensible than
> > "__always_inline" here?
>
> It's in a header file, so it has to be 'inline'. We could make it
> static inline __init __noreturn,

Yes, I like 'static inline __init __noreturn'

> but I don't see an advantage over
> __always_inline there.

__always_inline takes away the compiler's freedom.

I'd like to leave it up to the compiler where possible.

The inlining decision may change
depending on -O2, -Os, -Og(which was rejected)
or whatever optimization strategy.


>
> Arnd
>
> __
> Linux MTD discussion mailing list
> http://lists.infradead.org/mailman/listinfo/linux-mtd/



-- 
Best Regards
Masahiro Yamada


Re: [PATCH] compiler: allow all arches to enable CONFIG_OPTIMIZE_INLINING

2019-03-25 Thread Arnd Bergmann
On Mon, Mar 25, 2019 at 7:11 AM Masahiro Yamada
 wrote:
> On Wed, Mar 20, 2019 at 10:34 PM Arnd Bergmann  wrote:
> >
> > On Wed, Mar 20, 2019 at 10:41 AM Arnd Bergmann  wrote:
> > >
> > > I've added your patch to my randconfig test setup and will let you
> > > know if I see anything noticeable. I'm currently testing clang-arm32,
> > > clang-arm64 and gcc-x86.
> >
> > This is the only additional bug that has come up so far:
> >
> > `.exit.text' referenced in section `.alt.smp.init' of
> > drivers/char/ipmi/ipmi_msghandler.o: defined in discarded section
> > `exit.text' of drivers/char/ipmi/ipmi_msghandler.o
> >
> > diff --git a/arch/arm/kernel/atags.h b/arch/arm/kernel/atags.h
> > index 201100226301..84b12e33104d 100644
> > --- a/arch/arm/kernel/atags.h
> > +++ b/arch/arm/kernel/atags.h
> > @@ -5,7 +5,7 @@ void convert_to_tag_list(struct tag *tags);
> >  const struct machine_desc *setup_machine_tags(phys_addr_t __atags_pointer,
> > unsigned int machine_nr);
> >  #else
> > -static inline const struct machine_desc *
> > +static __always_inline const struct machine_desc *
> >  setup_machine_tags(phys_addr_t __atags_pointer, unsigned int machine_nr)
> >  {
> > early_print("no ATAGS support: can't continue\n");
> >
>
>
> I do not know why to reproduce it,
> but is "__init __noreturn" more sensible than
> "__always_inline" here?

It's in a header file, so it has to be 'inline'. We could make it
static inline __init __noreturn, but I don't see an advantage over
__always_inline there.

Arnd